1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3
; Non-kernel function: value, offset and width all arrive as ordinary (VGPR)
; arguments. The checks expect the call to become a single three-operand
; v_bfe_u32. The function name says "i32", but the intrinsic called is the
; unsigned llvm.amdgcn.ubfe.i32.
4define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
5; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
6; GFX6:       ; %bb.0:
7; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
9; GFX6-NEXT:    s_setpc_b64 s[30:31]
10  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
11  ret i32 %bfe_i32
12}
13
; Shader (amdgpu_ps) variant with all three operands passed inreg (SGPRs).
; The checks expect the offset to be masked with 63, the width shifted left
; by 16, and the two OR'd into the single control operand of s_bfe_u32.
14define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
15; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
16; GFX6:       ; %bb.0:
17; GFX6-NEXT:    s_and_b32 s1, s1, 63
18; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
19; GFX6-NEXT:    s_or_b32 s1, s1, s2
20; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
21; GFX6-NEXT:    ; return to shader part epilog
22  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
23  ret i32 %bfe_i32
24}
25
26; TODO: Need to expand this.
27; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
28;   %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
29;   ret i64 %bfe_i64
30; }
31
; 64-bit scalar variant: the source is an i64 held in an SGPR pair. The checks
; expect the same offset/width packing as the 32-bit scalar case (mask with
; 63, shift width left by 16, OR) feeding s_bfe_u64. The result value is named
; %bfe_i32 even though its type is i64.
32define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
33; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
34; GFX6:       ; %bb.0:
35; GFX6-NEXT:    s_and_b32 s2, s2, 63
36; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
37; GFX6-NEXT:    s_or_b32 s2, s2, s3
38; GFX6-NEXT:    s_bfe_u64 s[0:1], s[0:1], s2
39; GFX6-NEXT:    ; return to shader part epilog
40  %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
41  ret i64 %bfe_i32
42}
43
; Kernel variant with register operands; the result is stored to %out.
; NOTE(review): the call passes %src1 as BOTH the offset and the width, and
; %src2 is never used. The checks reflect that (one register feeds both the
; mask and the shift). This looks like a typo for %src2 -- confirm, and
; regenerate the assertions if the call is changed.
44define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
45; GFX6-LABEL: bfe_u32_arg_arg_arg:
46; GFX6:       ; %bb.0:
47; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
48; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
49; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
50; GFX6-NEXT:    s_mov_b32 s6, -1
51; GFX6-NEXT:    s_mov_b32 s7, 0xf000
52; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
53; GFX6-NEXT:    s_and_b32 s1, s0, 63
54; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
55; GFX6-NEXT:    s_or_b32 s0, s1, s0
56; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
57; GFX6-NEXT:    v_mov_b32_e32 v0, s0
58; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
59; GFX6-NEXT:    s_endpgm
60  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
61  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
62  ret void
63}
64
; Register source and offset, immediate width of 123. The checks expect the
; width to appear pre-shifted as the constant 0x7b0000 (123 << 16), OR'd with
; the offset masked to 63.
65define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
66; GFX6-LABEL: bfe_u32_arg_arg_imm:
67; GFX6:       ; %bb.0:
68; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
69; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
70; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
71; GFX6-NEXT:    s_mov_b32 s6, -1
72; GFX6-NEXT:    s_mov_b32 s7, 0xf000
73; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
74; GFX6-NEXT:    s_and_b32 s0, s0, 63
75; GFX6-NEXT:    s_or_b32 s0, s0, 0x7b0000
76; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
77; GFX6-NEXT:    v_mov_b32_e32 v0, s0
78; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
79; GFX6-NEXT:    s_endpgm
80  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
81  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
82  ret void
83}
84
; Register source and width, immediate offset of 123. The checks expect the
; offset to appear already masked as the constant 59 (123 & 63), OR'd with
; the width shifted left by 16.
85define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
86; GFX6-LABEL: bfe_u32_arg_imm_arg:
87; GFX6:       ; %bb.0:
88; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
89; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
90; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
91; GFX6-NEXT:    s_mov_b32 s6, -1
92; GFX6-NEXT:    s_mov_b32 s7, 0xf000
93; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
94; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
95; GFX6-NEXT:    s_or_b32 s0, 59, s0
96; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
97; GFX6-NEXT:    v_mov_b32_e32 v0, s0
98; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
99; GFX6-NEXT:    s_endpgm
100  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
101  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
102  ret void
103}
104
; Immediate source value 123 with register offset and width. The checks
; expect the constant to be used directly as the first s_bfe_u32 operand
; (0x7b) while offset and width are packed at run time.
105define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
106; GFX6-LABEL: bfe_u32_imm_arg_arg:
107; GFX6:       ; %bb.0:
108; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
109; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
110; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
111; GFX6-NEXT:    s_mov_b32 s6, -1
112; GFX6-NEXT:    s_mov_b32 s7, 0xf000
113; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
114; GFX6-NEXT:    s_and_b32 s1, s2, 63
115; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
116; GFX6-NEXT:    s_or_b32 s0, s1, s0
117; GFX6-NEXT:    s_bfe_u32 s0, 0x7b, s0
118; GFX6-NEXT:    v_mov_b32_e32 v0, s0
119; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
120; GFX6-NEXT:    s_endpgm
121  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
122  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
123  ret void
124}
125
; Zero width with a register offset. The checks expect the bfe to still be
; emitted: only the masked offset is passed as the control operand (no width
; bits are OR'd in), rather than the result being folded to a constant.
126define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
127; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
128; GFX6:       ; %bb.0:
129; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
130; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
131; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
132; GFX6-NEXT:    s_mov_b32 s6, -1
133; GFX6-NEXT:    s_mov_b32 s7, 0xf000
134; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
135; GFX6-NEXT:    s_and_b32 s0, s0, 63
136; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
137; GFX6-NEXT:    v_mov_b32_e32 v0, s0
138; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
139; GFX6-NEXT:    s_endpgm
140  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
141  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
142  ret void
143}
144
; Zero width with an immediate offset of 8. The checks expect the control
; operand to be the literal 8 (offset only, width bits zero). %src1 is not
; referenced by the body, and the checks load only one scalar argument.
145define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
146; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
147; GFX6:       ; %bb.0:
148; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
149; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
150; GFX6-NEXT:    s_mov_b32 s6, -1
151; GFX6-NEXT:    s_mov_b32 s7, 0xf000
152; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
153; GFX6-NEXT:    s_bfe_u32 s0, s0, 8
154; GFX6-NEXT:    v_mov_b32_e32 v0, s0
155; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
156; GFX6-NEXT:    s_endpgm
157  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
158  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
159  ret void
160}
161
; Extracts bits [7:0] of a value that was just zero-extended from an i8 load.
; The checks expect an unsigned byte load followed by a VALU v_bfe_u32 with
; offset 0 and width 8, i.e. the extract is kept after the byte load.
162define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
163; GFX6-LABEL: bfe_u32_zextload_i8:
164; GFX6:       ; %bb.0:
165; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
166; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
167; GFX6-NEXT:    s_mov_b32 s2, -1
168; GFX6-NEXT:    s_mov_b32 s3, 0xf000
169; GFX6-NEXT:    s_mov_b64 s[6:7], s[2:3]
170; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
171; GFX6-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
172; GFX6-NEXT:    s_waitcnt vmcnt(0)
173; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
174; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
175; GFX6-NEXT:    s_endpgm
176  %load = load i8, i8 addrspace(1)* %in
177  %ext = zext i8 %load to i32
178  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
179  store i32 %bfe, i32 addrspace(1)* %out, align 4
180  ret void
181}
182
183; FIXME (possibly stale): Should be using s_add_i32. The checks below already expect s_add_i32, so this note may be resolved.
; Loads an i32, adds 1, masks to the low 8 bits, then extracts offset 0 /
; width 8. The checks expect s_add_i32, an s_and_b32 with 0xff, and an
; s_bfe_u32 whose control constant is 0x80000 (width 8 << 16, offset 0).
184define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
185; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
186; GFX6:       ; %bb.0:
187; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
188; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
189; GFX6-NEXT:    s_mov_b32 s6, -1
190; GFX6-NEXT:    s_mov_b32 s7, 0xf000
191; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
192; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
193; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
194; GFX6-NEXT:    s_add_i32 s0, s0, 1
195; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
196; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
197; GFX6-NEXT:    v_mov_b32_e32 v0, s0
198; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
199; GFX6-NEXT:    s_endpgm
200  %load = load i32, i32 addrspace(1)* %in, align 4
201  %add = add i32 %load, 1
202  %ext = and i32 %add, 255
203  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
204  store i32 %bfe, i32 addrspace(1)* %out, align 4
205  ret void
206}
207
; 16-bit counterpart of the zext-in-reg case: add 1, mask with 65535, then
; extract offset 0 / width 16. The checks expect the mask 0xffff and the
; control constant 0x100000 (width 16 << 16, offset 0).
208define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
209; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
210; GFX6:       ; %bb.0:
211; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
212; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
213; GFX6-NEXT:    s_mov_b32 s6, -1
214; GFX6-NEXT:    s_mov_b32 s7, 0xf000
215; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
216; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
217; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
218; GFX6-NEXT:    s_add_i32 s0, s0, 1
219; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
220; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
221; GFX6-NEXT:    v_mov_b32_e32 v0, s0
222; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
223; GFX6-NEXT:    s_endpgm
224  %load = load i32, i32 addrspace(1)* %in, align 4
225  %add = add i32 %load, 1
226  %ext = and i32 %add, 65535
227  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
228  store i32 %bfe, i32 addrspace(1)* %out, align 4
229  ret void
230}
231
; Same add/mask-to-8-bits input as the i8 zext-in-reg case, but extracting
; width 8 starting at offset 1. The checks expect control constant 0x80001
; (width 8 << 16 | offset 1).
232define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
233; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
234; GFX6:       ; %bb.0:
235; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
236; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
237; GFX6-NEXT:    s_mov_b32 s6, -1
238; GFX6-NEXT:    s_mov_b32 s7, 0xf000
239; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
240; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
241; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
242; GFX6-NEXT:    s_add_i32 s0, s0, 1
243; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
244; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80001
245; GFX6-NEXT:    v_mov_b32_e32 v0, s0
246; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
247; GFX6-NEXT:    s_endpgm
248  %load = load i32, i32 addrspace(1)* %in, align 4
249  %add = add i32 %load, 1
250  %ext = and i32 %add, 255
251  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
252  store i32 %bfe, i32 addrspace(1)* %out, align 4
253  ret void
254}
255
; Value masked to 8 bits, then width 8 extracted at offset 3. The checks
; expect control constant 0x80003 (width 8 << 16 | offset 3).
256define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
257; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
258; GFX6:       ; %bb.0:
259; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
260; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
261; GFX6-NEXT:    s_mov_b32 s6, -1
262; GFX6-NEXT:    s_mov_b32 s7, 0xf000
263; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
264; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
265; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
266; GFX6-NEXT:    s_add_i32 s0, s0, 1
267; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
268; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80003
269; GFX6-NEXT:    v_mov_b32_e32 v0, s0
270; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
271; GFX6-NEXT:    s_endpgm
272  %load = load i32, i32 addrspace(1)* %in, align 4
273  %add = add i32 %load, 1
274  %ext = and i32 %add, 255
275  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
276  store i32 %bfe, i32 addrspace(1)* %out, align 4
277  ret void
278}
279
; Value masked to 8 bits, then width 8 extracted at offset 7. The checks
; expect control constant 0x80007 (width 8 << 16 | offset 7).
280define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
281; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
282; GFX6:       ; %bb.0:
283; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
284; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
285; GFX6-NEXT:    s_mov_b32 s6, -1
286; GFX6-NEXT:    s_mov_b32 s7, 0xf000
287; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
288; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
289; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
290; GFX6-NEXT:    s_add_i32 s0, s0, 1
291; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
292; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80007
293; GFX6-NEXT:    v_mov_b32_e32 v0, s0
294; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
295; GFX6-NEXT:    s_endpgm
296  %load = load i32, i32 addrspace(1)* %in, align 4
297  %add = add i32 %load, 1
298  %ext = and i32 %add, 255
299  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
300  store i32 %bfe, i32 addrspace(1)* %out, align 4
301  ret void
302}
303
; Value masked to 16 bits, then width 8 extracted at offset 8 (the upper byte
; of the masked halfword). The checks expect the mask 0xffff and control
; constant 0x80008 (width 8 << 16 | offset 8).
304define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
305; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
306; GFX6:       ; %bb.0:
307; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
308; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
309; GFX6-NEXT:    s_mov_b32 s6, -1
310; GFX6-NEXT:    s_mov_b32 s7, 0xf000
311; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
312; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
313; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
314; GFX6-NEXT:    s_add_i32 s0, s0, 1
315; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
316; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80008
317; GFX6-NEXT:    v_mov_b32_e32 v0, s0
318; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
319; GFX6-NEXT:    s_endpgm
320  %load = load i32, i32 addrspace(1)* %in, align 4
321  %add = add i32 %load, 1
322  %ext = and i32 %add, 65535
323  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
324  store i32 %bfe, i32 addrspace(1)* %out, align 4
325  ret void
326}
327
; Extracts bit 0 (offset 0, width 1) of a loaded i32. The checks expect a
; single s_bfe_u32 with control constant 0x10000 (width 1 << 16, offset 0).
328define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
329; GFX6-LABEL: bfe_u32_test_1:
330; GFX6:       ; %bb.0:
331; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
332; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
333; GFX6-NEXT:    s_mov_b32 s6, -1
334; GFX6-NEXT:    s_mov_b32 s7, 0xf000
335; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
336; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
337; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
338; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
339; GFX6-NEXT:    v_mov_b32_e32 v0, s0
340; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
341; GFX6-NEXT:    s_endpgm
342  %x = load i32, i32 addrspace(1)* %in, align 4
343  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
344  store i32 %bfe, i32 addrspace(1)* %out, align 4
345  ret void
346}
347
; Shifts the loaded value left by 31, then extracts the low 8 bits (offset 0,
; width 8). The checks expect both the s_lshl_b32 and the s_bfe_u32
; (control 0x80000) to be emitted.
348define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
349; GFX6-LABEL: bfe_u32_test_2:
350; GFX6:       ; %bb.0:
351; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
352; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
353; GFX6-NEXT:    s_mov_b32 s6, -1
354; GFX6-NEXT:    s_mov_b32 s7, 0xf000
355; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
356; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
357; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
358; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
359; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
360; GFX6-NEXT:    v_mov_b32_e32 v0, s0
361; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
362; GFX6-NEXT:    s_endpgm
363  %x = load i32, i32 addrspace(1)* %in, align 4
364  %shl = shl i32 %x, 31
365  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
366  store i32 %bfe, i32 addrspace(1)* %out, align 4
367  ret void
368}
369
; Shifts the loaded value left by 31, then extracts bit 0 (offset 0, width 1).
; The checks expect the s_lshl_b32 followed by s_bfe_u32 with control 0x10000.
370define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
371; GFX6-LABEL: bfe_u32_test_3:
372; GFX6:       ; %bb.0:
373; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
374; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
375; GFX6-NEXT:    s_mov_b32 s6, -1
376; GFX6-NEXT:    s_mov_b32 s7, 0xf000
377; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
378; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
379; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
380; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
381; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
382; GFX6-NEXT:    v_mov_b32_e32 v0, s0
383; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
384; GFX6-NEXT:    s_endpgm
385  %x = load i32, i32 addrspace(1)* %in, align 4
386  %shl = shl i32 %x, 31
387  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
388  store i32 %bfe, i32 addrspace(1)* %out, align 4
389  ret void
390}
391
; shl 31 followed by lshr 31 isolates bit 0; bit 31 of that is then extracted
; (offset 31, width 1). The checks expect the shift pair to be emitted as an
; s_bfe_u32 with control 0x10000, followed by the intrinsic's own s_bfe_u32
; with control 0x1001f (width 1 << 16 | offset 31).
392define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
393; GFX6-LABEL: bfe_u32_test_4:
394; GFX6:       ; %bb.0:
395; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
396; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
397; GFX6-NEXT:    s_mov_b32 s6, -1
398; GFX6-NEXT:    s_mov_b32 s7, 0xf000
399; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
400; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
401; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
402; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
403; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
404; GFX6-NEXT:    v_mov_b32_e32 v0, s0
405; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
406; GFX6-NEXT:    s_endpgm
407  %x = load i32, i32 addrspace(1)* %in, align 4
408  %shl = shl i32 %x, 31
409  %shr = lshr i32 %shl, 31
410  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
411  store i32 %bfe, i32 addrspace(1)* %out, align 4
412  ret void
413}
414
; shl 31 followed by ashr 31 sign-extends bit 0; bit 0 of that is then
; extracted (offset 0, width 1). The checks expect the shift pair to be
; emitted as a signed s_bfe_i32 (control 0x10000), followed by the unsigned
; s_bfe_u32 for the intrinsic with the same control constant.
415define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
416; GFX6-LABEL: bfe_u32_test_5:
417; GFX6:       ; %bb.0:
418; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
419; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
420; GFX6-NEXT:    s_mov_b32 s6, -1
421; GFX6-NEXT:    s_mov_b32 s7, 0xf000
422; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
423; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
424; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
425; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x10000
426; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
427; GFX6-NEXT:    v_mov_b32_e32 v0, s0
428; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
429; GFX6-NEXT:    s_endpgm
430  %x = load i32, i32 addrspace(1)* %in, align 4
431  %shl = shl i32 %x, 31
432  %shr = ashr i32 %shl, 31
433  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
434  store i32 %bfe, i32 addrspace(1)* %out, align 4
435  ret void
436}
437
; Shifts left by 31, then extracts 31 bits starting at offset 1. The checks
; expect the s_lshl_b32 followed by s_bfe_u32 with control 0x1f0001
; (width 31 << 16 | offset 1).
438define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
439; GFX6-LABEL: bfe_u32_test_6:
440; GFX6:       ; %bb.0:
441; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
442; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
443; GFX6-NEXT:    s_mov_b32 s6, -1
444; GFX6-NEXT:    s_mov_b32 s7, 0xf000
445; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
446; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
447; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
448; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
449; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
450; GFX6-NEXT:    v_mov_b32_e32 v0, s0
451; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
452; GFX6-NEXT:    s_endpgm
453  %x = load i32, i32 addrspace(1)* %in, align 4
454  %shl = shl i32 %x, 31
455  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
456  store i32 %bfe, i32 addrspace(1)* %out, align 4
457  ret void
458}
459
; Shifts left by 31, then extracts the low 31 bits (offset 0, width 31). The
; checks expect the s_lshl_b32 followed by s_bfe_u32 with control 0x1f0000
; (width 31 << 16, offset 0).
460define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
461; GFX6-LABEL: bfe_u32_test_7:
462; GFX6:       ; %bb.0:
463; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
464; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
465; GFX6-NEXT:    s_mov_b32 s6, -1
466; GFX6-NEXT:    s_mov_b32 s7, 0xf000
467; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
468; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
469; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
470; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
471; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0000
472; GFX6-NEXT:    v_mov_b32_e32 v0, s0
473; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
474; GFX6-NEXT:    s_endpgm
475  %x = load i32, i32 addrspace(1)* %in, align 4
476  %shl = shl i32 %x, 31
477  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
478  store i32 %bfe, i32 addrspace(1)* %out, align 4
479  ret void
480}
481
; Shifts left by 31, then extracts bit 31 (offset 31, width 1), which is the
; original bit 0. The checks expect both instructions to be emitted:
; s_lshl_b32 followed by s_bfe_u32 with control 0x1001f.
482define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
483; GFX6-LABEL: bfe_u32_test_8:
484; GFX6:       ; %bb.0:
485; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
486; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
487; GFX6-NEXT:    s_mov_b32 s6, -1
488; GFX6-NEXT:    s_mov_b32 s7, 0xf000
489; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
490; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
491; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
492; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
493; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
494; GFX6-NEXT:    v_mov_b32_e32 v0, s0
495; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
496; GFX6-NEXT:    s_endpgm
497  %x = load i32, i32 addrspace(1)* %in, align 4
498  %shl = shl i32 %x, 31
499  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
500  store i32 %bfe, i32 addrspace(1)* %out, align 4
501  ret void
502}
503
; Extracts the top bit (offset 31, width 1) of a loaded i32. The checks expect
; a single s_bfe_u32 with control 0x1001f (width 1 << 16 | offset 31).
504define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
505; GFX6-LABEL: bfe_u32_test_9:
506; GFX6:       ; %bb.0:
507; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
508; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
509; GFX6-NEXT:    s_mov_b32 s6, -1
510; GFX6-NEXT:    s_mov_b32 s7, 0xf000
511; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
512; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
513; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
514; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
515; GFX6-NEXT:    v_mov_b32_e32 v0, s0
516; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
517; GFX6-NEXT:    s_endpgm
518  %x = load i32, i32 addrspace(1)* %in, align 4
519  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
520  store i32 %bfe, i32 addrspace(1)* %out, align 4
521  ret void
522}
523
; Extracts the upper 31 bits (offset 1, width 31) of a loaded i32. The checks
; expect a single s_bfe_u32 with control 0x1f0001 (width 31 << 16 | offset 1).
524define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
525; GFX6-LABEL: bfe_u32_test_10:
526; GFX6:       ; %bb.0:
527; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
528; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
529; GFX6-NEXT:    s_mov_b32 s6, -1
530; GFX6-NEXT:    s_mov_b32 s7, 0xf000
531; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
532; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
533; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
534; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
535; GFX6-NEXT:    v_mov_b32_e32 v0, s0
536; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
537; GFX6-NEXT:    s_endpgm
538  %x = load i32, i32 addrspace(1)* %in, align 4
539  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
540  store i32 %bfe, i32 addrspace(1)* %out, align 4
541  ret void
542}
543
; Extracts the upper 24 bits (offset 8, width 24) of a loaded i32. The checks
; expect a single s_bfe_u32 with control 0x180008 (width 24 << 16 | offset 8).
544define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
545; GFX6-LABEL: bfe_u32_test_11:
546; GFX6:       ; %bb.0:
547; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
548; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
549; GFX6-NEXT:    s_mov_b32 s6, -1
550; GFX6-NEXT:    s_mov_b32 s7, 0xf000
551; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
552; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
553; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
554; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x180008
555; GFX6-NEXT:    v_mov_b32_e32 v0, s0
556; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
557; GFX6-NEXT:    s_endpgm
558  %x = load i32, i32 addrspace(1)* %in, align 4
559  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
560  store i32 %bfe, i32 addrspace(1)* %out, align 4
561  ret void
562}
563
; Extracts the top byte (offset 24, width 8) of a loaded i32. The checks
; expect a single s_bfe_u32 with control 0x80018 (width 8 << 16 | offset 24).
564define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
565; GFX6-LABEL: bfe_u32_test_12:
566; GFX6:       ; %bb.0:
567; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
568; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
569; GFX6-NEXT:    s_mov_b32 s6, -1
570; GFX6-NEXT:    s_mov_b32 s7, 0xf000
571; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
572; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
573; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
574; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80018
575; GFX6-NEXT:    v_mov_b32_e32 v0, s0
576; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
577; GFX6-NEXT:    s_endpgm
578  %x = load i32, i32 addrspace(1)* %in, align 4
579  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
580  store i32 %bfe, i32 addrspace(1)* %out, align 4
581  ret void
582}
583
584; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; Arithmetic-shifts the loaded value right by 31 (replicating the sign bit),
; then extracts bit 31 (offset 31, width 1). The checks expect s_ashr_i32
; followed by s_bfe_u32 with control 0x1001f (width 1 << 16 | offset 31).
; The shifted value is named %shr because it holds an ashr result, and the
; store and ret are on separate lines like every other test in this file;
; neither change affects the generated code.
585define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
586; GFX6-LABEL: bfe_u32_test_13:
587; GFX6:       ; %bb.0:
588; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
589; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
590; GFX6-NEXT:    s_mov_b32 s6, -1
591; GFX6-NEXT:    s_mov_b32 s7, 0xf000
592; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
593; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
594; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
595; GFX6-NEXT:    s_ashr_i32 s0, s0, 31
596; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
597; GFX6-NEXT:    v_mov_b32_e32 v0, s0
598; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
599; GFX6-NEXT:    s_endpgm
600  %x = load i32, i32 addrspace(1)* %in, align 4
601  %shr = ashr i32 %x, 31
602  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
603  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
604}
605
; Logical-shifts the loaded value right by 31 (leaving only bit 0 possibly
; set), then extracts bit 31 (offset 31, width 1). The checks expect
; s_lshr_b32 followed by s_bfe_u32 with control 0x1001f.
; The shifted value is named %shr because it holds an lshr result, and the
; store and ret are on separate lines like every other test in this file;
; neither change affects the generated code.
606define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
607; GFX6-LABEL: bfe_u32_test_14:
608; GFX6:       ; %bb.0:
609; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
610; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
611; GFX6-NEXT:    s_mov_b32 s6, -1
612; GFX6-NEXT:    s_mov_b32 s7, 0xf000
613; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
614; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
615; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
616; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
617; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
618; GFX6-NEXT:    v_mov_b32_e32 v0, s0
619; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
620; GFX6-NEXT:    s_endpgm
621  %x = load i32, i32 addrspace(1)* %in, align 4
622  %shr = lshr i32 %x, 31
623  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
624  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
625}
626
; All-constant operands: source 0, offset 0, width 0. Despite the test name,
; the checks expect an s_bfe_u32 to be emitted with the constants as inline
; operands rather than a folded result.
627define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
628; GFX6-LABEL: bfe_u32_constant_fold_test_0:
629; GFX6:       ; %bb.0:
630; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
631; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
632; GFX6-NEXT:    v_mov_b32_e32 v0, s2
633; GFX6-NEXT:    s_mov_b32 s2, -1
634; GFX6-NEXT:    s_mov_b32 s3, 0xf000
635; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
636; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
637; GFX6-NEXT:    s_endpgm
638  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
639  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
640  ret void
641}
642
; Constant source 12334 (0x302e) with offset 0 and width 0. The checks expect
; an unfolded s_bfe_u32 with the source as a literal and control operand 0.
643define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
644; GFX6-LABEL: bfe_u32_constant_fold_test_1:
645; GFX6:       ; %bb.0:
646; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
647; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
648; GFX6-NEXT:    v_mov_b32_e32 v0, s2
649; GFX6-NEXT:    s_mov_b32 s2, -1
650; GFX6-NEXT:    s_mov_b32 s3, 0xf000
651; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
652; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
653; GFX6-NEXT:    s_endpgm
654  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
655  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
656  ret void
657}
658
; Constant source 0, offset 0, width 1. The checks expect an unfolded
; s_bfe_u32 with control 0x10000 (width 1 << 16, offset 0).
659define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
660; GFX6-LABEL: bfe_u32_constant_fold_test_2:
661; GFX6:       ; %bb.0:
662; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
663; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
664; GFX6-NEXT:    v_mov_b32_e32 v0, s2
665; GFX6-NEXT:    s_mov_b32 s2, -1
666; GFX6-NEXT:    s_mov_b32 s3, 0xf000
667; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
668; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
669; GFX6-NEXT:    s_endpgm
670  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
671  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
672  ret void
673}
674
; Constant source 1, offset 0, width 1. The checks expect an unfolded
; s_bfe_u32 with the source as inline constant 1 and control 0x10000.
675define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
676; GFX6-LABEL: bfe_u32_constant_fold_test_3:
677; GFX6:       ; %bb.0:
678; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
679; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
680; GFX6-NEXT:    v_mov_b32_e32 v0, s2
681; GFX6-NEXT:    s_mov_b32 s2, -1
682; GFX6-NEXT:    s_mov_b32 s3, 0xf000
683; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
684; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
685; GFX6-NEXT:    s_endpgm
686  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
687  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
688  ret void
689}
690
; Constant source 4294967295 (all ones, printed as -1 in the checks), offset
; 0, width 1. The checks expect an unfolded s_bfe_u32 with control 0x10000.
691define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
692; GFX6-LABEL: bfe_u32_constant_fold_test_4:
693; GFX6:       ; %bb.0:
694; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
695; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
696; GFX6-NEXT:    v_mov_b32_e32 v0, s2
697; GFX6-NEXT:    s_mov_b32 s2, -1
698; GFX6-NEXT:    s_mov_b32 s3, 0xf000
699; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
700; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
701; GFX6-NEXT:    s_endpgm
702  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
703  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
704  ret void
705}
706
; Constant source 128 (0x80), offset 7, width 1. The checks expect the control
; word 0x10007 (width 1 << 16 | offset 7) to be materialized into an SGPR
; with s_mov_b32 first, since the source already occupies the literal slot,
; and the s_bfe_u32 to remain unfolded.
707define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
708; GFX6-LABEL: bfe_u32_constant_fold_test_5:
709; GFX6:       ; %bb.0:
710; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
711; GFX6-NEXT:    s_mov_b32 s2, 0x10007
712; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
713; GFX6-NEXT:    v_mov_b32_e32 v0, s2
714; GFX6-NEXT:    s_mov_b32 s2, -1
715; GFX6-NEXT:    s_mov_b32 s3, 0xf000
716; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
717; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
718; GFX6-NEXT:    s_endpgm
719  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
720  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
721  ret void
722}
723
; Constant source 128 (0x80), offset 0, width 8. The checks expect the control
; word 0x80000 (width 8 << 16, offset 0) in an SGPR and an unfolded
; s_bfe_u32.
724define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
725; GFX6-LABEL: bfe_u32_constant_fold_test_6:
726; GFX6:       ; %bb.0:
727; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
728; GFX6-NEXT:    s_mov_b32 s2, 0x80000
729; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
730; GFX6-NEXT:    v_mov_b32_e32 v0, s2
731; GFX6-NEXT:    s_mov_b32 s2, -1
732; GFX6-NEXT:    s_mov_b32 s3, 0xf000
733; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
734; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
735; GFX6-NEXT:    s_endpgm
736  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
737  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
738  ret void
739}
740
; Constant source 127 (0x7f), offset 0, width 8. The checks expect the control
; word 0x80000 in an SGPR and an unfolded s_bfe_u32.
741define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
742; GFX6-LABEL: bfe_u32_constant_fold_test_7:
743; GFX6:       ; %bb.0:
744; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
745; GFX6-NEXT:    s_mov_b32 s2, 0x80000
746; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
747; GFX6-NEXT:    v_mov_b32_e32 v0, s2
748; GFX6-NEXT:    s_mov_b32 s2, -1
749; GFX6-NEXT:    s_mov_b32 s3, 0xf000
750; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
751; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
752; GFX6-NEXT:    s_endpgm
753  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
754  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
755  ret void
756}
757
; Constant source 127 (0x7f), offset 6, width 8. The checks expect the control
; word 0x80006 (width 8 << 16 | offset 6) in an SGPR and an unfolded
; s_bfe_u32.
758define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
759; GFX6-LABEL: bfe_u32_constant_fold_test_8:
760; GFX6:       ; %bb.0:
761; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
762; GFX6-NEXT:    s_mov_b32 s2, 0x80006
763; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
764; GFX6-NEXT:    v_mov_b32_e32 v0, s2
765; GFX6-NEXT:    s_mov_b32 s2, -1
766; GFX6-NEXT:    s_mov_b32 s3, 0xf000
767; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
768; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
769; GFX6-NEXT:    s_endpgm
770  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
771  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
772  ret void
773}
774
; ubfe.i32 with all-constant operands: src = 65536 (0x10000), offset = 16,
; width = 8. The checks expect an unfolded s_bfe_u32 with the control word
; 0x80010 (width << 16 | offset) materialized in s2.
775define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
776; GFX6-LABEL: bfe_u32_constant_fold_test_9:
777; GFX6:       ; %bb.0:
778; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
779; GFX6-NEXT:    s_mov_b32 s2, 0x80010
780; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
781; GFX6-NEXT:    v_mov_b32_e32 v0, s2
782; GFX6-NEXT:    s_mov_b32 s2, -1
783; GFX6-NEXT:    s_mov_b32 s3, 0xf000
784; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
785; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
786; GFX6-NEXT:    s_endpgm
787  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
788  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
789  ret void
790}
791
; ubfe.i32 with all-constant operands: src = 65535 (0xffff), offset = 16,
; width = 16. The checks expect an unfolded s_bfe_u32 with the control word
; 0x100010 (width << 16 | offset) materialized in s2.
792define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
793; GFX6-LABEL: bfe_u32_constant_fold_test_10:
794; GFX6:       ; %bb.0:
795; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
796; GFX6-NEXT:    s_mov_b32 s2, 0x100010
797; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
798; GFX6-NEXT:    v_mov_b32_e32 v0, s2
799; GFX6-NEXT:    s_mov_b32 s2, -1
800; GFX6-NEXT:    s_mov_b32 s3, 0xf000
801; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
802; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
803; GFX6-NEXT:    s_endpgm
804  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
805  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
806  ret void
807}
808
; ubfe.i32 with all-constant operands: src = 160 (0xa0), offset = 4, width = 4.
; The checks expect an unfolded s_bfe_u32 with the control word 0x40004
; (width << 16 | offset) materialized in s2.
809define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
810; GFX6-LABEL: bfe_u32_constant_fold_test_11:
811; GFX6:       ; %bb.0:
812; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
813; GFX6-NEXT:    s_mov_b32 s2, 0x40004
814; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
815; GFX6-NEXT:    v_mov_b32_e32 v0, s2
816; GFX6-NEXT:    s_mov_b32 s2, -1
817; GFX6-NEXT:    s_mov_b32 s3, 0xf000
818; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
819; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
820; GFX6-NEXT:    s_endpgm
821  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
822  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
823  ret void
824}
825
; ubfe.i32 with all-constant operands: src = 160 (0xa0), offset = 31,
; width = 1 (extract of the top bit only). The checks expect an unfolded
; s_bfe_u32 with the control word 0x1001f (width << 16 | offset) in s2.
826define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
827; GFX6-LABEL: bfe_u32_constant_fold_test_12:
828; GFX6:       ; %bb.0:
829; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
830; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
831; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
832; GFX6-NEXT:    v_mov_b32_e32 v0, s2
833; GFX6-NEXT:    s_mov_b32 s2, -1
834; GFX6-NEXT:    s_mov_b32 s3, 0xf000
835; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
836; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
837; GFX6-NEXT:    s_endpgm
838  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
839  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
840  ret void
841}
842
; ubfe.i32 with all-constant operands: src = 131070 (0x1fffe), offset = 16,
; width = 16. The checks expect an unfolded s_bfe_u32 with the control word
; 0x100010 (width << 16 | offset) materialized in s2.
843define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
844; GFX6-LABEL: bfe_u32_constant_fold_test_13:
845; GFX6:       ; %bb.0:
846; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
847; GFX6-NEXT:    s_mov_b32 s2, 0x100010
848; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
849; GFX6-NEXT:    v_mov_b32_e32 v0, s2
850; GFX6-NEXT:    s_mov_b32 s2, -1
851; GFX6-NEXT:    s_mov_b32 s3, 0xf000
852; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
853; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
854; GFX6-NEXT:    s_endpgm
855  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
856  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
857  ret void
858}
859
; ubfe.i32 with all-constant operands: src = 160 (0xa0), offset = 2,
; width = 30 (offset + width == 32). The checks expect an unfolded s_bfe_u32
; with the control word 0x1e0002 (width << 16 | offset) materialized in s2.
860define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
861; GFX6-LABEL: bfe_u32_constant_fold_test_14:
862; GFX6:       ; %bb.0:
863; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
864; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
865; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
866; GFX6-NEXT:    v_mov_b32_e32 v0, s2
867; GFX6-NEXT:    s_mov_b32 s2, -1
868; GFX6-NEXT:    s_mov_b32 s3, 0xf000
869; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
870; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
871; GFX6-NEXT:    s_endpgm
872  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
873  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
874  ret void
875}
876
; ubfe.i32 with all-constant operands: src = 160 (0xa0), offset = 4,
; width = 28 (offset + width == 32). The checks expect an unfolded s_bfe_u32
; with the control word 0x1c0004 (width << 16 | offset) materialized in s2.
877define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
878; GFX6-LABEL: bfe_u32_constant_fold_test_15:
879; GFX6:       ; %bb.0:
880; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
881; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
882; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
883; GFX6-NEXT:    v_mov_b32_e32 v0, s2
884; GFX6-NEXT:    s_mov_b32 s2, -1
885; GFX6-NEXT:    s_mov_b32 s3, 0xf000
886; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
887; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
888; GFX6-NEXT:    s_endpgm
889  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
890  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
891  ret void
892}
893
; ubfe.i32 with all-constant operands: src = 4294967295 (all ones, printed as
; -1), offset = 1, width = 7. Unlike the sibling tests, the checks here expect
; the control word 0x70001 (width << 16 | offset) to appear directly as an
; immediate operand of s_bfe_u32, with no preceding s_mov_b32.
894define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
895; GFX6-LABEL: bfe_u32_constant_fold_test_16:
896; GFX6:       ; %bb.0:
897; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
898; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x70001
899; GFX6-NEXT:    v_mov_b32_e32 v0, s2
900; GFX6-NEXT:    s_mov_b32 s2, -1
901; GFX6-NEXT:    s_mov_b32 s3, 0xf000
902; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
903; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
904; GFX6-NEXT:    s_endpgm
905  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
906  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
907  ret void
908}
909
; ubfe.i32 with all-constant operands: src = 255 (0xff), offset = 1,
; width = 31 (offset + width == 32). The checks expect an unfolded s_bfe_u32
; with the control word 0x1f0001 (width << 16 | offset) materialized in s2.
910define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
911; GFX6-LABEL: bfe_u32_constant_fold_test_17:
912; GFX6:       ; %bb.0:
913; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
914; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
915; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
916; GFX6-NEXT:    v_mov_b32_e32 v0, s2
917; GFX6-NEXT:    s_mov_b32 s2, -1
918; GFX6-NEXT:    s_mov_b32 s3, 0xf000
919; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
920; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
921; GFX6-NEXT:    s_endpgm
922  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
923  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
924  ret void
925}
926
; ubfe.i32 with all-constant operands: src = 255 (0xff), offset = 31,
; width = 1 (extract of the top bit only). The checks expect an unfolded
; s_bfe_u32 with the control word 0x1001f (width << 16 | offset) in s2.
927define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
928; GFX6-LABEL: bfe_u32_constant_fold_test_18:
929; GFX6:       ; %bb.0:
930; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
931; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
932; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
933; GFX6-NEXT:    v_mov_b32_e32 v0, s2
934; GFX6-NEXT:    s_mov_b32 s2, -1
935; GFX6-NEXT:    s_mov_b32 s3, 0xf000
936; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
937; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
938; GFX6-NEXT:    s_endpgm
939  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
940  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
941  ret void
942}
943
944; Make sure that SimplifyDemandedBits doesn't cause the and to be
945; reduced to the bits demanded by the bfe.
946
; The masked value %and has two uses: it feeds the ubfe (offset 2, width 2)
; and is also stored to %out1. The checks therefore expect the full
; `s_and_b32 ..., 63` to remain, followed by an s_bfe_u32 with the immediate
; control word 0x20002 (width << 16 | offset) reading the masked register.
947; XXX: The operand to s_bfe_u32 could also just directly be the load register.
948define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
949; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
950; GFX6:       ; %bb.0:
951; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
952; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
953; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
954; GFX6-NEXT:    s_mov_b32 s6, -1
955; GFX6-NEXT:    s_mov_b32 s7, 0xf000
956; GFX6-NEXT:    s_mov_b64 s[10:11], s[6:7]
957; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
958; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
959; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
960; GFX6-NEXT:    s_and_b32 s0, s0, 63
961; GFX6-NEXT:    s_bfe_u32 s1, s0, 0x20002
962; GFX6-NEXT:    v_mov_b32_e32 v1, s1
963; GFX6-NEXT:    v_mov_b32_e32 v0, s0
964; GFX6-NEXT:    buffer_store_dword v1, off, s[4:7], 0
965; GFX6-NEXT:    buffer_store_dword v0, off, s[8:11], 0
966; GFX6-NEXT:    s_endpgm
967                                            i32 addrspace(1)* %out1,
968                                            i32 addrspace(1)* %in) #0 {
969  %src = load i32, i32 addrspace(1)* %in, align 4
970  %and = and i32 %src, 63
971  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
972  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
973  store i32 %and, i32 addrspace(1)* %out1, align 4
974  ret void
975}
976
; Plain IR pattern (no intrinsic): (%a >> 6) & 7 with a constant shift amount.
; The checks expect the shift+mask pair to be selected as a single s_bfe_u32
; with the immediate control word 0x30006 (width 3 << 16 | offset 6).
977define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
978; GFX6-LABEL: lshr_and:
979; GFX6:       ; %bb.0:
980; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
981; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
982; GFX6-NEXT:    s_mov_b32 s6, -1
983; GFX6-NEXT:    s_mov_b32 s7, 0xf000
984; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
985; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x30006
986; GFX6-NEXT:    v_mov_b32_e32 v0, s0
987; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
988; GFX6-NEXT:    s_endpgm
989  %b = lshr i32 %a, 6
990  %c = and i32 %b, 7
991  store i32 %c, i32 addrspace(1)* %out, align 8
992  ret void
993}
994
; Variable-shift variant of lshr_and: (%a >> %b) & 7, where the shift amount
; is a kernel argument rather than a constant. The checks expect no bitfield
; extract here; the output stays as separate s_lshr_b32 and s_and_b32.
995define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
996; GFX6-LABEL: v_lshr_and:
997; GFX6:       ; %bb.0:
998; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
999; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
1000; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
1001; GFX6-NEXT:    s_mov_b32 s6, -1
1002; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1003; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1004; GFX6-NEXT:    s_lshr_b32 s0, s2, s0
1005; GFX6-NEXT:    s_and_b32 s0, s0, 7
1006; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1007; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1008; GFX6-NEXT:    s_endpgm
1009  %c = lshr i32 %a, %b
1010  %d = and i32 %c, 7
1011  store i32 %d, i32 addrspace(1)* %out, align 8
1012  ret void
1013}
1014
; Mask-then-shift pattern: (%a & 448) >> 6, where 448 == 0x1c0 (bits 6..8).
; The checks record the current output, which keeps separate s_and_b32 and
; s_lshr_b32 instructions rather than forming an s_bfe_u32.
1015define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1016; GFX6-LABEL: and_lshr:
1017; GFX6:       ; %bb.0:
1018; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1019; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1020; GFX6-NEXT:    s_mov_b32 s6, -1
1021; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1022; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1023; GFX6-NEXT:    s_and_b32 s0, s0, 0x1c0
1024; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
1025; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1026; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1027; GFX6-NEXT:    s_endpgm
1028  %b = and i32 %a, 448
1029  %c = lshr i32 %b, 6
1030  store i32 %c, i32 addrspace(1)* %out, align 8
1031  ret void
1032}
1033
; Mask-then-shift pattern with a mask that also covers the shifted-out bits:
; (%a & 511) >> 6, where 511 == 0x1ff (bits 0..8). The checks record the
; current output, which keeps separate s_and_b32 and s_lshr_b32 instructions.
1034define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
1035; GFX6-LABEL: and_lshr2:
1036; GFX6:       ; %bb.0:
1037; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1038; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1039; GFX6-NEXT:    s_mov_b32 s6, -1
1040; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1041; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1042; GFX6-NEXT:    s_and_b32 s0, s0, 0x1ff
1043; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
1044; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1045; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1046; GFX6-NEXT:    s_endpgm
1047  %b = and i32 %a, 511
1048  %c = lshr i32 %b, 6
1049  store i32 %c, i32 addrspace(1)* %out, align 8
1050  ret void
1051}
1052
; Shift-left-then-shift-right pattern: (%a << 9) >> 11 (logical). The checks
; expect the pair to be selected as a single s_bfe_u32 with the immediate
; control word 0x150002, i.e. width 21 (32 - 11) at offset 2 (11 - 9).
1053define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1054; GFX6-LABEL: shl_lshr:
1055; GFX6:       ; %bb.0:
1056; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1057; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1058; GFX6-NEXT:    s_mov_b32 s6, -1
1059; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1060; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1061; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x150002
1062; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1063; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1064; GFX6-NEXT:    s_endpgm
1065  %b = shl i32 %a, 9
1066  %c = lshr i32 %b, 11
1067  store i32 %c, i32 addrspace(1)* %out, align 8
1068  ret void
1069}
1070
; Unsigned bitfield-extract intrinsics exercised above, in 32-bit and 64-bit
; source/result flavours. In the scalar lowerings checked in this file the
; second argument ends up in the low bits of the s_bfe control word and the
; third argument is shifted left by 16 (offset and width respectively).
1071declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
1072declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1
1073
; #0 is attached to every test function; #1 to the intrinsic declarations.
1074attributes #0 = { nounwind }
1075attributes #1 = { nounwind readnone }
1076