; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests and all tests involving loads dropped.

; Patterns:
;   a) x &  (1 << nbits) - 1
;   b) x & ~(-1 << nbits)
;   c) x &  (-1 >> (32 - nbits))
;   d) x << (32 - nbits) >> (32 - nbits)
; are equivalent.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern a with a plain i32 bit count: mask = (1 << %numlowbits) - 1, then
; mask & %val (mask is the first 'and' operand). The GCN checks expect the
; whole sequence to be selected as a single v_bfe_u32 v0, v0, 0, v1.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a with an i8 bit count: the argument is marked zeroext and is
; zero-extended to i32 before being used as the shift amount. The GCN checks
; expect the same single v_bfe_u32 v0, v0, 0, v1 as for the i32 variant.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a1_indexzext:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a with the 'and' operands swapped (%val first, mask second). The GCN
; checks expect the same single v_bfe_u32 v0, v0, 0, v1 as for bzhi32_a0, i.e.
; the operand order of the 'and' must not matter.
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a4_commutative:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern b with a plain i32 bit count: mask = ~(-1 << %numlowbits), where the
; bitwise not is written as xor with -1, then mask & %val. The GCN checks
; expect a single v_bfe_u32 v0, v0, 0, v1.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b with an i8 bit count: the argument is marked zeroext and is
; zero-extended to i32 before being used as the shift amount. The GCN checks
; expect the same single v_bfe_u32 v0, v0, 0, v1 as for the i32 variant.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b1_indexzext:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b with the 'and' operands swapped (%val first, mask second). The GCN
; checks expect the same single v_bfe_u32 v0, v0, 0, v1 as for bzhi32_b0, i.e.
; the operand order of the 'and' must not matter.
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b4_commutative:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern c with a plain i32 bit count: mask = -1 >> (32 - %numlowbits) using
; a logical right shift, then mask & %val. The GCN checks expect a single
; v_bfe_u32 v0, v0, 0, v1.
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_c0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern c with an i8 bit count (no zeroext attribute on the argument): the
; subtraction from 32 is performed in i8 and only its result is zero-extended
; to i32 for use as the shift amount. Unlike the i32 variants, the checks here
; contain no v_bfe_u32; SI and VI each expect their own sub/shift/and
; sequence, so the expectations use the separate SI and VI prefixes.
define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_c1_indexzext:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_and_b32_e32 v1, 0xff, v1
; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
; SI-NEXT:    v_and_b32_e32 v0, v1, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c1_indexzext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
; VI-NEXT:    v_mov_b32_e32 v2, -1
; VI-NEXT:    v_lshrrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; VI-NEXT:    v_and_b32_e32 v0, v1, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern c with the 'and' operands swapped (%val first, mask second). The GCN
; checks expect the same single v_bfe_u32 v0, v0, 0, v1 as for bzhi32_c0, i.e.
; the operand order of the 'and' must not matter.
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_c4_commutative:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern d with a plain i32 bit count: %val is shifted left by
; (32 - %numlowbits) and then logically shifted right by the same amount; no
; explicit mask or 'and' is involved. The GCN checks expect a single
; v_bfe_u32 v0, v0, 0, v1.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_d0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; Pattern d with an i8 bit count (no zeroext attribute on the argument): the
; subtraction from 32 is performed in i8 and only its result is zero-extended
; to i32 for use as the shift amount of both shifts. Unlike the i32 variant,
; the checks here contain no v_bfe_u32; SI and VI each expect their own
; sub/and/shift-left/shift-right sequence, so the expectations use the
; separate SI and VI prefixes.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_d1_indexzext:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_and_b32_e32 v1, 0xff, v1
; SI-NEXT:    v_lshl_b32_e32 v0, v0, v1
; SI-NEXT:    v_lshr_b32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_d1_indexzext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
; VI-NEXT:    v_and_b32_e32 v1, 0xff, v1
; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}
