1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI
3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM
9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM
10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM
11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM
12
13; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll
14
15; https://bugs.llvm.org/show_bug.cgi?id=36419
16; https://bugs.llvm.org/show_bug.cgi?id=37603
17; https://bugs.llvm.org/show_bug.cgi?id=37610
18
19; Patterns:
20;   a) x &  (1 << nbits) - 1
21;   b) x & ~(-1 << nbits)
22;   c) x &  (-1 >> (32 - nbits))
23;   d) x << (32 - nbits) >> (32 - nbits)
24; are equivalent.
25
26; ---------------------------------------------------------------------------- ;
27; Pattern a. 32-bit
28; ---------------------------------------------------------------------------- ;
29
; Pattern a, baseline case: returns %val & ((1 << %numlowbits) - 1), computed
; entirely in i32 with both operands passed by value.
; Per the assertions below: without BMI the mask is built with shl/dec and
; ANDed in; with BMI1 only, the bit count is shifted left by 8 and used as the
; BEXTR control operand (length in bits 15:8, start bits 7:0 left at zero);
; with BMI2 a single BZHI is expected.
30define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
31; X86-NOBMI-LABEL: bzhi32_a0:
32; X86-NOBMI:       # %bb.0:
33; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
34; X86-NOBMI-NEXT:    movl $1, %eax
35; X86-NOBMI-NEXT:    shll %cl, %eax
36; X86-NOBMI-NEXT:    decl %eax
37; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
38; X86-NOBMI-NEXT:    retl
39;
40; X86-BMI1-LABEL: bzhi32_a0:
41; X86-BMI1:       # %bb.0:
42; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
43; X86-BMI1-NEXT:    shll $8, %eax
44; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
45; X86-BMI1-NEXT:    retl
46;
47; X86-BMI2-LABEL: bzhi32_a0:
48; X86-BMI2:       # %bb.0:
49; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
50; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
51; X86-BMI2-NEXT:    retl
52;
53; X64-NOBMI-LABEL: bzhi32_a0:
54; X64-NOBMI:       # %bb.0:
55; X64-NOBMI-NEXT:    movl %esi, %ecx
56; X64-NOBMI-NEXT:    movl $1, %eax
57; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
58; X64-NOBMI-NEXT:    shll %cl, %eax
59; X64-NOBMI-NEXT:    decl %eax
60; X64-NOBMI-NEXT:    andl %edi, %eax
61; X64-NOBMI-NEXT:    retq
62;
63; X64-BMI1-LABEL: bzhi32_a0:
64; X64-BMI1:       # %bb.0:
65; X64-BMI1-NEXT:    shll $8, %esi
66; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
67; X64-BMI1-NEXT:    retq
68;
69; X64-BMI2-LABEL: bzhi32_a0:
70; X64-BMI2:       # %bb.0:
71; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
72; X64-BMI2-NEXT:    retq
  ; A single set bit at position %numlowbits.
73  %onebit = shl i32 1, %numlowbits
  ; Subtracting one turns it into a mask of the low %numlowbits bits.
74  %mask = add nsw i32 %onebit, -1
75  %masked = and i32 %mask, %val
76  ret i32 %masked
77}
78
; Pattern a with a narrow bit count: %numlowbits arrives as a zero-extended i8
; and is widened to i32 before forming (1 << n) - 1 and masking %val.
; The assertions below expect the same instruction sequences as for
; bzhi32_a0, i.e. the explicit zext is not expected to cost an extra
; instruction in any configuration.
79define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
80; X86-NOBMI-LABEL: bzhi32_a1_indexzext:
81; X86-NOBMI:       # %bb.0:
82; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
83; X86-NOBMI-NEXT:    movl $1, %eax
84; X86-NOBMI-NEXT:    shll %cl, %eax
85; X86-NOBMI-NEXT:    decl %eax
86; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
87; X86-NOBMI-NEXT:    retl
88;
89; X86-BMI1-LABEL: bzhi32_a1_indexzext:
90; X86-BMI1:       # %bb.0:
91; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
92; X86-BMI1-NEXT:    shll $8, %eax
93; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
94; X86-BMI1-NEXT:    retl
95;
96; X86-BMI2-LABEL: bzhi32_a1_indexzext:
97; X86-BMI2:       # %bb.0:
98; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
99; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
100; X86-BMI2-NEXT:    retl
101;
102; X64-NOBMI-LABEL: bzhi32_a1_indexzext:
103; X64-NOBMI:       # %bb.0:
104; X64-NOBMI-NEXT:    movl %esi, %ecx
105; X64-NOBMI-NEXT:    movl $1, %eax
106; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
107; X64-NOBMI-NEXT:    shll %cl, %eax
108; X64-NOBMI-NEXT:    decl %eax
109; X64-NOBMI-NEXT:    andl %edi, %eax
110; X64-NOBMI-NEXT:    retq
111;
112; X64-BMI1-LABEL: bzhi32_a1_indexzext:
113; X64-BMI1:       # %bb.0:
114; X64-BMI1-NEXT:    shll $8, %esi
115; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
116; X64-BMI1-NEXT:    retq
117;
118; X64-BMI2-LABEL: bzhi32_a1_indexzext:
119; X64-BMI2:       # %bb.0:
120; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
121; X64-BMI2-NEXT:    retq
  ; Widen the i8 bit count to the width of the shift.
122  %conv = zext i8 %numlowbits to i32
123  %onebit = shl i32 1, %conv
  ; (1 << n) - 1: mask of the low n bits.
124  %mask = add nsw i32 %onebit, -1
125  %masked = and i32 %mask, %val
126  ret i32 %masked
127}
128
; Pattern a with the value coming from memory: loads an i32 through %w and
; returns its low %numlowbits bits via (1 << n) - 1.
; The assertions below expect the load to be folded into the memory operand of
; the final and/bextr/bzhi instruction; on i686 the pointer itself is first
; read from the stack into a register.
129define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind {
130; X86-NOBMI-LABEL: bzhi32_a2_load:
131; X86-NOBMI:       # %bb.0:
132; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
133; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
134; X86-NOBMI-NEXT:    movl $1, %eax
135; X86-NOBMI-NEXT:    shll %cl, %eax
136; X86-NOBMI-NEXT:    decl %eax
137; X86-NOBMI-NEXT:    andl (%edx), %eax
138; X86-NOBMI-NEXT:    retl
139;
140; X86-BMI1-LABEL: bzhi32_a2_load:
141; X86-BMI1:       # %bb.0:
142; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
143; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
144; X86-BMI1-NEXT:    shll $8, %ecx
145; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
146; X86-BMI1-NEXT:    retl
147;
148; X86-BMI2-LABEL: bzhi32_a2_load:
149; X86-BMI2:       # %bb.0:
150; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
151; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
152; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
153; X86-BMI2-NEXT:    retl
154;
155; X64-NOBMI-LABEL: bzhi32_a2_load:
156; X64-NOBMI:       # %bb.0:
157; X64-NOBMI-NEXT:    movl %esi, %ecx
158; X64-NOBMI-NEXT:    movl $1, %eax
159; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
160; X64-NOBMI-NEXT:    shll %cl, %eax
161; X64-NOBMI-NEXT:    decl %eax
162; X64-NOBMI-NEXT:    andl (%rdi), %eax
163; X64-NOBMI-NEXT:    retq
164;
165; X64-BMI1-LABEL: bzhi32_a2_load:
166; X64-BMI1:       # %bb.0:
167; X64-BMI1-NEXT:    shll $8, %esi
168; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
169; X64-BMI1-NEXT:    retq
170;
171; X64-BMI2-LABEL: bzhi32_a2_load:
172; X64-BMI2:       # %bb.0:
173; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
174; X64-BMI2-NEXT:    retq
  ; The value to be masked is read from memory rather than passed directly.
175  %val = load i32, i32* %w
176  %onebit = shl i32 1, %numlowbits
  ; (1 << n) - 1: mask of the low n bits.
177  %mask = add nsw i32 %onebit, -1
178  %masked = and i32 %mask, %val
179  ret i32 %masked
180}
181
; Pattern a combining both variations: the value is loaded through %w and the
; bit count is a zero-extended i8 that is widened to i32 before the shift.
; The assertions below expect the same sequences as for bzhi32_a2_load: the
; load is folded into the and/bextr/bzhi memory operand and the zext adds no
; instruction.
182define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
183; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext:
184; X86-NOBMI:       # %bb.0:
185; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
186; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
187; X86-NOBMI-NEXT:    movl $1, %eax
188; X86-NOBMI-NEXT:    shll %cl, %eax
189; X86-NOBMI-NEXT:    decl %eax
190; X86-NOBMI-NEXT:    andl (%edx), %eax
191; X86-NOBMI-NEXT:    retl
192;
193; X86-BMI1-LABEL: bzhi32_a3_load_indexzext:
194; X86-BMI1:       # %bb.0:
195; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
196; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
197; X86-BMI1-NEXT:    shll $8, %ecx
198; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
199; X86-BMI1-NEXT:    retl
200;
201; X86-BMI2-LABEL: bzhi32_a3_load_indexzext:
202; X86-BMI2:       # %bb.0:
203; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
204; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
205; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
206; X86-BMI2-NEXT:    retl
207;
208; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext:
209; X64-NOBMI:       # %bb.0:
210; X64-NOBMI-NEXT:    movl %esi, %ecx
211; X64-NOBMI-NEXT:    movl $1, %eax
212; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
213; X64-NOBMI-NEXT:    shll %cl, %eax
214; X64-NOBMI-NEXT:    decl %eax
215; X64-NOBMI-NEXT:    andl (%rdi), %eax
216; X64-NOBMI-NEXT:    retq
217;
218; X64-BMI1-LABEL: bzhi32_a3_load_indexzext:
219; X64-BMI1:       # %bb.0:
220; X64-BMI1-NEXT:    shll $8, %esi
221; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
222; X64-BMI1-NEXT:    retq
223;
224; X64-BMI2-LABEL: bzhi32_a3_load_indexzext:
225; X64-BMI2:       # %bb.0:
226; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
227; X64-BMI2-NEXT:    retq
  ; The value to be masked is read from memory.
228  %val = load i32, i32* %w
  ; Widen the i8 bit count to the width of the shift.
229  %conv = zext i8 %numlowbits to i32
230  %onebit = shl i32 1, %conv
  ; (1 << n) - 1: mask of the low n bits.
231  %mask = add nsw i32 %onebit, -1
232  %masked = and i32 %mask, %val
233  ret i32 %masked
234}
235
; Pattern a with the operands of the final 'and' swapped (%val first, mask
; second) to check that the match does not depend on operand order.
; The assertions below expect the same sequences as for bzhi32_a0 in every
; configuration.
236define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
237; X86-NOBMI-LABEL: bzhi32_a4_commutative:
238; X86-NOBMI:       # %bb.0:
239; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
240; X86-NOBMI-NEXT:    movl $1, %eax
241; X86-NOBMI-NEXT:    shll %cl, %eax
242; X86-NOBMI-NEXT:    decl %eax
243; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
244; X86-NOBMI-NEXT:    retl
245;
246; X86-BMI1-LABEL: bzhi32_a4_commutative:
247; X86-BMI1:       # %bb.0:
248; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
249; X86-BMI1-NEXT:    shll $8, %eax
250; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
251; X86-BMI1-NEXT:    retl
252;
253; X86-BMI2-LABEL: bzhi32_a4_commutative:
254; X86-BMI2:       # %bb.0:
255; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
256; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
257; X86-BMI2-NEXT:    retl
258;
259; X64-NOBMI-LABEL: bzhi32_a4_commutative:
260; X64-NOBMI:       # %bb.0:
261; X64-NOBMI-NEXT:    movl %esi, %ecx
262; X64-NOBMI-NEXT:    movl $1, %eax
263; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
264; X64-NOBMI-NEXT:    shll %cl, %eax
265; X64-NOBMI-NEXT:    decl %eax
266; X64-NOBMI-NEXT:    andl %edi, %eax
267; X64-NOBMI-NEXT:    retq
268;
269; X64-BMI1-LABEL: bzhi32_a4_commutative:
270; X64-BMI1:       # %bb.0:
271; X64-BMI1-NEXT:    shll $8, %esi
272; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
273; X64-BMI1-NEXT:    retq
274;
275; X64-BMI2-LABEL: bzhi32_a4_commutative:
276; X64-BMI2:       # %bb.0:
277; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
278; X64-BMI2-NEXT:    retq
279  %onebit = shl i32 1, %numlowbits
  ; (1 << n) - 1: mask of the low n bits.
280  %mask = add nsw i32 %onebit, -1
281  %masked = and i32 %val, %mask ; swapped order
282  ret i32 %masked
283}
284
285; 64-bit
286
; Pattern a in i64: returns %val & ((1 << %numlowbits) - 1), both operands
; passed by value.
; Per the assertions below, none of the i686 configurations are expected to
; use BEXTR/BZHI here. They build the 64-bit mask in the edx:eax pair instead:
; shld plus shl (shlx with BMI2) shift the constant 1, 'testb $32' moves the
; low half into the high half and clears the low half when the count has bit 5
; set, add/adc of -1 performs the 64-bit decrement, and two ANDs apply the
; mask to the halves of %val.
; On x86-64 the expected code mirrors the 32-bit tests with 64-bit forms:
; shlq/decq/andq without BMI, 'shll $8' + bextrq with BMI1 only, bzhiq with
; BMI2.
287define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
288; X86-NOBMI-LABEL: bzhi64_a0:
289; X86-NOBMI:       # %bb.0:
290; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
291; X86-NOBMI-NEXT:    movl $1, %eax
292; X86-NOBMI-NEXT:    xorl %edx, %edx
293; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
294; X86-NOBMI-NEXT:    shll %cl, %eax
295; X86-NOBMI-NEXT:    testb $32, %cl
296; X86-NOBMI-NEXT:    je .LBB5_2
297; X86-NOBMI-NEXT:  # %bb.1:
298; X86-NOBMI-NEXT:    movl %eax, %edx
299; X86-NOBMI-NEXT:    xorl %eax, %eax
300; X86-NOBMI-NEXT:  .LBB5_2:
301; X86-NOBMI-NEXT:    addl $-1, %eax
302; X86-NOBMI-NEXT:    adcl $-1, %edx
303; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
304; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
305; X86-NOBMI-NEXT:    retl
306;
307; X86-BMI1-LABEL: bzhi64_a0:
308; X86-BMI1:       # %bb.0:
309; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
310; X86-BMI1-NEXT:    movl $1, %eax
311; X86-BMI1-NEXT:    xorl %edx, %edx
312; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
313; X86-BMI1-NEXT:    shll %cl, %eax
314; X86-BMI1-NEXT:    testb $32, %cl
315; X86-BMI1-NEXT:    je .LBB5_2
316; X86-BMI1-NEXT:  # %bb.1:
317; X86-BMI1-NEXT:    movl %eax, %edx
318; X86-BMI1-NEXT:    xorl %eax, %eax
319; X86-BMI1-NEXT:  .LBB5_2:
320; X86-BMI1-NEXT:    addl $-1, %eax
321; X86-BMI1-NEXT:    adcl $-1, %edx
322; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
323; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
324; X86-BMI1-NEXT:    retl
325;
326; X86-BMI2-LABEL: bzhi64_a0:
327; X86-BMI2:       # %bb.0:
328; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
329; X86-BMI2-NEXT:    movl $1, %eax
330; X86-BMI2-NEXT:    xorl %edx, %edx
331; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
332; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
333; X86-BMI2-NEXT:    testb $32, %cl
334; X86-BMI2-NEXT:    je .LBB5_2
335; X86-BMI2-NEXT:  # %bb.1:
336; X86-BMI2-NEXT:    movl %eax, %edx
337; X86-BMI2-NEXT:    xorl %eax, %eax
338; X86-BMI2-NEXT:  .LBB5_2:
339; X86-BMI2-NEXT:    addl $-1, %eax
340; X86-BMI2-NEXT:    adcl $-1, %edx
341; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
342; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
343; X86-BMI2-NEXT:    retl
344;
345; X64-NOBMI-LABEL: bzhi64_a0:
346; X64-NOBMI:       # %bb.0:
347; X64-NOBMI-NEXT:    movq %rsi, %rcx
348; X64-NOBMI-NEXT:    movl $1, %eax
349; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
350; X64-NOBMI-NEXT:    shlq %cl, %rax
351; X64-NOBMI-NEXT:    decq %rax
352; X64-NOBMI-NEXT:    andq %rdi, %rax
353; X64-NOBMI-NEXT:    retq
354;
355; X64-BMI1-LABEL: bzhi64_a0:
356; X64-BMI1:       # %bb.0:
357; X64-BMI1-NEXT:    shll $8, %esi
358; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
359; X64-BMI1-NEXT:    retq
360;
361; X64-BMI2-LABEL: bzhi64_a0:
362; X64-BMI2:       # %bb.0:
363; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
364; X64-BMI2-NEXT:    retq
  ; A single set bit at position %numlowbits.
365  %onebit = shl i64 1, %numlowbits
  ; Subtracting one turns it into a mask of the low %numlowbits bits.
366  %mask = add nsw i64 %onebit, -1
367  %masked = and i64 %mask, %val
368  ret i64 %masked
369}
370
; Pattern a in i64 with a narrow bit count: %numlowbits arrives as a
; zero-extended i8 and is widened to i64 before forming (1 << n) - 1.
; Per the assertions below, the i686 configurations are expected to build the
; 64-bit mask in edx:eax with shld/shl (shlx with BMI2), a 'testb $32' fixup
; and an add/adc decrement, exactly as for bzhi64_a0. On x86-64 the BMI
; variants still reduce to bextrq/bzhiq; the only visible difference from
; bzhi64_a0 is a '# kill' annotation recording that the 32-bit count register
; is reused as a 64-bit operand.
371define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
372; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
373; X86-NOBMI:       # %bb.0:
374; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
375; X86-NOBMI-NEXT:    movl $1, %eax
376; X86-NOBMI-NEXT:    xorl %edx, %edx
377; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
378; X86-NOBMI-NEXT:    shll %cl, %eax
379; X86-NOBMI-NEXT:    testb $32, %cl
380; X86-NOBMI-NEXT:    je .LBB6_2
381; X86-NOBMI-NEXT:  # %bb.1:
382; X86-NOBMI-NEXT:    movl %eax, %edx
383; X86-NOBMI-NEXT:    xorl %eax, %eax
384; X86-NOBMI-NEXT:  .LBB6_2:
385; X86-NOBMI-NEXT:    addl $-1, %eax
386; X86-NOBMI-NEXT:    adcl $-1, %edx
387; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
388; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
389; X86-NOBMI-NEXT:    retl
390;
391; X86-BMI1-LABEL: bzhi64_a1_indexzext:
392; X86-BMI1:       # %bb.0:
393; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
394; X86-BMI1-NEXT:    movl $1, %eax
395; X86-BMI1-NEXT:    xorl %edx, %edx
396; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
397; X86-BMI1-NEXT:    shll %cl, %eax
398; X86-BMI1-NEXT:    testb $32, %cl
399; X86-BMI1-NEXT:    je .LBB6_2
400; X86-BMI1-NEXT:  # %bb.1:
401; X86-BMI1-NEXT:    movl %eax, %edx
402; X86-BMI1-NEXT:    xorl %eax, %eax
403; X86-BMI1-NEXT:  .LBB6_2:
404; X86-BMI1-NEXT:    addl $-1, %eax
405; X86-BMI1-NEXT:    adcl $-1, %edx
406; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
407; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
408; X86-BMI1-NEXT:    retl
409;
410; X86-BMI2-LABEL: bzhi64_a1_indexzext:
411; X86-BMI2:       # %bb.0:
412; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
413; X86-BMI2-NEXT:    movl $1, %eax
414; X86-BMI2-NEXT:    xorl %edx, %edx
415; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
416; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
417; X86-BMI2-NEXT:    testb $32, %cl
418; X86-BMI2-NEXT:    je .LBB6_2
419; X86-BMI2-NEXT:  # %bb.1:
420; X86-BMI2-NEXT:    movl %eax, %edx
421; X86-BMI2-NEXT:    xorl %eax, %eax
422; X86-BMI2-NEXT:  .LBB6_2:
423; X86-BMI2-NEXT:    addl $-1, %eax
424; X86-BMI2-NEXT:    adcl $-1, %edx
425; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
426; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
427; X86-BMI2-NEXT:    retl
428;
429; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
430; X64-NOBMI:       # %bb.0:
431; X64-NOBMI-NEXT:    movl %esi, %ecx
432; X64-NOBMI-NEXT:    movl $1, %eax
433; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
434; X64-NOBMI-NEXT:    shlq %cl, %rax
435; X64-NOBMI-NEXT:    decq %rax
436; X64-NOBMI-NEXT:    andq %rdi, %rax
437; X64-NOBMI-NEXT:    retq
438;
439; X64-BMI1-LABEL: bzhi64_a1_indexzext:
440; X64-BMI1:       # %bb.0:
441; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
442; X64-BMI1-NEXT:    shll $8, %esi
443; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
444; X64-BMI1-NEXT:    retq
445;
446; X64-BMI2-LABEL: bzhi64_a1_indexzext:
447; X64-BMI2:       # %bb.0:
448; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
449; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
450; X64-BMI2-NEXT:    retq
  ; Widen the i8 bit count to the width of the shift.
451  %conv = zext i8 %numlowbits to i64
452  %onebit = shl i64 1, %conv
  ; (1 << n) - 1: mask of the low n bits.
453  %mask = add nsw i64 %onebit, -1
454  %masked = and i64 %mask, %val
455  ret i64 %masked
456}
457
; Pattern a in i64 with the value coming from memory: loads an i64 through %w
; and returns its low %numlowbits bits via (1 << n) - 1.
; Per the assertions below, the i686 configurations keep the pointer in %esi
; (saved and restored with push/pop), build the 64-bit mask in edx:eax as in
; bzhi64_a0, and AND the two halves directly against 4(%esi) and (%esi).
; On x86-64 the load is expected to fold into the memory operand of
; andq/bextrq/bzhiq.
458define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind {
459; X86-NOBMI-LABEL: bzhi64_a2_load:
460; X86-NOBMI:       # %bb.0:
461; X86-NOBMI-NEXT:    pushl %esi
462; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
463; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
464; X86-NOBMI-NEXT:    movl $1, %eax
465; X86-NOBMI-NEXT:    xorl %edx, %edx
466; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
467; X86-NOBMI-NEXT:    shll %cl, %eax
468; X86-NOBMI-NEXT:    testb $32, %cl
469; X86-NOBMI-NEXT:    je .LBB7_2
470; X86-NOBMI-NEXT:  # %bb.1:
471; X86-NOBMI-NEXT:    movl %eax, %edx
472; X86-NOBMI-NEXT:    xorl %eax, %eax
473; X86-NOBMI-NEXT:  .LBB7_2:
474; X86-NOBMI-NEXT:    addl $-1, %eax
475; X86-NOBMI-NEXT:    adcl $-1, %edx
476; X86-NOBMI-NEXT:    andl 4(%esi), %edx
477; X86-NOBMI-NEXT:    andl (%esi), %eax
478; X86-NOBMI-NEXT:    popl %esi
479; X86-NOBMI-NEXT:    retl
480;
481; X86-BMI1-LABEL: bzhi64_a2_load:
482; X86-BMI1:       # %bb.0:
483; X86-BMI1-NEXT:    pushl %esi
484; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
485; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
486; X86-BMI1-NEXT:    movl $1, %eax
487; X86-BMI1-NEXT:    xorl %edx, %edx
488; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
489; X86-BMI1-NEXT:    shll %cl, %eax
490; X86-BMI1-NEXT:    testb $32, %cl
491; X86-BMI1-NEXT:    je .LBB7_2
492; X86-BMI1-NEXT:  # %bb.1:
493; X86-BMI1-NEXT:    movl %eax, %edx
494; X86-BMI1-NEXT:    xorl %eax, %eax
495; X86-BMI1-NEXT:  .LBB7_2:
496; X86-BMI1-NEXT:    addl $-1, %eax
497; X86-BMI1-NEXT:    adcl $-1, %edx
498; X86-BMI1-NEXT:    andl 4(%esi), %edx
499; X86-BMI1-NEXT:    andl (%esi), %eax
500; X86-BMI1-NEXT:    popl %esi
501; X86-BMI1-NEXT:    retl
502;
503; X86-BMI2-LABEL: bzhi64_a2_load:
504; X86-BMI2:       # %bb.0:
505; X86-BMI2-NEXT:    pushl %esi
506; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
507; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
508; X86-BMI2-NEXT:    movl $1, %eax
509; X86-BMI2-NEXT:    xorl %edx, %edx
510; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
511; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
512; X86-BMI2-NEXT:    testb $32, %cl
513; X86-BMI2-NEXT:    je .LBB7_2
514; X86-BMI2-NEXT:  # %bb.1:
515; X86-BMI2-NEXT:    movl %eax, %edx
516; X86-BMI2-NEXT:    xorl %eax, %eax
517; X86-BMI2-NEXT:  .LBB7_2:
518; X86-BMI2-NEXT:    addl $-1, %eax
519; X86-BMI2-NEXT:    adcl $-1, %edx
520; X86-BMI2-NEXT:    andl 4(%esi), %edx
521; X86-BMI2-NEXT:    andl (%esi), %eax
522; X86-BMI2-NEXT:    popl %esi
523; X86-BMI2-NEXT:    retl
524;
525; X64-NOBMI-LABEL: bzhi64_a2_load:
526; X64-NOBMI:       # %bb.0:
527; X64-NOBMI-NEXT:    movq %rsi, %rcx
528; X64-NOBMI-NEXT:    movl $1, %eax
529; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
530; X64-NOBMI-NEXT:    shlq %cl, %rax
531; X64-NOBMI-NEXT:    decq %rax
532; X64-NOBMI-NEXT:    andq (%rdi), %rax
533; X64-NOBMI-NEXT:    retq
534;
535; X64-BMI1-LABEL: bzhi64_a2_load:
536; X64-BMI1:       # %bb.0:
537; X64-BMI1-NEXT:    shll $8, %esi
538; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
539; X64-BMI1-NEXT:    retq
540;
541; X64-BMI2-LABEL: bzhi64_a2_load:
542; X64-BMI2:       # %bb.0:
543; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
544; X64-BMI2-NEXT:    retq
  ; The value to be masked is read from memory rather than passed directly.
545  %val = load i64, i64* %w
546  %onebit = shl i64 1, %numlowbits
  ; (1 << n) - 1: mask of the low n bits.
547  %mask = add nsw i64 %onebit, -1
548  %masked = and i64 %mask, %val
549  ret i64 %masked
550}
551
; Pattern a in i64 combining both variations: the value is loaded through %w
; and the bit count is a zero-extended i8 that is widened to i64 before the
; shift.
; Per the assertions below, the i686 configurations are expected to match
; bzhi64_a2_load (pointer kept in %esi, mask built in edx:eax, halves ANDed
; against memory). On x86-64 the load still folds into andq/bextrq/bzhiq; the
; BMI variants additionally carry a '# kill' annotation recording that the
; 32-bit count register is reused as a 64-bit operand.
552define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
553; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext:
554; X86-NOBMI:       # %bb.0:
555; X86-NOBMI-NEXT:    pushl %esi
556; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
557; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
558; X86-NOBMI-NEXT:    movl $1, %eax
559; X86-NOBMI-NEXT:    xorl %edx, %edx
560; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
561; X86-NOBMI-NEXT:    shll %cl, %eax
562; X86-NOBMI-NEXT:    testb $32, %cl
563; X86-NOBMI-NEXT:    je .LBB8_2
564; X86-NOBMI-NEXT:  # %bb.1:
565; X86-NOBMI-NEXT:    movl %eax, %edx
566; X86-NOBMI-NEXT:    xorl %eax, %eax
567; X86-NOBMI-NEXT:  .LBB8_2:
568; X86-NOBMI-NEXT:    addl $-1, %eax
569; X86-NOBMI-NEXT:    adcl $-1, %edx
570; X86-NOBMI-NEXT:    andl 4(%esi), %edx
571; X86-NOBMI-NEXT:    andl (%esi), %eax
572; X86-NOBMI-NEXT:    popl %esi
573; X86-NOBMI-NEXT:    retl
574;
575; X86-BMI1-LABEL: bzhi64_a3_load_indexzext:
576; X86-BMI1:       # %bb.0:
577; X86-BMI1-NEXT:    pushl %esi
578; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
579; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
580; X86-BMI1-NEXT:    movl $1, %eax
581; X86-BMI1-NEXT:    xorl %edx, %edx
582; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
583; X86-BMI1-NEXT:    shll %cl, %eax
584; X86-BMI1-NEXT:    testb $32, %cl
585; X86-BMI1-NEXT:    je .LBB8_2
586; X86-BMI1-NEXT:  # %bb.1:
587; X86-BMI1-NEXT:    movl %eax, %edx
588; X86-BMI1-NEXT:    xorl %eax, %eax
589; X86-BMI1-NEXT:  .LBB8_2:
590; X86-BMI1-NEXT:    addl $-1, %eax
591; X86-BMI1-NEXT:    adcl $-1, %edx
592; X86-BMI1-NEXT:    andl 4(%esi), %edx
593; X86-BMI1-NEXT:    andl (%esi), %eax
594; X86-BMI1-NEXT:    popl %esi
595; X86-BMI1-NEXT:    retl
596;
597; X86-BMI2-LABEL: bzhi64_a3_load_indexzext:
598; X86-BMI2:       # %bb.0:
599; X86-BMI2-NEXT:    pushl %esi
600; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
601; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
602; X86-BMI2-NEXT:    movl $1, %eax
603; X86-BMI2-NEXT:    xorl %edx, %edx
604; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
605; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
606; X86-BMI2-NEXT:    testb $32, %cl
607; X86-BMI2-NEXT:    je .LBB8_2
608; X86-BMI2-NEXT:  # %bb.1:
609; X86-BMI2-NEXT:    movl %eax, %edx
610; X86-BMI2-NEXT:    xorl %eax, %eax
611; X86-BMI2-NEXT:  .LBB8_2:
612; X86-BMI2-NEXT:    addl $-1, %eax
613; X86-BMI2-NEXT:    adcl $-1, %edx
614; X86-BMI2-NEXT:    andl 4(%esi), %edx
615; X86-BMI2-NEXT:    andl (%esi), %eax
616; X86-BMI2-NEXT:    popl %esi
617; X86-BMI2-NEXT:    retl
618;
619; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext:
620; X64-NOBMI:       # %bb.0:
621; X64-NOBMI-NEXT:    movl %esi, %ecx
622; X64-NOBMI-NEXT:    movl $1, %eax
623; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
624; X64-NOBMI-NEXT:    shlq %cl, %rax
625; X64-NOBMI-NEXT:    decq %rax
626; X64-NOBMI-NEXT:    andq (%rdi), %rax
627; X64-NOBMI-NEXT:    retq
628;
629; X64-BMI1-LABEL: bzhi64_a3_load_indexzext:
630; X64-BMI1:       # %bb.0:
631; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
632; X64-BMI1-NEXT:    shll $8, %esi
633; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
634; X64-BMI1-NEXT:    retq
635;
636; X64-BMI2-LABEL: bzhi64_a3_load_indexzext:
637; X64-BMI2:       # %bb.0:
638; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
639; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
640; X64-BMI2-NEXT:    retq
  ; The value to be masked is read from memory.
641  %val = load i64, i64* %w
  ; Widen the i8 bit count to the width of the shift.
642  %conv = zext i8 %numlowbits to i64
643  %onebit = shl i64 1, %conv
  ; (1 << n) - 1: mask of the low n bits.
644  %mask = add nsw i64 %onebit, -1
645  %masked = and i64 %mask, %val
646  ret i64 %masked
647}
648
; Pattern a in i64 with the operands of the final 'and' swapped (%val first,
; mask second) to check that the match does not depend on operand order.
; The assertions below expect the same sequences as for bzhi64_a0 in every
; configuration (only the local branch label numbers differ).
649define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
650; X86-NOBMI-LABEL: bzhi64_a4_commutative:
651; X86-NOBMI:       # %bb.0:
652; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
653; X86-NOBMI-NEXT:    movl $1, %eax
654; X86-NOBMI-NEXT:    xorl %edx, %edx
655; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
656; X86-NOBMI-NEXT:    shll %cl, %eax
657; X86-NOBMI-NEXT:    testb $32, %cl
658; X86-NOBMI-NEXT:    je .LBB9_2
659; X86-NOBMI-NEXT:  # %bb.1:
660; X86-NOBMI-NEXT:    movl %eax, %edx
661; X86-NOBMI-NEXT:    xorl %eax, %eax
662; X86-NOBMI-NEXT:  .LBB9_2:
663; X86-NOBMI-NEXT:    addl $-1, %eax
664; X86-NOBMI-NEXT:    adcl $-1, %edx
665; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
666; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
667; X86-NOBMI-NEXT:    retl
668;
669; X86-BMI1-LABEL: bzhi64_a4_commutative:
670; X86-BMI1:       # %bb.0:
671; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
672; X86-BMI1-NEXT:    movl $1, %eax
673; X86-BMI1-NEXT:    xorl %edx, %edx
674; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
675; X86-BMI1-NEXT:    shll %cl, %eax
676; X86-BMI1-NEXT:    testb $32, %cl
677; X86-BMI1-NEXT:    je .LBB9_2
678; X86-BMI1-NEXT:  # %bb.1:
679; X86-BMI1-NEXT:    movl %eax, %edx
680; X86-BMI1-NEXT:    xorl %eax, %eax
681; X86-BMI1-NEXT:  .LBB9_2:
682; X86-BMI1-NEXT:    addl $-1, %eax
683; X86-BMI1-NEXT:    adcl $-1, %edx
684; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
685; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
686; X86-BMI1-NEXT:    retl
687;
688; X86-BMI2-LABEL: bzhi64_a4_commutative:
689; X86-BMI2:       # %bb.0:
690; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
691; X86-BMI2-NEXT:    movl $1, %eax
692; X86-BMI2-NEXT:    xorl %edx, %edx
693; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
694; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
695; X86-BMI2-NEXT:    testb $32, %cl
696; X86-BMI2-NEXT:    je .LBB9_2
697; X86-BMI2-NEXT:  # %bb.1:
698; X86-BMI2-NEXT:    movl %eax, %edx
699; X86-BMI2-NEXT:    xorl %eax, %eax
700; X86-BMI2-NEXT:  .LBB9_2:
701; X86-BMI2-NEXT:    addl $-1, %eax
702; X86-BMI2-NEXT:    adcl $-1, %edx
703; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
704; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
705; X86-BMI2-NEXT:    retl
706;
707; X64-NOBMI-LABEL: bzhi64_a4_commutative:
708; X64-NOBMI:       # %bb.0:
709; X64-NOBMI-NEXT:    movq %rsi, %rcx
710; X64-NOBMI-NEXT:    movl $1, %eax
711; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
712; X64-NOBMI-NEXT:    shlq %cl, %rax
713; X64-NOBMI-NEXT:    decq %rax
714; X64-NOBMI-NEXT:    andq %rdi, %rax
715; X64-NOBMI-NEXT:    retq
716;
717; X64-BMI1-LABEL: bzhi64_a4_commutative:
718; X64-BMI1:       # %bb.0:
719; X64-BMI1-NEXT:    shll $8, %esi
720; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
721; X64-BMI1-NEXT:    retq
722;
723; X64-BMI2-LABEL: bzhi64_a4_commutative:
724; X64-BMI2:       # %bb.0:
725; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
726; X64-BMI2-NEXT:    retq
727  %onebit = shl i64 1, %numlowbits
  ; (1 << n) - 1: mask of the low n bits.
728  %mask = add nsw i64 %onebit, -1
729  %masked = and i64 %val, %mask ; swapped order
730  ret i64 %masked
731}
732
733; 64-bit, but with 32-bit output
734
735; Everything done in 64-bit, truncation happens last.
; Returns trunc_to_i32(%val & ((1 << %numlowbits) - 1)), where the shift, the
; decrement and the 'and' are all i64 operations and only the final result is
; narrowed.
; Per the assertions below, the i686 configurations are expected to compute
; just the low 32 bits of the mask: 1 is shifted in a 32-bit register (shlx
; with BMI2), 'testb $32' substitutes zero for the shifted value when the
; count has bit 5 set, and a 32-bit dec/and finishes the job.
; On x86-64 without BMI the shift stays 64-bit (shlq) while dec/and are
; 32-bit; with BMI the whole thing is expected to shrink to the 32-bit
; bextrl/bzhil forms.
736define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
737; X86-NOBMI-LABEL: bzhi64_32_a0:
738; X86-NOBMI:       # %bb.0:
739; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
740; X86-NOBMI-NEXT:    movl $1, %edx
741; X86-NOBMI-NEXT:    shll %cl, %edx
742; X86-NOBMI-NEXT:    xorl %eax, %eax
743; X86-NOBMI-NEXT:    testb $32, %cl
744; X86-NOBMI-NEXT:    jne .LBB10_2
745; X86-NOBMI-NEXT:  # %bb.1:
746; X86-NOBMI-NEXT:    movl %edx, %eax
747; X86-NOBMI-NEXT:  .LBB10_2:
748; X86-NOBMI-NEXT:    decl %eax
749; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
750; X86-NOBMI-NEXT:    retl
751;
752; X86-BMI1-LABEL: bzhi64_32_a0:
753; X86-BMI1:       # %bb.0:
754; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
755; X86-BMI1-NEXT:    movl $1, %edx
756; X86-BMI1-NEXT:    shll %cl, %edx
757; X86-BMI1-NEXT:    xorl %eax, %eax
758; X86-BMI1-NEXT:    testb $32, %cl
759; X86-BMI1-NEXT:    jne .LBB10_2
760; X86-BMI1-NEXT:  # %bb.1:
761; X86-BMI1-NEXT:    movl %edx, %eax
762; X86-BMI1-NEXT:  .LBB10_2:
763; X86-BMI1-NEXT:    decl %eax
764; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
765; X86-BMI1-NEXT:    retl
766;
767; X86-BMI2-LABEL: bzhi64_32_a0:
768; X86-BMI2:       # %bb.0:
769; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
770; X86-BMI2-NEXT:    xorl %eax, %eax
771; X86-BMI2-NEXT:    testb $32, %cl
772; X86-BMI2-NEXT:    jne .LBB10_2
773; X86-BMI2-NEXT:  # %bb.1:
774; X86-BMI2-NEXT:    movl $1, %eax
775; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
776; X86-BMI2-NEXT:  .LBB10_2:
777; X86-BMI2-NEXT:    decl %eax
778; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
779; X86-BMI2-NEXT:    retl
780;
781; X64-NOBMI-LABEL: bzhi64_32_a0:
782; X64-NOBMI:       # %bb.0:
783; X64-NOBMI-NEXT:    movq %rsi, %rcx
784; X64-NOBMI-NEXT:    movl $1, %eax
785; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
786; X64-NOBMI-NEXT:    shlq %cl, %rax
787; X64-NOBMI-NEXT:    decl %eax
788; X64-NOBMI-NEXT:    andl %edi, %eax
789; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
790; X64-NOBMI-NEXT:    retq
791;
792; X64-BMI1-LABEL: bzhi64_32_a0:
793; X64-BMI1:       # %bb.0:
794; X64-BMI1-NEXT:    shll $8, %esi
795; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
796; X64-BMI1-NEXT:    retq
797;
798; X64-BMI2-LABEL: bzhi64_32_a0:
799; X64-BMI2:       # %bb.0:
800; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
801; X64-BMI2-NEXT:    retq
802  %onebit = shl i64 1, %numlowbits
  ; (1 << n) - 1: 64-bit mask of the low n bits.
803  %mask = add nsw i64 %onebit, -1
804  %masked = and i64 %mask, %val
  ; Only the low 32 bits of the masked value are returned.
805  %res = trunc i64 %masked to i32
806  ret i32 %res
807}
808
809; The 64-bit value is truncated to 32-bit first; the shift that builds the
; mask and the masking itself are then both done in 32-bit.
; Per the assertions below, every configuration is expected to produce the
; same sequences as a plain i32 extraction: shl/dec/and without BMI,
; 'shll $8' + bextrl with BMI1 only, and bzhil with BMI2.
810define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
811; X86-NOBMI-LABEL: bzhi64_32_a1:
812; X86-NOBMI:       # %bb.0:
813; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
814; X86-NOBMI-NEXT:    movl $1, %eax
815; X86-NOBMI-NEXT:    shll %cl, %eax
816; X86-NOBMI-NEXT:    decl %eax
817; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
818; X86-NOBMI-NEXT:    retl
819;
820; X86-BMI1-LABEL: bzhi64_32_a1:
821; X86-BMI1:       # %bb.0:
822; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
823; X86-BMI1-NEXT:    shll $8, %eax
824; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
825; X86-BMI1-NEXT:    retl
826;
827; X86-BMI2-LABEL: bzhi64_32_a1:
828; X86-BMI2:       # %bb.0:
829; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
830; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
831; X86-BMI2-NEXT:    retl
832;
833; X64-NOBMI-LABEL: bzhi64_32_a1:
834; X64-NOBMI:       # %bb.0:
835; X64-NOBMI-NEXT:    movl %esi, %ecx
836; X64-NOBMI-NEXT:    movl $1, %eax
837; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
838; X64-NOBMI-NEXT:    shll %cl, %eax
839; X64-NOBMI-NEXT:    decl %eax
840; X64-NOBMI-NEXT:    andl %edi, %eax
841; X64-NOBMI-NEXT:    retq
842;
843; X64-BMI1-LABEL: bzhi64_32_a1:
844; X64-BMI1:       # %bb.0:
845; X64-BMI1-NEXT:    shll $8, %esi
846; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
847; X64-BMI1-NEXT:    retq
848;
849; X64-BMI2-LABEL: bzhi64_32_a1:
850; X64-BMI2:       # %bb.0:
851; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
852; X64-BMI2-NEXT:    retq
  ; Narrow the input before any masking takes place.
853  %truncval = trunc i64 %val to i32
854  %onebit = shl i32 1, %numlowbits
  ; (1 << n) - 1: 32-bit mask of the low n bits.
855  %mask = add nsw i32 %onebit, -1
856  %masked = and i32 %mask, %truncval
857  ret i32 %masked
858}
859
860; Value is truncated to 32-bit first (the truncated value has an extra use).
861; Mask computation (shift + decrement) and masking are both 32-bit.
862define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind {
863; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
864; X86-NOBMI:       # %bb.0:
865; X86-NOBMI-NEXT:    pushl %ebx
866; X86-NOBMI-NEXT:    pushl %esi
867; X86-NOBMI-NEXT:    pushl %eax
868; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %bl
869; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
870; X86-NOBMI-NEXT:    movl %esi, (%esp)
871; X86-NOBMI-NEXT:    calll use32@PLT
872; X86-NOBMI-NEXT:    movl $1, %eax
873; X86-NOBMI-NEXT:    movl %ebx, %ecx
874; X86-NOBMI-NEXT:    shll %cl, %eax
875; X86-NOBMI-NEXT:    decl %eax
876; X86-NOBMI-NEXT:    andl %esi, %eax
877; X86-NOBMI-NEXT:    addl $4, %esp
878; X86-NOBMI-NEXT:    popl %esi
879; X86-NOBMI-NEXT:    popl %ebx
880; X86-NOBMI-NEXT:    retl
881;
882; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
883; X86-BMI1:       # %bb.0:
884; X86-BMI1-NEXT:    pushl %ebx
885; X86-BMI1-NEXT:    pushl %esi
886; X86-BMI1-NEXT:    pushl %eax
887; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %bl
888; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
889; X86-BMI1-NEXT:    movl %esi, (%esp)
890; X86-BMI1-NEXT:    calll use32@PLT
891; X86-BMI1-NEXT:    shll $8, %ebx
892; X86-BMI1-NEXT:    bextrl %ebx, %esi, %eax
893; X86-BMI1-NEXT:    addl $4, %esp
894; X86-BMI1-NEXT:    popl %esi
895; X86-BMI1-NEXT:    popl %ebx
896; X86-BMI1-NEXT:    retl
897;
898; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
899; X86-BMI2:       # %bb.0:
900; X86-BMI2-NEXT:    pushl %ebx
901; X86-BMI2-NEXT:    pushl %esi
902; X86-BMI2-NEXT:    pushl %eax
903; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
904; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
905; X86-BMI2-NEXT:    movl %esi, (%esp)
906; X86-BMI2-NEXT:    calll use32@PLT
907; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
908; X86-BMI2-NEXT:    addl $4, %esp
909; X86-BMI2-NEXT:    popl %esi
910; X86-BMI2-NEXT:    popl %ebx
911; X86-BMI2-NEXT:    retl
912;
913; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
914; X64-NOBMI:       # %bb.0:
915; X64-NOBMI-NEXT:    pushq %rbp
916; X64-NOBMI-NEXT:    pushq %rbx
917; X64-NOBMI-NEXT:    pushq %rax
918; X64-NOBMI-NEXT:    movl %esi, %ebp
919; X64-NOBMI-NEXT:    movq %rdi, %rbx
920; X64-NOBMI-NEXT:    callq use32@PLT
921; X64-NOBMI-NEXT:    movl $1, %eax
922; X64-NOBMI-NEXT:    movl %ebp, %ecx
923; X64-NOBMI-NEXT:    shll %cl, %eax
924; X64-NOBMI-NEXT:    decl %eax
925; X64-NOBMI-NEXT:    andl %ebx, %eax
926; X64-NOBMI-NEXT:    addq $8, %rsp
927; X64-NOBMI-NEXT:    popq %rbx
928; X64-NOBMI-NEXT:    popq %rbp
929; X64-NOBMI-NEXT:    retq
930;
931; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
932; X64-BMI1:       # %bb.0:
933; X64-BMI1-NEXT:    pushq %r14
934; X64-BMI1-NEXT:    pushq %rbx
935; X64-BMI1-NEXT:    pushq %rax
936; X64-BMI1-NEXT:    movl %esi, %ebx
937; X64-BMI1-NEXT:    movq %rdi, %r14
938; X64-BMI1-NEXT:    callq use32@PLT
939; X64-BMI1-NEXT:    shll $8, %ebx
940; X64-BMI1-NEXT:    bextrl %ebx, %r14d, %eax
941; X64-BMI1-NEXT:    addq $8, %rsp
942; X64-BMI1-NEXT:    popq %rbx
943; X64-BMI1-NEXT:    popq %r14
944; X64-BMI1-NEXT:    retq
945;
946; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
947; X64-BMI2:       # %bb.0:
948; X64-BMI2-NEXT:    pushq %rbp
949; X64-BMI2-NEXT:    pushq %rbx
950; X64-BMI2-NEXT:    pushq %rax
951; X64-BMI2-NEXT:    movl %esi, %ebp
952; X64-BMI2-NEXT:    movq %rdi, %rbx
953; X64-BMI2-NEXT:    callq use32@PLT
954; X64-BMI2-NEXT:    bzhil %ebp, %ebx, %eax
955; X64-BMI2-NEXT:    addq $8, %rsp
956; X64-BMI2-NEXT:    popq %rbx
957; X64-BMI2-NEXT:    popq %rbp
958; X64-BMI2-NEXT:    retq
959  %truncval = trunc i64 %val to i32
960  call void @use32(i32 %truncval) ; extra use of the truncated value
961  %onebit = shl i32 1, %numlowbits
962  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
963  %masked = and i32 %mask, %truncval
964  ret i32 %masked
965}
966
967; Mask is computed in 32-bit (shift + decrement), then zero-extended to 64-bit.
968; Masking is 64-bit. Then truncation.
969define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
970; X86-NOBMI-LABEL: bzhi64_32_a2:
971; X86-NOBMI:       # %bb.0:
972; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
973; X86-NOBMI-NEXT:    movl $1, %eax
974; X86-NOBMI-NEXT:    shll %cl, %eax
975; X86-NOBMI-NEXT:    decl %eax
976; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
977; X86-NOBMI-NEXT:    retl
978;
979; X86-BMI1-LABEL: bzhi64_32_a2:
980; X86-BMI1:       # %bb.0:
981; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
982; X86-BMI1-NEXT:    shll $8, %eax
983; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
984; X86-BMI1-NEXT:    retl
985;
986; X86-BMI2-LABEL: bzhi64_32_a2:
987; X86-BMI2:       # %bb.0:
988; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
989; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
990; X86-BMI2-NEXT:    retl
991;
992; X64-NOBMI-LABEL: bzhi64_32_a2:
993; X64-NOBMI:       # %bb.0:
994; X64-NOBMI-NEXT:    movl %esi, %ecx
995; X64-NOBMI-NEXT:    movl $1, %eax
996; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
997; X64-NOBMI-NEXT:    shll %cl, %eax
998; X64-NOBMI-NEXT:    decl %eax
999; X64-NOBMI-NEXT:    andl %edi, %eax
1000; X64-NOBMI-NEXT:    retq
1001;
1002; X64-BMI1-LABEL: bzhi64_32_a2:
1003; X64-BMI1:       # %bb.0:
1004; X64-BMI1-NEXT:    shll $8, %esi
1005; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1006; X64-BMI1-NEXT:    retq
1007;
1008; X64-BMI2-LABEL: bzhi64_32_a2:
1009; X64-BMI2:       # %bb.0:
1010; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1011; X64-BMI2-NEXT:    retq
1012  %onebit = shl i32 1, %numlowbits
1013  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1, still 32-bit
1014  %zextmask = zext i32 %mask to i64 ; upper 32 bits of the mask are zero
1015  %masked = and i64 %zextmask, %val
1016  %truncmasked = trunc i64 %masked to i32
1017  ret i32 %truncmasked
1018}
1019
1020; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
1021; Masking is 64-bit. Then truncation.
; Only the low 32 bits of the masked value are returned (final trunc).
1022define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
1023; X86-NOBMI-LABEL: bzhi64_32_a3:
1024; X86-NOBMI:       # %bb.0:
1025; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1026; X86-NOBMI-NEXT:    movl $1, %edx
1027; X86-NOBMI-NEXT:    shll %cl, %edx
1028; X86-NOBMI-NEXT:    xorl %eax, %eax
1029; X86-NOBMI-NEXT:    testb $32, %cl
1030; X86-NOBMI-NEXT:    jne .LBB14_2
1031; X86-NOBMI-NEXT:  # %bb.1:
1032; X86-NOBMI-NEXT:    movl %edx, %eax
1033; X86-NOBMI-NEXT:  .LBB14_2:
1034; X86-NOBMI-NEXT:    decl %eax
1035; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1036; X86-NOBMI-NEXT:    retl
1037;
1038; X86-BMI1-LABEL: bzhi64_32_a3:
1039; X86-BMI1:       # %bb.0:
1040; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1041; X86-BMI1-NEXT:    movl $1, %edx
1042; X86-BMI1-NEXT:    shll %cl, %edx
1043; X86-BMI1-NEXT:    xorl %eax, %eax
1044; X86-BMI1-NEXT:    testb $32, %cl
1045; X86-BMI1-NEXT:    jne .LBB14_2
1046; X86-BMI1-NEXT:  # %bb.1:
1047; X86-BMI1-NEXT:    movl %edx, %eax
1048; X86-BMI1-NEXT:  .LBB14_2:
1049; X86-BMI1-NEXT:    decl %eax
1050; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
1051; X86-BMI1-NEXT:    retl
1052;
1053; X86-BMI2-LABEL: bzhi64_32_a3:
1054; X86-BMI2:       # %bb.0:
1055; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1056; X86-BMI2-NEXT:    xorl %eax, %eax
1057; X86-BMI2-NEXT:    testb $32, %cl
1058; X86-BMI2-NEXT:    jne .LBB14_2
1059; X86-BMI2-NEXT:  # %bb.1:
1060; X86-BMI2-NEXT:    movl $1, %eax
1061; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1062; X86-BMI2-NEXT:  .LBB14_2:
1063; X86-BMI2-NEXT:    decl %eax
1064; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1065; X86-BMI2-NEXT:    retl
1066;
1067; X64-NOBMI-LABEL: bzhi64_32_a3:
1068; X64-NOBMI:       # %bb.0:
1069; X64-NOBMI-NEXT:    movq %rsi, %rcx
1070; X64-NOBMI-NEXT:    movl $1, %eax
1071; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1072; X64-NOBMI-NEXT:    shlq %cl, %rax
1073; X64-NOBMI-NEXT:    decl %eax
1074; X64-NOBMI-NEXT:    andl %edi, %eax
1075; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
1076; X64-NOBMI-NEXT:    retq
1077;
1078; X64-BMI1-LABEL: bzhi64_32_a3:
1079; X64-BMI1:       # %bb.0:
1080; X64-BMI1-NEXT:    shll $8, %esi
1081; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1082; X64-BMI1-NEXT:    retq
1083;
1084; X64-BMI2-LABEL: bzhi64_32_a3:
1085; X64-BMI2:       # %bb.0:
1086; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1087; X64-BMI2-NEXT:    retq
1088  %onebit = shl i64 1, %numlowbits
1089  %mask = add nsw i64 %onebit, 4294967295 ; 4294967295 == 0xFFFFFFFF: low 32 bits match %onebit - 1
1090  %masked = and i64 %mask, %val
1091  %truncmasked = trunc i64 %masked to i32
1092  ret i32 %truncmasked
1093}
1094
1095; ---------------------------------------------------------------------------- ;
1096; Pattern b. 32-bit
1097; ---------------------------------------------------------------------------- ;
1098
; Pattern b, base case: %val & ~(-1 << %numlowbits), everything in i32.
; CHECK lines expect bextrl (control = numlowbits << 8) with BMI1 and bzhil with BMI2.
1099define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
1100; X86-NOBMI-LABEL: bzhi32_b0:
1101; X86-NOBMI:       # %bb.0:
1102; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1103; X86-NOBMI-NEXT:    movl $-1, %eax
1104; X86-NOBMI-NEXT:    shll %cl, %eax
1105; X86-NOBMI-NEXT:    notl %eax
1106; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1107; X86-NOBMI-NEXT:    retl
1108;
1109; X86-BMI1-LABEL: bzhi32_b0:
1110; X86-BMI1:       # %bb.0:
1111; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
1112; X86-BMI1-NEXT:    shll $8, %eax
1113; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1114; X86-BMI1-NEXT:    retl
1115;
1116; X86-BMI2-LABEL: bzhi32_b0:
1117; X86-BMI2:       # %bb.0:
1118; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1119; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1120; X86-BMI2-NEXT:    retl
1121;
1122; X64-NOBMI-LABEL: bzhi32_b0:
1123; X64-NOBMI:       # %bb.0:
1124; X64-NOBMI-NEXT:    movl %esi, %ecx
1125; X64-NOBMI-NEXT:    movl $-1, %eax
1126; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1127; X64-NOBMI-NEXT:    shll %cl, %eax
1128; X64-NOBMI-NEXT:    notl %eax
1129; X64-NOBMI-NEXT:    andl %edi, %eax
1130; X64-NOBMI-NEXT:    retq
1131;
1132; X64-BMI1-LABEL: bzhi32_b0:
1133; X64-BMI1:       # %bb.0:
1134; X64-BMI1-NEXT:    shll $8, %esi
1135; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1136; X64-BMI1-NEXT:    retq
1137;
1138; X64-BMI2-LABEL: bzhi32_b0:
1139; X64-BMI2:       # %bb.0:
1140; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1141; X64-BMI2-NEXT:    retq
1142  %notmask = shl i32 -1, %numlowbits ; all-ones shifted left: low %numlowbits bits are clear
1143  %mask = xor i32 %notmask, -1 ; bitwise NOT -> low %numlowbits bits are set
1144  %masked = and i32 %mask, %val
1145  ret i32 %masked
1146}
1147
; Pattern b with the bit count passed as an i8 (zeroext) and zero-extended to
; i32 before the shift.
1148define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
1149; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
1150; X86-NOBMI:       # %bb.0:
1151; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1152; X86-NOBMI-NEXT:    movl $-1, %eax
1153; X86-NOBMI-NEXT:    shll %cl, %eax
1154; X86-NOBMI-NEXT:    notl %eax
1155; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1156; X86-NOBMI-NEXT:    retl
1157;
1158; X86-BMI1-LABEL: bzhi32_b1_indexzext:
1159; X86-BMI1:       # %bb.0:
1160; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
1161; X86-BMI1-NEXT:    shll $8, %eax
1162; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1163; X86-BMI1-NEXT:    retl
1164;
1165; X86-BMI2-LABEL: bzhi32_b1_indexzext:
1166; X86-BMI2:       # %bb.0:
1167; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1168; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1169; X86-BMI2-NEXT:    retl
1170;
1171; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
1172; X64-NOBMI:       # %bb.0:
1173; X64-NOBMI-NEXT:    movl %esi, %ecx
1174; X64-NOBMI-NEXT:    movl $-1, %eax
1175; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1176; X64-NOBMI-NEXT:    shll %cl, %eax
1177; X64-NOBMI-NEXT:    notl %eax
1178; X64-NOBMI-NEXT:    andl %edi, %eax
1179; X64-NOBMI-NEXT:    retq
1180;
1181; X64-BMI1-LABEL: bzhi32_b1_indexzext:
1182; X64-BMI1:       # %bb.0:
1183; X64-BMI1-NEXT:    shll $8, %esi
1184; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1185; X64-BMI1-NEXT:    retq
1186;
1187; X64-BMI2-LABEL: bzhi32_b1_indexzext:
1188; X64-BMI2:       # %bb.0:
1189; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1190; X64-BMI2-NEXT:    retq
1191  %conv = zext i8 %numlowbits to i32 ; widen the i8 bit count to the shift width
1192  %notmask = shl i32 -1, %conv
1193  %mask = xor i32 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1194  %masked = and i32 %mask, %val
1195  ret i32 %masked
1196}
1197
; Pattern b where the value being masked is loaded from memory (%w).
; CHECK lines expect the load to be folded into the memory operand of
; andl / bextrl / bzhil.
1198define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind {
1199; X86-NOBMI-LABEL: bzhi32_b2_load:
1200; X86-NOBMI:       # %bb.0:
1201; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1202; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1203; X86-NOBMI-NEXT:    movl $-1, %eax
1204; X86-NOBMI-NEXT:    shll %cl, %eax
1205; X86-NOBMI-NEXT:    notl %eax
1206; X86-NOBMI-NEXT:    andl (%edx), %eax
1207; X86-NOBMI-NEXT:    retl
1208;
1209; X86-BMI1-LABEL: bzhi32_b2_load:
1210; X86-BMI1:       # %bb.0:
1211; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1212; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1213; X86-BMI1-NEXT:    shll $8, %ecx
1214; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
1215; X86-BMI1-NEXT:    retl
1216;
1217; X86-BMI2-LABEL: bzhi32_b2_load:
1218; X86-BMI2:       # %bb.0:
1219; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1220; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1221; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1222; X86-BMI2-NEXT:    retl
1223;
1224; X64-NOBMI-LABEL: bzhi32_b2_load:
1225; X64-NOBMI:       # %bb.0:
1226; X64-NOBMI-NEXT:    movl %esi, %ecx
1227; X64-NOBMI-NEXT:    movl $-1, %eax
1228; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1229; X64-NOBMI-NEXT:    shll %cl, %eax
1230; X64-NOBMI-NEXT:    notl %eax
1231; X64-NOBMI-NEXT:    andl (%rdi), %eax
1232; X64-NOBMI-NEXT:    retq
1233;
1234; X64-BMI1-LABEL: bzhi32_b2_load:
1235; X64-BMI1:       # %bb.0:
1236; X64-BMI1-NEXT:    shll $8, %esi
1237; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
1238; X64-BMI1-NEXT:    retq
1239;
1240; X64-BMI2-LABEL: bzhi32_b2_load:
1241; X64-BMI2:       # %bb.0:
1242; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1243; X64-BMI2-NEXT:    retq
1244  %val = load i32, i32* %w ; value to mask comes from memory
1245  %notmask = shl i32 -1, %numlowbits
1246  %mask = xor i32 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1247  %masked = and i32 %mask, %val
1248  ret i32 %masked
1249}
1250
; Pattern b combining both variations: the value is loaded from memory (%w)
; and the bit count is an i8 (zeroext) that is zero-extended to i32.
1251define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
1252; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
1253; X86-NOBMI:       # %bb.0:
1254; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1255; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1256; X86-NOBMI-NEXT:    movl $-1, %eax
1257; X86-NOBMI-NEXT:    shll %cl, %eax
1258; X86-NOBMI-NEXT:    notl %eax
1259; X86-NOBMI-NEXT:    andl (%edx), %eax
1260; X86-NOBMI-NEXT:    retl
1261;
1262; X86-BMI1-LABEL: bzhi32_b3_load_indexzext:
1263; X86-BMI1:       # %bb.0:
1264; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1265; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1266; X86-BMI1-NEXT:    shll $8, %ecx
1267; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
1268; X86-BMI1-NEXT:    retl
1269;
1270; X86-BMI2-LABEL: bzhi32_b3_load_indexzext:
1271; X86-BMI2:       # %bb.0:
1272; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1273; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1274; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1275; X86-BMI2-NEXT:    retl
1276;
1277; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
1278; X64-NOBMI:       # %bb.0:
1279; X64-NOBMI-NEXT:    movl %esi, %ecx
1280; X64-NOBMI-NEXT:    movl $-1, %eax
1281; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1282; X64-NOBMI-NEXT:    shll %cl, %eax
1283; X64-NOBMI-NEXT:    notl %eax
1284; X64-NOBMI-NEXT:    andl (%rdi), %eax
1285; X64-NOBMI-NEXT:    retq
1286;
1287; X64-BMI1-LABEL: bzhi32_b3_load_indexzext:
1288; X64-BMI1:       # %bb.0:
1289; X64-BMI1-NEXT:    shll $8, %esi
1290; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
1291; X64-BMI1-NEXT:    retq
1292;
1293; X64-BMI2-LABEL: bzhi32_b3_load_indexzext:
1294; X64-BMI2:       # %bb.0:
1295; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1296; X64-BMI2-NEXT:    retq
1297  %val = load i32, i32* %w ; value to mask comes from memory
1298  %conv = zext i8 %numlowbits to i32 ; widen the i8 bit count to the shift width
1299  %notmask = shl i32 -1, %conv
1300  %mask = xor i32 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1301  %masked = and i32 %mask, %val
1302  ret i32 %masked
1303}
1304
; Pattern b with the operands of the final `and` swapped (%val first, mask
; second), to check that matching does not depend on operand order.
1305define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
1306; X86-NOBMI-LABEL: bzhi32_b4_commutative:
1307; X86-NOBMI:       # %bb.0:
1308; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1309; X86-NOBMI-NEXT:    movl $-1, %eax
1310; X86-NOBMI-NEXT:    shll %cl, %eax
1311; X86-NOBMI-NEXT:    notl %eax
1312; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1313; X86-NOBMI-NEXT:    retl
1314;
1315; X86-BMI1-LABEL: bzhi32_b4_commutative:
1316; X86-BMI1:       # %bb.0:
1317; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
1318; X86-BMI1-NEXT:    shll $8, %eax
1319; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1320; X86-BMI1-NEXT:    retl
1321;
1322; X86-BMI2-LABEL: bzhi32_b4_commutative:
1323; X86-BMI2:       # %bb.0:
1324; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1325; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1326; X86-BMI2-NEXT:    retl
1327;
1328; X64-NOBMI-LABEL: bzhi32_b4_commutative:
1329; X64-NOBMI:       # %bb.0:
1330; X64-NOBMI-NEXT:    movl %esi, %ecx
1331; X64-NOBMI-NEXT:    movl $-1, %eax
1332; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1333; X64-NOBMI-NEXT:    shll %cl, %eax
1334; X64-NOBMI-NEXT:    notl %eax
1335; X64-NOBMI-NEXT:    andl %edi, %eax
1336; X64-NOBMI-NEXT:    retq
1337;
1338; X64-BMI1-LABEL: bzhi32_b4_commutative:
1339; X64-BMI1:       # %bb.0:
1340; X64-BMI1-NEXT:    shll $8, %esi
1341; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1342; X64-BMI1-NEXT:    retq
1343;
1344; X64-BMI2-LABEL: bzhi32_b4_commutative:
1345; X64-BMI2:       # %bb.0:
1346; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1347; X64-BMI2-NEXT:    retq
1348  %notmask = shl i32 -1, %numlowbits
1349  %mask = xor i32 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1350  %masked = and i32 %val, %mask ; swapped order
1351  ret i32 %masked
1352}
1353
1354; 64-bit
1355
; Pattern b, 64-bit base case: %val & ~(-1 << %numlowbits), everything in i64.
; On i686 the CHECK lines expect the 64-bit mask to be built from two 32-bit
; halves (selected via `testb $32`); on x86-64 with BMI a single
; bextrq / bzhiq is expected.
1356define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
1357; X86-NOBMI-LABEL: bzhi64_b0:
1358; X86-NOBMI:       # %bb.0:
1359; X86-NOBMI-NEXT:    pushl %esi
1360; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1361; X86-NOBMI-NEXT:    movl $-1, %edx
1362; X86-NOBMI-NEXT:    movl $-1, %esi
1363; X86-NOBMI-NEXT:    shll %cl, %esi
1364; X86-NOBMI-NEXT:    xorl %eax, %eax
1365; X86-NOBMI-NEXT:    testb $32, %cl
1366; X86-NOBMI-NEXT:    jne .LBB20_1
1367; X86-NOBMI-NEXT:  # %bb.2:
1368; X86-NOBMI-NEXT:    movl %esi, %eax
1369; X86-NOBMI-NEXT:    jmp .LBB20_3
1370; X86-NOBMI-NEXT:  .LBB20_1:
1371; X86-NOBMI-NEXT:    movl %esi, %edx
1372; X86-NOBMI-NEXT:  .LBB20_3:
1373; X86-NOBMI-NEXT:    notl %edx
1374; X86-NOBMI-NEXT:    notl %eax
1375; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1376; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1377; X86-NOBMI-NEXT:    popl %esi
1378; X86-NOBMI-NEXT:    retl
1379;
1380; X86-BMI1-LABEL: bzhi64_b0:
1381; X86-BMI1:       # %bb.0:
1382; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1383; X86-BMI1-NEXT:    movl $-1, %edx
1384; X86-BMI1-NEXT:    movl $-1, %eax
1385; X86-BMI1-NEXT:    shll %cl, %eax
1386; X86-BMI1-NEXT:    testb $32, %cl
1387; X86-BMI1-NEXT:    je .LBB20_2
1388; X86-BMI1-NEXT:  # %bb.1:
1389; X86-BMI1-NEXT:    movl %eax, %edx
1390; X86-BMI1-NEXT:    xorl %eax, %eax
1391; X86-BMI1-NEXT:  .LBB20_2:
1392; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1393; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1394; X86-BMI1-NEXT:    retl
1395;
1396; X86-BMI2-LABEL: bzhi64_b0:
1397; X86-BMI2:       # %bb.0:
1398; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
1399; X86-BMI2-NEXT:    movl $-1, %ecx
1400; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1401; X86-BMI2-NEXT:    testb $32, %dl
1402; X86-BMI2-NEXT:    je .LBB20_2
1403; X86-BMI2-NEXT:  # %bb.1:
1404; X86-BMI2-NEXT:    movl %eax, %ecx
1405; X86-BMI2-NEXT:    xorl %eax, %eax
1406; X86-BMI2-NEXT:  .LBB20_2:
1407; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1408; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1409; X86-BMI2-NEXT:    retl
1410;
1411; X64-NOBMI-LABEL: bzhi64_b0:
1412; X64-NOBMI:       # %bb.0:
1413; X64-NOBMI-NEXT:    movq %rsi, %rcx
1414; X64-NOBMI-NEXT:    movq $-1, %rax
1415; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1416; X64-NOBMI-NEXT:    shlq %cl, %rax
1417; X64-NOBMI-NEXT:    notq %rax
1418; X64-NOBMI-NEXT:    andq %rdi, %rax
1419; X64-NOBMI-NEXT:    retq
1420;
1421; X64-BMI1-LABEL: bzhi64_b0:
1422; X64-BMI1:       # %bb.0:
1423; X64-BMI1-NEXT:    shll $8, %esi
1424; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1425; X64-BMI1-NEXT:    retq
1426;
1427; X64-BMI2-LABEL: bzhi64_b0:
1428; X64-BMI2:       # %bb.0:
1429; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1430; X64-BMI2-NEXT:    retq
1431  %notmask = shl i64 -1, %numlowbits ; all-ones shifted left: low %numlowbits bits are clear
1432  %mask = xor i64 %notmask, -1 ; bitwise NOT -> low %numlowbits bits are set
1433  %masked = and i64 %mask, %val
1434  ret i64 %masked
1435}
1436
; 64-bit pattern b with the bit count passed as an i8 (zeroext) and
; zero-extended to i64 before the shift.
1437define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
1438; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
1439; X86-NOBMI:       # %bb.0:
1440; X86-NOBMI-NEXT:    pushl %esi
1441; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1442; X86-NOBMI-NEXT:    movl $-1, %edx
1443; X86-NOBMI-NEXT:    movl $-1, %esi
1444; X86-NOBMI-NEXT:    shll %cl, %esi
1445; X86-NOBMI-NEXT:    xorl %eax, %eax
1446; X86-NOBMI-NEXT:    testb $32, %cl
1447; X86-NOBMI-NEXT:    jne .LBB21_1
1448; X86-NOBMI-NEXT:  # %bb.2:
1449; X86-NOBMI-NEXT:    movl %esi, %eax
1450; X86-NOBMI-NEXT:    jmp .LBB21_3
1451; X86-NOBMI-NEXT:  .LBB21_1:
1452; X86-NOBMI-NEXT:    movl %esi, %edx
1453; X86-NOBMI-NEXT:  .LBB21_3:
1454; X86-NOBMI-NEXT:    notl %edx
1455; X86-NOBMI-NEXT:    notl %eax
1456; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1457; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1458; X86-NOBMI-NEXT:    popl %esi
1459; X86-NOBMI-NEXT:    retl
1460;
1461; X86-BMI1-LABEL: bzhi64_b1_indexzext:
1462; X86-BMI1:       # %bb.0:
1463; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1464; X86-BMI1-NEXT:    movl $-1, %edx
1465; X86-BMI1-NEXT:    movl $-1, %eax
1466; X86-BMI1-NEXT:    shll %cl, %eax
1467; X86-BMI1-NEXT:    testb $32, %cl
1468; X86-BMI1-NEXT:    je .LBB21_2
1469; X86-BMI1-NEXT:  # %bb.1:
1470; X86-BMI1-NEXT:    movl %eax, %edx
1471; X86-BMI1-NEXT:    xorl %eax, %eax
1472; X86-BMI1-NEXT:  .LBB21_2:
1473; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1474; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1475; X86-BMI1-NEXT:    retl
1476;
1477; X86-BMI2-LABEL: bzhi64_b1_indexzext:
1478; X86-BMI2:       # %bb.0:
1479; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
1480; X86-BMI2-NEXT:    movl $-1, %ecx
1481; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1482; X86-BMI2-NEXT:    testb $32, %dl
1483; X86-BMI2-NEXT:    je .LBB21_2
1484; X86-BMI2-NEXT:  # %bb.1:
1485; X86-BMI2-NEXT:    movl %eax, %ecx
1486; X86-BMI2-NEXT:    xorl %eax, %eax
1487; X86-BMI2-NEXT:  .LBB21_2:
1488; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1489; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1490; X86-BMI2-NEXT:    retl
1491;
1492; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
1493; X64-NOBMI:       # %bb.0:
1494; X64-NOBMI-NEXT:    movl %esi, %ecx
1495; X64-NOBMI-NEXT:    movq $-1, %rax
1496; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1497; X64-NOBMI-NEXT:    shlq %cl, %rax
1498; X64-NOBMI-NEXT:    notq %rax
1499; X64-NOBMI-NEXT:    andq %rdi, %rax
1500; X64-NOBMI-NEXT:    retq
1501;
1502; X64-BMI1-LABEL: bzhi64_b1_indexzext:
1503; X64-BMI1:       # %bb.0:
1504; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
1505; X64-BMI1-NEXT:    shll $8, %esi
1506; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1507; X64-BMI1-NEXT:    retq
1508;
1509; X64-BMI2-LABEL: bzhi64_b1_indexzext:
1510; X64-BMI2:       # %bb.0:
1511; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1512; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1513; X64-BMI2-NEXT:    retq
1514  %conv = zext i8 %numlowbits to i64 ; widen the i8 bit count to the shift width
1515  %notmask = shl i64 -1, %conv
1516  %mask = xor i64 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1517  %masked = and i64 %mask, %val
1518  ret i64 %masked
1519}
1520
; 64-bit pattern b where the value being masked is loaded from memory (%w).
; On i686 the CHECK lines expect the two 32-bit halves to be read as (%reg)
; and 4(%reg); on x86-64 the load is folded into andq / bextrq / bzhiq.
1521define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
1522; X86-NOBMI-LABEL: bzhi64_b2_load:
1523; X86-NOBMI:       # %bb.0:
1524; X86-NOBMI-NEXT:    pushl %edi
1525; X86-NOBMI-NEXT:    pushl %esi
1526; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1527; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1528; X86-NOBMI-NEXT:    movl $-1, %edx
1529; X86-NOBMI-NEXT:    movl $-1, %edi
1530; X86-NOBMI-NEXT:    shll %cl, %edi
1531; X86-NOBMI-NEXT:    xorl %eax, %eax
1532; X86-NOBMI-NEXT:    testb $32, %cl
1533; X86-NOBMI-NEXT:    jne .LBB22_1
1534; X86-NOBMI-NEXT:  # %bb.2:
1535; X86-NOBMI-NEXT:    movl %edi, %eax
1536; X86-NOBMI-NEXT:    jmp .LBB22_3
1537; X86-NOBMI-NEXT:  .LBB22_1:
1538; X86-NOBMI-NEXT:    movl %edi, %edx
1539; X86-NOBMI-NEXT:  .LBB22_3:
1540; X86-NOBMI-NEXT:    notl %edx
1541; X86-NOBMI-NEXT:    notl %eax
1542; X86-NOBMI-NEXT:    andl (%esi), %eax
1543; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1544; X86-NOBMI-NEXT:    popl %esi
1545; X86-NOBMI-NEXT:    popl %edi
1546; X86-NOBMI-NEXT:    retl
1547;
1548; X86-BMI1-LABEL: bzhi64_b2_load:
1549; X86-BMI1:       # %bb.0:
1550; X86-BMI1-NEXT:    pushl %esi
1551; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
1552; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1553; X86-BMI1-NEXT:    movl $-1, %esi
1554; X86-BMI1-NEXT:    movl $-1, %eax
1555; X86-BMI1-NEXT:    shll %cl, %eax
1556; X86-BMI1-NEXT:    testb $32, %cl
1557; X86-BMI1-NEXT:    je .LBB22_2
1558; X86-BMI1-NEXT:  # %bb.1:
1559; X86-BMI1-NEXT:    movl %eax, %esi
1560; X86-BMI1-NEXT:    xorl %eax, %eax
1561; X86-BMI1-NEXT:  .LBB22_2:
1562; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
1563; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
1564; X86-BMI1-NEXT:    popl %esi
1565; X86-BMI1-NEXT:    retl
1566;
1567; X86-BMI2-LABEL: bzhi64_b2_load:
1568; X86-BMI2:       # %bb.0:
1569; X86-BMI2-NEXT:    pushl %ebx
1570; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1571; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
1572; X86-BMI2-NEXT:    movl $-1, %edx
1573; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
1574; X86-BMI2-NEXT:    testb $32, %bl
1575; X86-BMI2-NEXT:    je .LBB22_2
1576; X86-BMI2-NEXT:  # %bb.1:
1577; X86-BMI2-NEXT:    movl %eax, %edx
1578; X86-BMI2-NEXT:    xorl %eax, %eax
1579; X86-BMI2-NEXT:  .LBB22_2:
1580; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
1581; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
1582; X86-BMI2-NEXT:    popl %ebx
1583; X86-BMI2-NEXT:    retl
1584;
1585; X64-NOBMI-LABEL: bzhi64_b2_load:
1586; X64-NOBMI:       # %bb.0:
1587; X64-NOBMI-NEXT:    movq %rsi, %rcx
1588; X64-NOBMI-NEXT:    movq $-1, %rax
1589; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1590; X64-NOBMI-NEXT:    shlq %cl, %rax
1591; X64-NOBMI-NEXT:    notq %rax
1592; X64-NOBMI-NEXT:    andq (%rdi), %rax
1593; X64-NOBMI-NEXT:    retq
1594;
1595; X64-BMI1-LABEL: bzhi64_b2_load:
1596; X64-BMI1:       # %bb.0:
1597; X64-BMI1-NEXT:    shll $8, %esi
1598; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
1599; X64-BMI1-NEXT:    retq
1600;
1601; X64-BMI2-LABEL: bzhi64_b2_load:
1602; X64-BMI2:       # %bb.0:
1603; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1604; X64-BMI2-NEXT:    retq
1605  %val = load i64, i64* %w ; value to mask comes from memory
1606  %notmask = shl i64 -1, %numlowbits
1607  %mask = xor i64 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1608  %masked = and i64 %mask, %val
1609  ret i64 %masked
1610}
1611
; 64-bit pattern b combining both variations: the value is loaded from memory
; (%w) and the bit count is an i8 (zeroext) that is zero-extended to i64.
1612define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
1613; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
1614; X86-NOBMI:       # %bb.0:
1615; X86-NOBMI-NEXT:    pushl %edi
1616; X86-NOBMI-NEXT:    pushl %esi
1617; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1618; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1619; X86-NOBMI-NEXT:    movl $-1, %edx
1620; X86-NOBMI-NEXT:    movl $-1, %edi
1621; X86-NOBMI-NEXT:    shll %cl, %edi
1622; X86-NOBMI-NEXT:    xorl %eax, %eax
1623; X86-NOBMI-NEXT:    testb $32, %cl
1624; X86-NOBMI-NEXT:    jne .LBB23_1
1625; X86-NOBMI-NEXT:  # %bb.2:
1626; X86-NOBMI-NEXT:    movl %edi, %eax
1627; X86-NOBMI-NEXT:    jmp .LBB23_3
1628; X86-NOBMI-NEXT:  .LBB23_1:
1629; X86-NOBMI-NEXT:    movl %edi, %edx
1630; X86-NOBMI-NEXT:  .LBB23_3:
1631; X86-NOBMI-NEXT:    notl %edx
1632; X86-NOBMI-NEXT:    notl %eax
1633; X86-NOBMI-NEXT:    andl (%esi), %eax
1634; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1635; X86-NOBMI-NEXT:    popl %esi
1636; X86-NOBMI-NEXT:    popl %edi
1637; X86-NOBMI-NEXT:    retl
1638;
1639; X86-BMI1-LABEL: bzhi64_b3_load_indexzext:
1640; X86-BMI1:       # %bb.0:
1641; X86-BMI1-NEXT:    pushl %esi
1642; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
1643; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1644; X86-BMI1-NEXT:    movl $-1, %esi
1645; X86-BMI1-NEXT:    movl $-1, %eax
1646; X86-BMI1-NEXT:    shll %cl, %eax
1647; X86-BMI1-NEXT:    testb $32, %cl
1648; X86-BMI1-NEXT:    je .LBB23_2
1649; X86-BMI1-NEXT:  # %bb.1:
1650; X86-BMI1-NEXT:    movl %eax, %esi
1651; X86-BMI1-NEXT:    xorl %eax, %eax
1652; X86-BMI1-NEXT:  .LBB23_2:
1653; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
1654; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
1655; X86-BMI1-NEXT:    popl %esi
1656; X86-BMI1-NEXT:    retl
1657;
1658; X86-BMI2-LABEL: bzhi64_b3_load_indexzext:
1659; X86-BMI2:       # %bb.0:
1660; X86-BMI2-NEXT:    pushl %ebx
1661; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1662; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
1663; X86-BMI2-NEXT:    movl $-1, %edx
1664; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
1665; X86-BMI2-NEXT:    testb $32, %bl
1666; X86-BMI2-NEXT:    je .LBB23_2
1667; X86-BMI2-NEXT:  # %bb.1:
1668; X86-BMI2-NEXT:    movl %eax, %edx
1669; X86-BMI2-NEXT:    xorl %eax, %eax
1670; X86-BMI2-NEXT:  .LBB23_2:
1671; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
1672; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
1673; X86-BMI2-NEXT:    popl %ebx
1674; X86-BMI2-NEXT:    retl
1675;
1676; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
1677; X64-NOBMI:       # %bb.0:
1678; X64-NOBMI-NEXT:    movl %esi, %ecx
1679; X64-NOBMI-NEXT:    movq $-1, %rax
1680; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1681; X64-NOBMI-NEXT:    shlq %cl, %rax
1682; X64-NOBMI-NEXT:    notq %rax
1683; X64-NOBMI-NEXT:    andq (%rdi), %rax
1684; X64-NOBMI-NEXT:    retq
1685;
1686; X64-BMI1-LABEL: bzhi64_b3_load_indexzext:
1687; X64-BMI1:       # %bb.0:
1688; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
1689; X64-BMI1-NEXT:    shll $8, %esi
1690; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
1691; X64-BMI1-NEXT:    retq
1692;
1693; X64-BMI2-LABEL: bzhi64_b3_load_indexzext:
1694; X64-BMI2:       # %bb.0:
1695; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1696; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1697; X64-BMI2-NEXT:    retq
1698  %val = load i64, i64* %w ; value to mask comes from memory
1699  %conv = zext i8 %numlowbits to i64 ; widen the i8 bit count to the shift width
1700  %notmask = shl i64 -1, %conv
1701  %mask = xor i64 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1702  %masked = and i64 %mask, %val
1703  ret i64 %masked
1704}
1705
; 64-bit pattern b with the operands of the final `and` swapped (%val first,
; mask second), to check that matching does not depend on operand order.
1706define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
1707; X86-NOBMI-LABEL: bzhi64_b4_commutative:
1708; X86-NOBMI:       # %bb.0:
1709; X86-NOBMI-NEXT:    pushl %esi
1710; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1711; X86-NOBMI-NEXT:    movl $-1, %edx
1712; X86-NOBMI-NEXT:    movl $-1, %esi
1713; X86-NOBMI-NEXT:    shll %cl, %esi
1714; X86-NOBMI-NEXT:    xorl %eax, %eax
1715; X86-NOBMI-NEXT:    testb $32, %cl
1716; X86-NOBMI-NEXT:    jne .LBB24_1
1717; X86-NOBMI-NEXT:  # %bb.2:
1718; X86-NOBMI-NEXT:    movl %esi, %eax
1719; X86-NOBMI-NEXT:    jmp .LBB24_3
1720; X86-NOBMI-NEXT:  .LBB24_1:
1721; X86-NOBMI-NEXT:    movl %esi, %edx
1722; X86-NOBMI-NEXT:  .LBB24_3:
1723; X86-NOBMI-NEXT:    notl %edx
1724; X86-NOBMI-NEXT:    notl %eax
1725; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1726; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1727; X86-NOBMI-NEXT:    popl %esi
1728; X86-NOBMI-NEXT:    retl
1729;
1730; X86-BMI1-LABEL: bzhi64_b4_commutative:
1731; X86-BMI1:       # %bb.0:
1732; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1733; X86-BMI1-NEXT:    movl $-1, %edx
1734; X86-BMI1-NEXT:    movl $-1, %eax
1735; X86-BMI1-NEXT:    shll %cl, %eax
1736; X86-BMI1-NEXT:    testb $32, %cl
1737; X86-BMI1-NEXT:    je .LBB24_2
1738; X86-BMI1-NEXT:  # %bb.1:
1739; X86-BMI1-NEXT:    movl %eax, %edx
1740; X86-BMI1-NEXT:    xorl %eax, %eax
1741; X86-BMI1-NEXT:  .LBB24_2:
1742; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1743; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1744; X86-BMI1-NEXT:    retl
1745;
1746; X86-BMI2-LABEL: bzhi64_b4_commutative:
1747; X86-BMI2:       # %bb.0:
1748; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
1749; X86-BMI2-NEXT:    movl $-1, %ecx
1750; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1751; X86-BMI2-NEXT:    testb $32, %dl
1752; X86-BMI2-NEXT:    je .LBB24_2
1753; X86-BMI2-NEXT:  # %bb.1:
1754; X86-BMI2-NEXT:    movl %eax, %ecx
1755; X86-BMI2-NEXT:    xorl %eax, %eax
1756; X86-BMI2-NEXT:  .LBB24_2:
1757; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1758; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1759; X86-BMI2-NEXT:    retl
1760;
1761; X64-NOBMI-LABEL: bzhi64_b4_commutative:
1762; X64-NOBMI:       # %bb.0:
1763; X64-NOBMI-NEXT:    movq %rsi, %rcx
1764; X64-NOBMI-NEXT:    movq $-1, %rax
1765; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1766; X64-NOBMI-NEXT:    shlq %cl, %rax
1767; X64-NOBMI-NEXT:    notq %rax
1768; X64-NOBMI-NEXT:    andq %rdi, %rax
1769; X64-NOBMI-NEXT:    retq
1770;
1771; X64-BMI1-LABEL: bzhi64_b4_commutative:
1772; X64-BMI1:       # %bb.0:
1773; X64-BMI1-NEXT:    shll $8, %esi
1774; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1775; X64-BMI1-NEXT:    retq
1776;
1777; X64-BMI2-LABEL: bzhi64_b4_commutative:
1778; X64-BMI2:       # %bb.0:
1779; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1780; X64-BMI2-NEXT:    retq
1781  %notmask = shl i64 -1, %numlowbits
1782  %mask = xor i64 %notmask, -1 ; bitwise NOT of the shifted all-ones value
1783  %masked = and i64 %val, %mask ; swapped order
1784  ret i64 %masked
1785}
1786
1787; 64-bit, but with 32-bit output
1788
1789; Everything done in 64-bit, truncation happens last.
; The pattern b mask, ~(-1 << numlowbits), is built and applied as i64; only
; the final result is narrowed to i32.
1790define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
1791; X86-NOBMI-LABEL: bzhi64_32_b0:
1792; X86-NOBMI:       # %bb.0:
1793; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1794; X86-NOBMI-NEXT:    movl $-1, %edx
1795; X86-NOBMI-NEXT:    shll %cl, %edx
1796; X86-NOBMI-NEXT:    xorl %eax, %eax
1797; X86-NOBMI-NEXT:    testb $32, %cl
1798; X86-NOBMI-NEXT:    jne .LBB25_2
1799; X86-NOBMI-NEXT:  # %bb.1:
1800; X86-NOBMI-NEXT:    movl %edx, %eax
1801; X86-NOBMI-NEXT:  .LBB25_2:
1802; X86-NOBMI-NEXT:    notl %eax
1803; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1804; X86-NOBMI-NEXT:    retl
1805;
1806; X86-BMI1-LABEL: bzhi64_32_b0:
1807; X86-BMI1:       # %bb.0:
1808; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1809; X86-BMI1-NEXT:    movl $-1, %eax
1810; X86-BMI1-NEXT:    shll %cl, %eax
1811; X86-BMI1-NEXT:    xorl %edx, %edx
1812; X86-BMI1-NEXT:    testb $32, %cl
1813; X86-BMI1-NEXT:    jne .LBB25_2
1814; X86-BMI1-NEXT:  # %bb.1:
1815; X86-BMI1-NEXT:    movl %eax, %edx
1816; X86-BMI1-NEXT:  .LBB25_2:
1817; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
1818; X86-BMI1-NEXT:    retl
1819;
1820; X86-BMI2-LABEL: bzhi64_32_b0:
1821; X86-BMI2:       # %bb.0:
1822; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1823; X86-BMI2-NEXT:    xorl %ecx, %ecx
1824; X86-BMI2-NEXT:    testb $32, %al
1825; X86-BMI2-NEXT:    jne .LBB25_2
1826; X86-BMI2-NEXT:  # %bb.1:
1827; X86-BMI2-NEXT:    movl $-1, %ecx
1828; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
1829; X86-BMI2-NEXT:  .LBB25_2:
1830; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
1831; X86-BMI2-NEXT:    retl
1832;
1833; X64-NOBMI-LABEL: bzhi64_32_b0:
1834; X64-NOBMI:       # %bb.0:
1835; X64-NOBMI-NEXT:    movl %esi, %ecx
1836; X64-NOBMI-NEXT:    movq $-1, %rax
1837; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1838; X64-NOBMI-NEXT:    shlq %cl, %rax
1839; X64-NOBMI-NEXT:    notl %eax
1840; X64-NOBMI-NEXT:    andl %edi, %eax
1841; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
1842; X64-NOBMI-NEXT:    retq
1843;
1844; X64-BMI1-LABEL: bzhi64_32_b0:
1845; X64-BMI1:       # %bb.0:
1846; X64-BMI1-NEXT:    shll $8, %esi
1847; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1848; X64-BMI1-NEXT:    retq
1849;
1850; X64-BMI2-LABEL: bzhi64_32_b0:
1851; X64-BMI2:       # %bb.0:
1852; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1853; X64-BMI2-NEXT:    retq
1854  %widenumlowbits = zext i8 %numlowbits to i64 ; widen the bit count to the shift type
1855  %notmask = shl nsw i64 -1, %widenumlowbits ; -1 << numlowbits, low bits become zero
1856  %mask = xor i64 %notmask, -1 ; invert, leaving ones only in the low bits
1857  %wideres = and i64 %val, %mask ; 64-bit masking
1858  %res = trunc i64 %wideres to i32 ; narrowing is the last step
1859  ret i32 %res
1860}
1861
1862; Truncation of the value happens first. Shifting and masking are 32-bit.
; Only the incoming %val is i64; the pattern b mask, ~(-1 << numlowbits), is
; computed entirely as i32.
1863define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
1864; X86-NOBMI-LABEL: bzhi64_32_b1:
1865; X86-NOBMI:       # %bb.0:
1866; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1867; X86-NOBMI-NEXT:    movl $-1, %eax
1868; X86-NOBMI-NEXT:    shll %cl, %eax
1869; X86-NOBMI-NEXT:    notl %eax
1870; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1871; X86-NOBMI-NEXT:    retl
1872;
1873; X86-BMI1-LABEL: bzhi64_32_b1:
1874; X86-BMI1:       # %bb.0:
1875; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
1876; X86-BMI1-NEXT:    shll $8, %eax
1877; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1878; X86-BMI1-NEXT:    retl
1879;
1880; X86-BMI2-LABEL: bzhi64_32_b1:
1881; X86-BMI2:       # %bb.0:
1882; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1883; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1884; X86-BMI2-NEXT:    retl
1885;
1886; X64-NOBMI-LABEL: bzhi64_32_b1:
1887; X64-NOBMI:       # %bb.0:
1888; X64-NOBMI-NEXT:    movl %esi, %ecx
1889; X64-NOBMI-NEXT:    movl $-1, %eax
1890; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1891; X64-NOBMI-NEXT:    shll %cl, %eax
1892; X64-NOBMI-NEXT:    notl %eax
1893; X64-NOBMI-NEXT:    andl %edi, %eax
1894; X64-NOBMI-NEXT:    retq
1895;
1896; X64-BMI1-LABEL: bzhi64_32_b1:
1897; X64-BMI1:       # %bb.0:
1898; X64-BMI1-NEXT:    shll $8, %esi
1899; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1900; X64-BMI1-NEXT:    retq
1901;
1902; X64-BMI2-LABEL: bzhi64_32_b1:
1903; X64-BMI2:       # %bb.0:
1904; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1905; X64-BMI2-NEXT:    retq
1906  %truncval = trunc i64 %val to i32 ; narrow the value before any masking
1907  %widenumlowbits = zext i8 %numlowbits to i32 ; widen the bit count to the shift type
1908  %notmask = shl nsw i32 -1, %widenumlowbits ; 32-bit shift, low bits become zero
1909  %mask = xor i32 %notmask, -1 ; invert, leaving ones only in the low bits
1910  %res = and i32 %truncval, %mask ; 32-bit masking
1911  ret i32 %res
1912}
1913
1914; Shifting happens in 32-bit. Mask is 32-bit, but zero-extended to 64-bit.
1915; Masking is 64-bit. Then truncation.
1916define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
1917; X86-NOBMI-LABEL: bzhi64_32_b2:
1918; X86-NOBMI:       # %bb.0:
1919; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1920; X86-NOBMI-NEXT:    movl $-1, %eax
1921; X86-NOBMI-NEXT:    shll %cl, %eax
1922; X86-NOBMI-NEXT:    notl %eax
1923; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1924; X86-NOBMI-NEXT:    retl
1925;
1926; X86-BMI1-LABEL: bzhi64_32_b2:
1927; X86-BMI1:       # %bb.0:
1928; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
1929; X86-BMI1-NEXT:    shll $8, %eax
1930; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1931; X86-BMI1-NEXT:    retl
1932;
1933; X86-BMI2-LABEL: bzhi64_32_b2:
1934; X86-BMI2:       # %bb.0:
1935; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1936; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1937; X86-BMI2-NEXT:    retl
1938;
1939; X64-NOBMI-LABEL: bzhi64_32_b2:
1940; X64-NOBMI:       # %bb.0:
1941; X64-NOBMI-NEXT:    movl %esi, %ecx
1942; X64-NOBMI-NEXT:    movl $-1, %eax
1943; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1944; X64-NOBMI-NEXT:    shll %cl, %eax
1945; X64-NOBMI-NEXT:    notl %eax
1946; X64-NOBMI-NEXT:    andl %edi, %eax
1947; X64-NOBMI-NEXT:    retq
1948;
1949; X64-BMI1-LABEL: bzhi64_32_b2:
1950; X64-BMI1:       # %bb.0:
1951; X64-BMI1-NEXT:    shll $8, %esi
1952; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1953; X64-BMI1-NEXT:    retq
1954;
1955; X64-BMI2-LABEL: bzhi64_32_b2:
1956; X64-BMI2:       # %bb.0:
1957; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1958; X64-BMI2-NEXT:    retq
1959  %widenumlowbits = zext i8 %numlowbits to i32 ; widen the bit count to the shift type
1960  %notmask = shl nsw i32 -1, %widenumlowbits ; 32-bit shift, low bits become zero
1961  %mask = xor i32 %notmask, -1 ; invert, leaving ones only in the low bits
1962  %zextmask = zext i32 %mask to i64 ; upper 32 bits of the wide mask are zero
1963  %wideres = and i64 %val, %zextmask ; 64-bit masking
1964  %res = trunc i64 %wideres to i32 ; narrowing is the last step
1965  ret i32 %res
1966}
1967
1968; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
1969; Masking is 64-bit. Then truncation.
; The constant 4294967295 (0xFFFFFFFF) stands in for the 32-bit all-ones value
; in both the shift and the xor.
1970define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
1971; X86-NOBMI-LABEL: bzhi64_32_b3:
1972; X86-NOBMI:       # %bb.0:
1973; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
1974; X86-NOBMI-NEXT:    movl $-1, %edx
1975; X86-NOBMI-NEXT:    shll %cl, %edx
1976; X86-NOBMI-NEXT:    xorl %eax, %eax
1977; X86-NOBMI-NEXT:    testb $32, %cl
1978; X86-NOBMI-NEXT:    jne .LBB28_2
1979; X86-NOBMI-NEXT:  # %bb.1:
1980; X86-NOBMI-NEXT:    movl %edx, %eax
1981; X86-NOBMI-NEXT:  .LBB28_2:
1982; X86-NOBMI-NEXT:    notl %eax
1983; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1984; X86-NOBMI-NEXT:    retl
1985;
1986; X86-BMI1-LABEL: bzhi64_32_b3:
1987; X86-BMI1:       # %bb.0:
1988; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1989; X86-BMI1-NEXT:    movl $-1, %eax
1990; X86-BMI1-NEXT:    shll %cl, %eax
1991; X86-BMI1-NEXT:    xorl %edx, %edx
1992; X86-BMI1-NEXT:    testb $32, %cl
1993; X86-BMI1-NEXT:    jne .LBB28_2
1994; X86-BMI1-NEXT:  # %bb.1:
1995; X86-BMI1-NEXT:    movl %eax, %edx
1996; X86-BMI1-NEXT:  .LBB28_2:
1997; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
1998; X86-BMI1-NEXT:    retl
1999;
2000; X86-BMI2-LABEL: bzhi64_32_b3:
2001; X86-BMI2:       # %bb.0:
2002; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
2003; X86-BMI2-NEXT:    xorl %ecx, %ecx
2004; X86-BMI2-NEXT:    testb $32, %al
2005; X86-BMI2-NEXT:    jne .LBB28_2
2006; X86-BMI2-NEXT:  # %bb.1:
2007; X86-BMI2-NEXT:    movl $-1, %ecx
2008; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
2009; X86-BMI2-NEXT:  .LBB28_2:
2010; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
2011; X86-BMI2-NEXT:    retl
2012;
2013; X64-NOBMI-LABEL: bzhi64_32_b3:
2014; X64-NOBMI:       # %bb.0:
2015; X64-NOBMI-NEXT:    movl %esi, %ecx
2016; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
2017; X64-NOBMI-NEXT:    movl $4294967295, %edx # imm = 0xFFFFFFFF
2018; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2019; X64-NOBMI-NEXT:    shlq %cl, %rdx
2020; X64-NOBMI-NEXT:    xorl %edx, %eax
2021; X64-NOBMI-NEXT:    andl %edi, %eax
2022; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
2023; X64-NOBMI-NEXT:    retq
2024;
2025; X64-BMI1-LABEL: bzhi64_32_b3:
2026; X64-BMI1:       # %bb.0:
2027; X64-BMI1-NEXT:    shll $8, %esi
2028; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
2029; X64-BMI1-NEXT:    retq
2030;
2031; X64-BMI2-LABEL: bzhi64_32_b3:
2032; X64-BMI2:       # %bb.0:
2033; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2034; X64-BMI2-NEXT:    retq
2035  %widenumlowbits = zext i8 %numlowbits to i64 ; widen the bit count to the shift type
2036  %notmask = shl nsw i64 4294967295, %widenumlowbits ; 64-bit shift of 0xFFFFFFFF
2037  %mask = xor i64 %notmask, 4294967295 ; flip only the low 32 bits
2038  %wideres = and i64 %val, %mask ; 64-bit masking
2039  %res = trunc i64 %wideres to i32 ; narrowing is the last step
2040  ret i32 %res
2041}
2042
2043; ---------------------------------------------------------------------------- ;
2044; Pattern c. 32-bit
2045; ---------------------------------------------------------------------------- ;
2046
; External function taking an i32. The pattern c tests below pass %mask to it,
; so the mask has a second use besides the 'and'.
2047declare void @use32(i32)
2048
; Pattern c, baseline: %val & (-1 >> (32 - %numlowbits)), with both operands
; passed as i32. The mask is also handed to @use32 before the 'and'.
2049define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
2050; X86-NOBMI-LABEL: bzhi32_c0:
2051; X86-NOBMI:       # %bb.0:
2052; X86-NOBMI-NEXT:    pushl %esi
2053; X86-NOBMI-NEXT:    subl $8, %esp
2054; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2055; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2056; X86-NOBMI-NEXT:    movl $-1, %esi
2057; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2058; X86-NOBMI-NEXT:    shrl %cl, %esi
2059; X86-NOBMI-NEXT:    movl %esi, (%esp)
2060; X86-NOBMI-NEXT:    calll use32@PLT
2061; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
2062; X86-NOBMI-NEXT:    movl %esi, %eax
2063; X86-NOBMI-NEXT:    addl $8, %esp
2064; X86-NOBMI-NEXT:    popl %esi
2065; X86-NOBMI-NEXT:    retl
2066;
2067; X86-BMI1-LABEL: bzhi32_c0:
2068; X86-BMI1:       # %bb.0:
2069; X86-BMI1-NEXT:    pushl %esi
2070; X86-BMI1-NEXT:    subl $8, %esp
2071; X86-BMI1-NEXT:    xorl %ecx, %ecx
2072; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2073; X86-BMI1-NEXT:    movl $-1, %esi
2074; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2075; X86-BMI1-NEXT:    shrl %cl, %esi
2076; X86-BMI1-NEXT:    movl %esi, (%esp)
2077; X86-BMI1-NEXT:    calll use32@PLT
2078; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
2079; X86-BMI1-NEXT:    movl %esi, %eax
2080; X86-BMI1-NEXT:    addl $8, %esp
2081; X86-BMI1-NEXT:    popl %esi
2082; X86-BMI1-NEXT:    retl
2083;
2084; X86-BMI2-LABEL: bzhi32_c0:
2085; X86-BMI2:       # %bb.0:
2086; X86-BMI2-NEXT:    pushl %ebx
2087; X86-BMI2-NEXT:    subl $8, %esp
2088; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
2089; X86-BMI2-NEXT:    movl %ebx, %eax
2090; X86-BMI2-NEXT:    negb %al
2091; X86-BMI2-NEXT:    movl $-1, %ecx
2092; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
2093; X86-BMI2-NEXT:    movl %eax, (%esp)
2094; X86-BMI2-NEXT:    calll use32@PLT
2095; X86-BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
2096; X86-BMI2-NEXT:    addl $8, %esp
2097; X86-BMI2-NEXT:    popl %ebx
2098; X86-BMI2-NEXT:    retl
2099;
2100; X64-NOBMI-LABEL: bzhi32_c0:
2101; X64-NOBMI:       # %bb.0:
2102; X64-NOBMI-NEXT:    pushq %rbp
2103; X64-NOBMI-NEXT:    pushq %rbx
2104; X64-NOBMI-NEXT:    pushq %rax
2105; X64-NOBMI-NEXT:    movl %esi, %ecx
2106; X64-NOBMI-NEXT:    movl %edi, %ebx
2107; X64-NOBMI-NEXT:    negb %cl
2108; X64-NOBMI-NEXT:    movl $-1, %ebp
2109; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2110; X64-NOBMI-NEXT:    shrl %cl, %ebp
2111; X64-NOBMI-NEXT:    movl %ebp, %edi
2112; X64-NOBMI-NEXT:    callq use32@PLT
2113; X64-NOBMI-NEXT:    andl %ebx, %ebp
2114; X64-NOBMI-NEXT:    movl %ebp, %eax
2115; X64-NOBMI-NEXT:    addq $8, %rsp
2116; X64-NOBMI-NEXT:    popq %rbx
2117; X64-NOBMI-NEXT:    popq %rbp
2118; X64-NOBMI-NEXT:    retq
2119;
2120; X64-BMI1-LABEL: bzhi32_c0:
2121; X64-BMI1:       # %bb.0:
2122; X64-BMI1-NEXT:    pushq %rbp
2123; X64-BMI1-NEXT:    pushq %rbx
2124; X64-BMI1-NEXT:    pushq %rax
2125; X64-BMI1-NEXT:    movl %esi, %ecx
2126; X64-BMI1-NEXT:    movl %edi, %ebx
2127; X64-BMI1-NEXT:    negb %cl
2128; X64-BMI1-NEXT:    movl $-1, %ebp
2129; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2130; X64-BMI1-NEXT:    shrl %cl, %ebp
2131; X64-BMI1-NEXT:    movl %ebp, %edi
2132; X64-BMI1-NEXT:    callq use32@PLT
2133; X64-BMI1-NEXT:    andl %ebx, %ebp
2134; X64-BMI1-NEXT:    movl %ebp, %eax
2135; X64-BMI1-NEXT:    addq $8, %rsp
2136; X64-BMI1-NEXT:    popq %rbx
2137; X64-BMI1-NEXT:    popq %rbp
2138; X64-BMI1-NEXT:    retq
2139;
2140; X64-BMI2-LABEL: bzhi32_c0:
2141; X64-BMI2:       # %bb.0:
2142; X64-BMI2-NEXT:    pushq %rbp
2143; X64-BMI2-NEXT:    pushq %rbx
2144; X64-BMI2-NEXT:    pushq %rax
2145; X64-BMI2-NEXT:    movl %esi, %ebx
2146; X64-BMI2-NEXT:    movl %edi, %ebp
2147; X64-BMI2-NEXT:    movl %ebx, %eax
2148; X64-BMI2-NEXT:    negb %al
2149; X64-BMI2-NEXT:    movl $-1, %ecx
2150; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
2151; X64-BMI2-NEXT:    callq use32@PLT
2152; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
2153; X64-BMI2-NEXT:    addq $8, %rsp
2154; X64-BMI2-NEXT:    popq %rbx
2155; X64-BMI2-NEXT:    popq %rbp
2156; X64-BMI2-NEXT:    retq
2157  %numhighbits = sub i32 32, %numlowbits ; how many high bits to clear
2158  %mask = lshr i32 -1, %numhighbits ; -1 >> (32 - numlowbits)
2159  call void @use32(i32 %mask) ; extra use of the mask
2160  %masked = and i32 %mask, %val
2161  ret i32 %masked
2162}
2163
; Pattern c with an i8 bit count: the subtraction from 32 is done in i8 and the
; result is zero-extended to i32 before being used as the shift amount.
; The mask is also handed to @use32 before the 'and'.
2164define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
2165; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
2166; X86-NOBMI:       # %bb.0:
2167; X86-NOBMI-NEXT:    pushl %esi
2168; X86-NOBMI-NEXT:    subl $8, %esp
2169; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2170; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2171; X86-NOBMI-NEXT:    movl $-1, %esi
2172; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2173; X86-NOBMI-NEXT:    shrl %cl, %esi
2174; X86-NOBMI-NEXT:    movl %esi, (%esp)
2175; X86-NOBMI-NEXT:    calll use32@PLT
2176; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
2177; X86-NOBMI-NEXT:    movl %esi, %eax
2178; X86-NOBMI-NEXT:    addl $8, %esp
2179; X86-NOBMI-NEXT:    popl %esi
2180; X86-NOBMI-NEXT:    retl
2181;
2182; X86-BMI1-LABEL: bzhi32_c1_indexzext:
2183; X86-BMI1:       # %bb.0:
2184; X86-BMI1-NEXT:    pushl %esi
2185; X86-BMI1-NEXT:    subl $8, %esp
2186; X86-BMI1-NEXT:    xorl %ecx, %ecx
2187; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2188; X86-BMI1-NEXT:    movl $-1, %esi
2189; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2190; X86-BMI1-NEXT:    shrl %cl, %esi
2191; X86-BMI1-NEXT:    movl %esi, (%esp)
2192; X86-BMI1-NEXT:    calll use32@PLT
2193; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
2194; X86-BMI1-NEXT:    movl %esi, %eax
2195; X86-BMI1-NEXT:    addl $8, %esp
2196; X86-BMI1-NEXT:    popl %esi
2197; X86-BMI1-NEXT:    retl
2198;
2199; X86-BMI2-LABEL: bzhi32_c1_indexzext:
2200; X86-BMI2:       # %bb.0:
2201; X86-BMI2-NEXT:    pushl %ebx
2202; X86-BMI2-NEXT:    subl $8, %esp
2203; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
2204; X86-BMI2-NEXT:    movl %ebx, %eax
2205; X86-BMI2-NEXT:    negb %al
2206; X86-BMI2-NEXT:    movl $-1, %ecx
2207; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
2208; X86-BMI2-NEXT:    movl %eax, (%esp)
2209; X86-BMI2-NEXT:    calll use32@PLT
2210; X86-BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
2211; X86-BMI2-NEXT:    addl $8, %esp
2212; X86-BMI2-NEXT:    popl %ebx
2213; X86-BMI2-NEXT:    retl
2214;
2215; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
2216; X64-NOBMI:       # %bb.0:
2217; X64-NOBMI-NEXT:    pushq %rbp
2218; X64-NOBMI-NEXT:    pushq %rbx
2219; X64-NOBMI-NEXT:    pushq %rax
2220; X64-NOBMI-NEXT:    movl %esi, %ecx
2221; X64-NOBMI-NEXT:    movl %edi, %ebx
2222; X64-NOBMI-NEXT:    negb %cl
2223; X64-NOBMI-NEXT:    movl $-1, %ebp
2224; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2225; X64-NOBMI-NEXT:    shrl %cl, %ebp
2226; X64-NOBMI-NEXT:    movl %ebp, %edi
2227; X64-NOBMI-NEXT:    callq use32@PLT
2228; X64-NOBMI-NEXT:    andl %ebx, %ebp
2229; X64-NOBMI-NEXT:    movl %ebp, %eax
2230; X64-NOBMI-NEXT:    addq $8, %rsp
2231; X64-NOBMI-NEXT:    popq %rbx
2232; X64-NOBMI-NEXT:    popq %rbp
2233; X64-NOBMI-NEXT:    retq
2234;
2235; X64-BMI1-LABEL: bzhi32_c1_indexzext:
2236; X64-BMI1:       # %bb.0:
2237; X64-BMI1-NEXT:    pushq %rbp
2238; X64-BMI1-NEXT:    pushq %rbx
2239; X64-BMI1-NEXT:    pushq %rax
2240; X64-BMI1-NEXT:    movl %esi, %ecx
2241; X64-BMI1-NEXT:    movl %edi, %ebx
2242; X64-BMI1-NEXT:    negb %cl
2243; X64-BMI1-NEXT:    movl $-1, %ebp
2244; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2245; X64-BMI1-NEXT:    shrl %cl, %ebp
2246; X64-BMI1-NEXT:    movl %ebp, %edi
2247; X64-BMI1-NEXT:    callq use32@PLT
2248; X64-BMI1-NEXT:    andl %ebx, %ebp
2249; X64-BMI1-NEXT:    movl %ebp, %eax
2250; X64-BMI1-NEXT:    addq $8, %rsp
2251; X64-BMI1-NEXT:    popq %rbx
2252; X64-BMI1-NEXT:    popq %rbp
2253; X64-BMI1-NEXT:    retq
2254;
2255; X64-BMI2-LABEL: bzhi32_c1_indexzext:
2256; X64-BMI2:       # %bb.0:
2257; X64-BMI2-NEXT:    pushq %rbp
2258; X64-BMI2-NEXT:    pushq %rbx
2259; X64-BMI2-NEXT:    pushq %rax
2260; X64-BMI2-NEXT:    movl %esi, %ebx
2261; X64-BMI2-NEXT:    movl %edi, %ebp
2262; X64-BMI2-NEXT:    movl %ebx, %eax
2263; X64-BMI2-NEXT:    negb %al
2264; X64-BMI2-NEXT:    movl $-1, %ecx
2265; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
2266; X64-BMI2-NEXT:    callq use32@PLT
2267; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
2268; X64-BMI2-NEXT:    addq $8, %rsp
2269; X64-BMI2-NEXT:    popq %rbx
2270; X64-BMI2-NEXT:    popq %rbp
2271; X64-BMI2-NEXT:    retq
2272  %numhighbits = sub i8 32, %numlowbits ; computed in i8
2273  %sh_prom = zext i8 %numhighbits to i32 ; promote the shift amount to i32
2274  %mask = lshr i32 -1, %sh_prom ; -1 >> (32 - numlowbits)
2275  call void @use32(i32 %mask) ; extra use of the mask
2276  %masked = and i32 %mask, %val
2277  ret i32 %masked
2278}
2279
; Pattern c where the value to be masked is loaded from memory through %w
; rather than passed directly. The mask is also handed to @use32 before the
; 'and'.
2280define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
2281; X86-NOBMI-LABEL: bzhi32_c2_load:
2282; X86-NOBMI:       # %bb.0:
2283; X86-NOBMI-NEXT:    pushl %esi
2284; X86-NOBMI-NEXT:    subl $8, %esp
2285; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2286; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2287; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2288; X86-NOBMI-NEXT:    movl $-1, %edx
2289; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2290; X86-NOBMI-NEXT:    shrl %cl, %edx
2291; X86-NOBMI-NEXT:    movl (%eax), %esi
2292; X86-NOBMI-NEXT:    andl %edx, %esi
2293; X86-NOBMI-NEXT:    movl %edx, (%esp)
2294; X86-NOBMI-NEXT:    calll use32@PLT
2295; X86-NOBMI-NEXT:    movl %esi, %eax
2296; X86-NOBMI-NEXT:    addl $8, %esp
2297; X86-NOBMI-NEXT:    popl %esi
2298; X86-NOBMI-NEXT:    retl
2299;
2300; X86-BMI1-LABEL: bzhi32_c2_load:
2301; X86-BMI1:       # %bb.0:
2302; X86-BMI1-NEXT:    pushl %esi
2303; X86-BMI1-NEXT:    subl $8, %esp
2304; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2305; X86-BMI1-NEXT:    xorl %ecx, %ecx
2306; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2307; X86-BMI1-NEXT:    movl $-1, %edx
2308; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2309; X86-BMI1-NEXT:    shrl %cl, %edx
2310; X86-BMI1-NEXT:    movl (%eax), %esi
2311; X86-BMI1-NEXT:    andl %edx, %esi
2312; X86-BMI1-NEXT:    movl %edx, (%esp)
2313; X86-BMI1-NEXT:    calll use32@PLT
2314; X86-BMI1-NEXT:    movl %esi, %eax
2315; X86-BMI1-NEXT:    addl $8, %esp
2316; X86-BMI1-NEXT:    popl %esi
2317; X86-BMI1-NEXT:    retl
2318;
2319; X86-BMI2-LABEL: bzhi32_c2_load:
2320; X86-BMI2:       # %bb.0:
2321; X86-BMI2-NEXT:    pushl %esi
2322; X86-BMI2-NEXT:    subl $8, %esp
2323; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2324; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
2325; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %esi
2326; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
2327; X86-BMI2-NEXT:    negb %cl
2328; X86-BMI2-NEXT:    movl $-1, %eax
2329; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
2330; X86-BMI2-NEXT:    movl %eax, (%esp)
2331; X86-BMI2-NEXT:    calll use32@PLT
2332; X86-BMI2-NEXT:    movl %esi, %eax
2333; X86-BMI2-NEXT:    addl $8, %esp
2334; X86-BMI2-NEXT:    popl %esi
2335; X86-BMI2-NEXT:    retl
2336;
2337; X64-NOBMI-LABEL: bzhi32_c2_load:
2338; X64-NOBMI:       # %bb.0:
2339; X64-NOBMI-NEXT:    pushq %rbx
2340; X64-NOBMI-NEXT:    movl %esi, %ecx
2341; X64-NOBMI-NEXT:    negb %cl
2342; X64-NOBMI-NEXT:    movl $-1, %eax
2343; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2344; X64-NOBMI-NEXT:    shrl %cl, %eax
2345; X64-NOBMI-NEXT:    movl (%rdi), %ebx
2346; X64-NOBMI-NEXT:    andl %eax, %ebx
2347; X64-NOBMI-NEXT:    movl %eax, %edi
2348; X64-NOBMI-NEXT:    callq use32@PLT
2349; X64-NOBMI-NEXT:    movl %ebx, %eax
2350; X64-NOBMI-NEXT:    popq %rbx
2351; X64-NOBMI-NEXT:    retq
2352;
2353; X64-BMI1-LABEL: bzhi32_c2_load:
2354; X64-BMI1:       # %bb.0:
2355; X64-BMI1-NEXT:    pushq %rbx
2356; X64-BMI1-NEXT:    movl %esi, %ecx
2357; X64-BMI1-NEXT:    negb %cl
2358; X64-BMI1-NEXT:    movl $-1, %eax
2359; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2360; X64-BMI1-NEXT:    shrl %cl, %eax
2361; X64-BMI1-NEXT:    movl (%rdi), %ebx
2362; X64-BMI1-NEXT:    andl %eax, %ebx
2363; X64-BMI1-NEXT:    movl %eax, %edi
2364; X64-BMI1-NEXT:    callq use32@PLT
2365; X64-BMI1-NEXT:    movl %ebx, %eax
2366; X64-BMI1-NEXT:    popq %rbx
2367; X64-BMI1-NEXT:    retq
2368;
2369; X64-BMI2-LABEL: bzhi32_c2_load:
2370; X64-BMI2:       # %bb.0:
2371; X64-BMI2-NEXT:    pushq %rbx
2372; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
2373; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
2374; X64-BMI2-NEXT:    negb %sil
2375; X64-BMI2-NEXT:    movl $-1, %eax
2376; X64-BMI2-NEXT:    shrxl %esi, %eax, %edi
2377; X64-BMI2-NEXT:    callq use32@PLT
2378; X64-BMI2-NEXT:    movl %ebx, %eax
2379; X64-BMI2-NEXT:    popq %rbx
2380; X64-BMI2-NEXT:    retq
2381  %val = load i32, i32* %w ; value comes from memory
2382  %numhighbits = sub i32 32, %numlowbits ; how many high bits to clear
2383  %mask = lshr i32 -1, %numhighbits ; -1 >> (32 - numlowbits)
2384  call void @use32(i32 %mask) ; extra use of the mask
2385  %masked = and i32 %mask, %val
2386  ret i32 %masked
2387}
2388
; Pattern c combining both variations: the value is loaded through %w, and the
; bit count is an i8 that is subtracted from 32 in i8 and then zero-extended to
; i32 for the shift. The mask is also handed to @use32 before the 'and'.
2389define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
2390; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext:
2391; X86-NOBMI:       # %bb.0:
2392; X86-NOBMI-NEXT:    pushl %esi
2393; X86-NOBMI-NEXT:    subl $8, %esp
2394; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2395; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2396; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2397; X86-NOBMI-NEXT:    movl $-1, %edx
2398; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2399; X86-NOBMI-NEXT:    shrl %cl, %edx
2400; X86-NOBMI-NEXT:    movl (%eax), %esi
2401; X86-NOBMI-NEXT:    andl %edx, %esi
2402; X86-NOBMI-NEXT:    movl %edx, (%esp)
2403; X86-NOBMI-NEXT:    calll use32@PLT
2404; X86-NOBMI-NEXT:    movl %esi, %eax
2405; X86-NOBMI-NEXT:    addl $8, %esp
2406; X86-NOBMI-NEXT:    popl %esi
2407; X86-NOBMI-NEXT:    retl
2408;
2409; X86-BMI1-LABEL: bzhi32_c3_load_indexzext:
2410; X86-BMI1:       # %bb.0:
2411; X86-BMI1-NEXT:    pushl %esi
2412; X86-BMI1-NEXT:    subl $8, %esp
2413; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2414; X86-BMI1-NEXT:    xorl %ecx, %ecx
2415; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2416; X86-BMI1-NEXT:    movl $-1, %edx
2417; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2418; X86-BMI1-NEXT:    shrl %cl, %edx
2419; X86-BMI1-NEXT:    movl (%eax), %esi
2420; X86-BMI1-NEXT:    andl %edx, %esi
2421; X86-BMI1-NEXT:    movl %edx, (%esp)
2422; X86-BMI1-NEXT:    calll use32@PLT
2423; X86-BMI1-NEXT:    movl %esi, %eax
2424; X86-BMI1-NEXT:    addl $8, %esp
2425; X86-BMI1-NEXT:    popl %esi
2426; X86-BMI1-NEXT:    retl
2427;
2428; X86-BMI2-LABEL: bzhi32_c3_load_indexzext:
2429; X86-BMI2:       # %bb.0:
2430; X86-BMI2-NEXT:    pushl %esi
2431; X86-BMI2-NEXT:    subl $8, %esp
2432; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2433; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
2434; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %esi
2435; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
2436; X86-BMI2-NEXT:    negb %cl
2437; X86-BMI2-NEXT:    movl $-1, %eax
2438; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
2439; X86-BMI2-NEXT:    movl %eax, (%esp)
2440; X86-BMI2-NEXT:    calll use32@PLT
2441; X86-BMI2-NEXT:    movl %esi, %eax
2442; X86-BMI2-NEXT:    addl $8, %esp
2443; X86-BMI2-NEXT:    popl %esi
2444; X86-BMI2-NEXT:    retl
2445;
2446; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
2447; X64-NOBMI:       # %bb.0:
2448; X64-NOBMI-NEXT:    pushq %rbx
2449; X64-NOBMI-NEXT:    movl %esi, %ecx
2450; X64-NOBMI-NEXT:    negb %cl
2451; X64-NOBMI-NEXT:    movl $-1, %eax
2452; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2453; X64-NOBMI-NEXT:    shrl %cl, %eax
2454; X64-NOBMI-NEXT:    movl (%rdi), %ebx
2455; X64-NOBMI-NEXT:    andl %eax, %ebx
2456; X64-NOBMI-NEXT:    movl %eax, %edi
2457; X64-NOBMI-NEXT:    callq use32@PLT
2458; X64-NOBMI-NEXT:    movl %ebx, %eax
2459; X64-NOBMI-NEXT:    popq %rbx
2460; X64-NOBMI-NEXT:    retq
2461;
2462; X64-BMI1-LABEL: bzhi32_c3_load_indexzext:
2463; X64-BMI1:       # %bb.0:
2464; X64-BMI1-NEXT:    pushq %rbx
2465; X64-BMI1-NEXT:    movl %esi, %ecx
2466; X64-BMI1-NEXT:    negb %cl
2467; X64-BMI1-NEXT:    movl $-1, %eax
2468; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2469; X64-BMI1-NEXT:    shrl %cl, %eax
2470; X64-BMI1-NEXT:    movl (%rdi), %ebx
2471; X64-BMI1-NEXT:    andl %eax, %ebx
2472; X64-BMI1-NEXT:    movl %eax, %edi
2473; X64-BMI1-NEXT:    callq use32@PLT
2474; X64-BMI1-NEXT:    movl %ebx, %eax
2475; X64-BMI1-NEXT:    popq %rbx
2476; X64-BMI1-NEXT:    retq
2477;
2478; X64-BMI2-LABEL: bzhi32_c3_load_indexzext:
2479; X64-BMI2:       # %bb.0:
2480; X64-BMI2-NEXT:    pushq %rbx
2481; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
2482; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
2483; X64-BMI2-NEXT:    negb %sil
2484; X64-BMI2-NEXT:    movl $-1, %eax
2485; X64-BMI2-NEXT:    shrxl %esi, %eax, %edi
2486; X64-BMI2-NEXT:    callq use32@PLT
2487; X64-BMI2-NEXT:    movl %ebx, %eax
2488; X64-BMI2-NEXT:    popq %rbx
2489; X64-BMI2-NEXT:    retq
2490  %val = load i32, i32* %w ; value comes from memory
2491  %numhighbits = sub i8 32, %numlowbits ; computed in i8
2492  %sh_prom = zext i8 %numhighbits to i32 ; promote the shift amount to i32
2493  %mask = lshr i32 -1, %sh_prom ; -1 >> (32 - numlowbits)
2494  call void @use32(i32 %mask) ; extra use of the mask
2495  %masked = and i32 %mask, %val
2496  ret i32 %masked
2497}
2498
; Pattern c with the operands of the final 'and' written in the opposite order
; (%val first, %mask second). The mask is also handed to @use32 before the
; 'and'.
2499define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
2500; X86-NOBMI-LABEL: bzhi32_c4_commutative:
2501; X86-NOBMI:       # %bb.0:
2502; X86-NOBMI-NEXT:    pushl %esi
2503; X86-NOBMI-NEXT:    subl $8, %esp
2504; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2505; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2506; X86-NOBMI-NEXT:    movl $-1, %esi
2507; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2508; X86-NOBMI-NEXT:    shrl %cl, %esi
2509; X86-NOBMI-NEXT:    movl %esi, (%esp)
2510; X86-NOBMI-NEXT:    calll use32@PLT
2511; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
2512; X86-NOBMI-NEXT:    movl %esi, %eax
2513; X86-NOBMI-NEXT:    addl $8, %esp
2514; X86-NOBMI-NEXT:    popl %esi
2515; X86-NOBMI-NEXT:    retl
2516;
2517; X86-BMI1-LABEL: bzhi32_c4_commutative:
2518; X86-BMI1:       # %bb.0:
2519; X86-BMI1-NEXT:    pushl %esi
2520; X86-BMI1-NEXT:    subl $8, %esp
2521; X86-BMI1-NEXT:    xorl %ecx, %ecx
2522; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2523; X86-BMI1-NEXT:    movl $-1, %esi
2524; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2525; X86-BMI1-NEXT:    shrl %cl, %esi
2526; X86-BMI1-NEXT:    movl %esi, (%esp)
2527; X86-BMI1-NEXT:    calll use32@PLT
2528; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
2529; X86-BMI1-NEXT:    movl %esi, %eax
2530; X86-BMI1-NEXT:    addl $8, %esp
2531; X86-BMI1-NEXT:    popl %esi
2532; X86-BMI1-NEXT:    retl
2533;
2534; X86-BMI2-LABEL: bzhi32_c4_commutative:
2535; X86-BMI2:       # %bb.0:
2536; X86-BMI2-NEXT:    pushl %ebx
2537; X86-BMI2-NEXT:    subl $8, %esp
2538; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
2539; X86-BMI2-NEXT:    movl %ebx, %eax
2540; X86-BMI2-NEXT:    negb %al
2541; X86-BMI2-NEXT:    movl $-1, %ecx
2542; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
2543; X86-BMI2-NEXT:    movl %eax, (%esp)
2544; X86-BMI2-NEXT:    calll use32@PLT
2545; X86-BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
2546; X86-BMI2-NEXT:    addl $8, %esp
2547; X86-BMI2-NEXT:    popl %ebx
2548; X86-BMI2-NEXT:    retl
2549;
2550; X64-NOBMI-LABEL: bzhi32_c4_commutative:
2551; X64-NOBMI:       # %bb.0:
2552; X64-NOBMI-NEXT:    pushq %rbp
2553; X64-NOBMI-NEXT:    pushq %rbx
2554; X64-NOBMI-NEXT:    pushq %rax
2555; X64-NOBMI-NEXT:    movl %esi, %ecx
2556; X64-NOBMI-NEXT:    movl %edi, %ebx
2557; X64-NOBMI-NEXT:    negb %cl
2558; X64-NOBMI-NEXT:    movl $-1, %ebp
2559; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2560; X64-NOBMI-NEXT:    shrl %cl, %ebp
2561; X64-NOBMI-NEXT:    movl %ebp, %edi
2562; X64-NOBMI-NEXT:    callq use32@PLT
2563; X64-NOBMI-NEXT:    andl %ebx, %ebp
2564; X64-NOBMI-NEXT:    movl %ebp, %eax
2565; X64-NOBMI-NEXT:    addq $8, %rsp
2566; X64-NOBMI-NEXT:    popq %rbx
2567; X64-NOBMI-NEXT:    popq %rbp
2568; X64-NOBMI-NEXT:    retq
2569;
2570; X64-BMI1-LABEL: bzhi32_c4_commutative:
2571; X64-BMI1:       # %bb.0:
2572; X64-BMI1-NEXT:    pushq %rbp
2573; X64-BMI1-NEXT:    pushq %rbx
2574; X64-BMI1-NEXT:    pushq %rax
2575; X64-BMI1-NEXT:    movl %esi, %ecx
2576; X64-BMI1-NEXT:    movl %edi, %ebx
2577; X64-BMI1-NEXT:    negb %cl
2578; X64-BMI1-NEXT:    movl $-1, %ebp
2579; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2580; X64-BMI1-NEXT:    shrl %cl, %ebp
2581; X64-BMI1-NEXT:    movl %ebp, %edi
2582; X64-BMI1-NEXT:    callq use32@PLT
2583; X64-BMI1-NEXT:    andl %ebx, %ebp
2584; X64-BMI1-NEXT:    movl %ebp, %eax
2585; X64-BMI1-NEXT:    addq $8, %rsp
2586; X64-BMI1-NEXT:    popq %rbx
2587; X64-BMI1-NEXT:    popq %rbp
2588; X64-BMI1-NEXT:    retq
2589;
2590; X64-BMI2-LABEL: bzhi32_c4_commutative:
2591; X64-BMI2:       # %bb.0:
2592; X64-BMI2-NEXT:    pushq %rbp
2593; X64-BMI2-NEXT:    pushq %rbx
2594; X64-BMI2-NEXT:    pushq %rax
2595; X64-BMI2-NEXT:    movl %esi, %ebx
2596; X64-BMI2-NEXT:    movl %edi, %ebp
2597; X64-BMI2-NEXT:    movl %ebx, %eax
2598; X64-BMI2-NEXT:    negb %al
2599; X64-BMI2-NEXT:    movl $-1, %ecx
2600; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
2601; X64-BMI2-NEXT:    callq use32@PLT
2602; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
2603; X64-BMI2-NEXT:    addq $8, %rsp
2604; X64-BMI2-NEXT:    popq %rbx
2605; X64-BMI2-NEXT:    popq %rbp
2606; X64-BMI2-NEXT:    retq
2607  %numhighbits = sub i32 32, %numlowbits ; how many high bits to clear
2608  %mask = lshr i32 -1, %numhighbits ; -1 >> (32 - numlowbits)
2609  call void @use32(i32 %mask) ; extra use of the mask
2610  %masked = and i32 %val, %mask ; swapped order
2611  ret i32 %masked
2612}
2613
2614; 64-bit
2615
; External function taking an i64. The checked assembly of @bzhi64_c0 below
; calls it.
2616declare void @use64(i64)
2617
; Pattern c, 64-bit: %val & (-1 >> (64 - %numlowbits)), mask as first operand
; of the 'and'. The mask is also passed to @use64, so it has an extra use.
; On x86-64 with BMI2 the expectations use bzhiq for the masking and compute
; the mask for the call separately with shrxq; the other configurations
; compute the mask once and reuse it for the 'and'.
2618define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
2619; X86-NOBMI-LABEL: bzhi64_c0:
2620; X86-NOBMI:       # %bb.0:
2621; X86-NOBMI-NEXT:    pushl %edi
2622; X86-NOBMI-NEXT:    pushl %esi
2623; X86-NOBMI-NEXT:    pushl %eax
2624; X86-NOBMI-NEXT:    movb $64, %cl
2625; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2626; X86-NOBMI-NEXT:    movl $-1, %esi
2627; X86-NOBMI-NEXT:    movl $-1, %edi
2628; X86-NOBMI-NEXT:    shrl %cl, %edi
2629; X86-NOBMI-NEXT:    testb $32, %cl
2630; X86-NOBMI-NEXT:    je .LBB34_2
2631; X86-NOBMI-NEXT:  # %bb.1:
2632; X86-NOBMI-NEXT:    movl %edi, %esi
2633; X86-NOBMI-NEXT:    xorl %edi, %edi
2634; X86-NOBMI-NEXT:  .LBB34_2:
2635; X86-NOBMI-NEXT:    subl $8, %esp
2636; X86-NOBMI-NEXT:    pushl %edi
2637; X86-NOBMI-NEXT:    pushl %esi
2638; X86-NOBMI-NEXT:    calll use64@PLT
2639; X86-NOBMI-NEXT:    addl $16, %esp
2640; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
2641; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
2642; X86-NOBMI-NEXT:    movl %esi, %eax
2643; X86-NOBMI-NEXT:    movl %edi, %edx
2644; X86-NOBMI-NEXT:    addl $4, %esp
2645; X86-NOBMI-NEXT:    popl %esi
2646; X86-NOBMI-NEXT:    popl %edi
2647; X86-NOBMI-NEXT:    retl
2648;
2649; X86-BMI1-LABEL: bzhi64_c0:
2650; X86-BMI1:       # %bb.0:
2651; X86-BMI1-NEXT:    pushl %edi
2652; X86-BMI1-NEXT:    pushl %esi
2653; X86-BMI1-NEXT:    pushl %eax
2654; X86-BMI1-NEXT:    movb $64, %cl
2655; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2656; X86-BMI1-NEXT:    movl $-1, %esi
2657; X86-BMI1-NEXT:    movl $-1, %edi
2658; X86-BMI1-NEXT:    shrl %cl, %edi
2659; X86-BMI1-NEXT:    testb $32, %cl
2660; X86-BMI1-NEXT:    je .LBB34_2
2661; X86-BMI1-NEXT:  # %bb.1:
2662; X86-BMI1-NEXT:    movl %edi, %esi
2663; X86-BMI1-NEXT:    xorl %edi, %edi
2664; X86-BMI1-NEXT:  .LBB34_2:
2665; X86-BMI1-NEXT:    subl $8, %esp
2666; X86-BMI1-NEXT:    pushl %edi
2667; X86-BMI1-NEXT:    pushl %esi
2668; X86-BMI1-NEXT:    calll use64@PLT
2669; X86-BMI1-NEXT:    addl $16, %esp
2670; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
2671; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
2672; X86-BMI1-NEXT:    movl %esi, %eax
2673; X86-BMI1-NEXT:    movl %edi, %edx
2674; X86-BMI1-NEXT:    addl $4, %esp
2675; X86-BMI1-NEXT:    popl %esi
2676; X86-BMI1-NEXT:    popl %edi
2677; X86-BMI1-NEXT:    retl
2678;
2679; X86-BMI2-LABEL: bzhi64_c0:
2680; X86-BMI2:       # %bb.0:
2681; X86-BMI2-NEXT:    pushl %edi
2682; X86-BMI2-NEXT:    pushl %esi
2683; X86-BMI2-NEXT:    pushl %eax
2684; X86-BMI2-NEXT:    movb $64, %al
2685; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
2686; X86-BMI2-NEXT:    movl $-1, %edi
2687; X86-BMI2-NEXT:    shrxl %eax, %edi, %esi
2688; X86-BMI2-NEXT:    testb $32, %al
2689; X86-BMI2-NEXT:    je .LBB34_2
2690; X86-BMI2-NEXT:  # %bb.1:
2691; X86-BMI2-NEXT:    movl %esi, %edi
2692; X86-BMI2-NEXT:    xorl %esi, %esi
2693; X86-BMI2-NEXT:  .LBB34_2:
2694; X86-BMI2-NEXT:    subl $8, %esp
2695; X86-BMI2-NEXT:    pushl %esi
2696; X86-BMI2-NEXT:    pushl %edi
2697; X86-BMI2-NEXT:    calll use64@PLT
2698; X86-BMI2-NEXT:    addl $16, %esp
2699; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
2700; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
2701; X86-BMI2-NEXT:    movl %edi, %eax
2702; X86-BMI2-NEXT:    movl %esi, %edx
2703; X86-BMI2-NEXT:    addl $4, %esp
2704; X86-BMI2-NEXT:    popl %esi
2705; X86-BMI2-NEXT:    popl %edi
2706; X86-BMI2-NEXT:    retl
2707;
2708; X64-NOBMI-LABEL: bzhi64_c0:
2709; X64-NOBMI:       # %bb.0:
2710; X64-NOBMI-NEXT:    pushq %r14
2711; X64-NOBMI-NEXT:    pushq %rbx
2712; X64-NOBMI-NEXT:    pushq %rax
2713; X64-NOBMI-NEXT:    movq %rsi, %rcx
2714; X64-NOBMI-NEXT:    movq %rdi, %r14
2715; X64-NOBMI-NEXT:    negb %cl
2716; X64-NOBMI-NEXT:    movq $-1, %rbx
2717; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
2718; X64-NOBMI-NEXT:    shrq %cl, %rbx
2719; X64-NOBMI-NEXT:    movq %rbx, %rdi
2720; X64-NOBMI-NEXT:    callq use64@PLT
2721; X64-NOBMI-NEXT:    andq %r14, %rbx
2722; X64-NOBMI-NEXT:    movq %rbx, %rax
2723; X64-NOBMI-NEXT:    addq $8, %rsp
2724; X64-NOBMI-NEXT:    popq %rbx
2725; X64-NOBMI-NEXT:    popq %r14
2726; X64-NOBMI-NEXT:    retq
2727;
2728; X64-BMI1-LABEL: bzhi64_c0:
2729; X64-BMI1:       # %bb.0:
2730; X64-BMI1-NEXT:    pushq %r14
2731; X64-BMI1-NEXT:    pushq %rbx
2732; X64-BMI1-NEXT:    pushq %rax
2733; X64-BMI1-NEXT:    movq %rsi, %rcx
2734; X64-BMI1-NEXT:    movq %rdi, %r14
2735; X64-BMI1-NEXT:    negb %cl
2736; X64-BMI1-NEXT:    movq $-1, %rbx
2737; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
2738; X64-BMI1-NEXT:    shrq %cl, %rbx
2739; X64-BMI1-NEXT:    movq %rbx, %rdi
2740; X64-BMI1-NEXT:    callq use64@PLT
2741; X64-BMI1-NEXT:    andq %r14, %rbx
2742; X64-BMI1-NEXT:    movq %rbx, %rax
2743; X64-BMI1-NEXT:    addq $8, %rsp
2744; X64-BMI1-NEXT:    popq %rbx
2745; X64-BMI1-NEXT:    popq %r14
2746; X64-BMI1-NEXT:    retq
2747;
2748; X64-BMI2-LABEL: bzhi64_c0:
2749; X64-BMI2:       # %bb.0:
2750; X64-BMI2-NEXT:    pushq %r14
2751; X64-BMI2-NEXT:    pushq %rbx
2752; X64-BMI2-NEXT:    pushq %rax
2753; X64-BMI2-NEXT:    movq %rsi, %rbx
2754; X64-BMI2-NEXT:    movq %rdi, %r14
2755; X64-BMI2-NEXT:    movl %ebx, %eax
2756; X64-BMI2-NEXT:    negb %al
2757; X64-BMI2-NEXT:    movq $-1, %rcx
2758; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
2759; X64-BMI2-NEXT:    callq use64@PLT
2760; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
2761; X64-BMI2-NEXT:    addq $8, %rsp
2762; X64-BMI2-NEXT:    popq %rbx
2763; X64-BMI2-NEXT:    popq %r14
2764; X64-BMI2-NEXT:    retq
2765  %numhighbits = sub i64 64, %numlowbits
2766  %mask = lshr i64 -1, %numhighbits
2767  call void @use64(i64 %mask)
2768  %masked = and i64 %mask, %val
2769  ret i64 %masked
2770}
2771
; Pattern c, 64-bit, with an i8 bit count: the subtraction from 64 is done in
; i8 and the result is zero-extended to i64 to form the shift amount.
; The mask is also passed to @use64, so it has an extra use.
; On x86-64 with BMI2 the expectations still use bzhiq for the masking.
2772define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
2773; X86-NOBMI-LABEL: bzhi64_c1_indexzext:
2774; X86-NOBMI:       # %bb.0:
2775; X86-NOBMI-NEXT:    pushl %edi
2776; X86-NOBMI-NEXT:    pushl %esi
2777; X86-NOBMI-NEXT:    pushl %eax
2778; X86-NOBMI-NEXT:    movb $64, %cl
2779; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2780; X86-NOBMI-NEXT:    movl $-1, %esi
2781; X86-NOBMI-NEXT:    movl $-1, %edi
2782; X86-NOBMI-NEXT:    shrl %cl, %edi
2783; X86-NOBMI-NEXT:    testb $32, %cl
2784; X86-NOBMI-NEXT:    je .LBB35_2
2785; X86-NOBMI-NEXT:  # %bb.1:
2786; X86-NOBMI-NEXT:    movl %edi, %esi
2787; X86-NOBMI-NEXT:    xorl %edi, %edi
2788; X86-NOBMI-NEXT:  .LBB35_2:
2789; X86-NOBMI-NEXT:    subl $8, %esp
2790; X86-NOBMI-NEXT:    pushl %edi
2791; X86-NOBMI-NEXT:    pushl %esi
2792; X86-NOBMI-NEXT:    calll use64@PLT
2793; X86-NOBMI-NEXT:    addl $16, %esp
2794; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
2795; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
2796; X86-NOBMI-NEXT:    movl %esi, %eax
2797; X86-NOBMI-NEXT:    movl %edi, %edx
2798; X86-NOBMI-NEXT:    addl $4, %esp
2799; X86-NOBMI-NEXT:    popl %esi
2800; X86-NOBMI-NEXT:    popl %edi
2801; X86-NOBMI-NEXT:    retl
2802;
2803; X86-BMI1-LABEL: bzhi64_c1_indexzext:
2804; X86-BMI1:       # %bb.0:
2805; X86-BMI1-NEXT:    pushl %edi
2806; X86-BMI1-NEXT:    pushl %esi
2807; X86-BMI1-NEXT:    pushl %eax
2808; X86-BMI1-NEXT:    movb $64, %cl
2809; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2810; X86-BMI1-NEXT:    movl $-1, %esi
2811; X86-BMI1-NEXT:    movl $-1, %edi
2812; X86-BMI1-NEXT:    shrl %cl, %edi
2813; X86-BMI1-NEXT:    testb $32, %cl
2814; X86-BMI1-NEXT:    je .LBB35_2
2815; X86-BMI1-NEXT:  # %bb.1:
2816; X86-BMI1-NEXT:    movl %edi, %esi
2817; X86-BMI1-NEXT:    xorl %edi, %edi
2818; X86-BMI1-NEXT:  .LBB35_2:
2819; X86-BMI1-NEXT:    subl $8, %esp
2820; X86-BMI1-NEXT:    pushl %edi
2821; X86-BMI1-NEXT:    pushl %esi
2822; X86-BMI1-NEXT:    calll use64@PLT
2823; X86-BMI1-NEXT:    addl $16, %esp
2824; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
2825; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
2826; X86-BMI1-NEXT:    movl %esi, %eax
2827; X86-BMI1-NEXT:    movl %edi, %edx
2828; X86-BMI1-NEXT:    addl $4, %esp
2829; X86-BMI1-NEXT:    popl %esi
2830; X86-BMI1-NEXT:    popl %edi
2831; X86-BMI1-NEXT:    retl
2832;
2833; X86-BMI2-LABEL: bzhi64_c1_indexzext:
2834; X86-BMI2:       # %bb.0:
2835; X86-BMI2-NEXT:    pushl %edi
2836; X86-BMI2-NEXT:    pushl %esi
2837; X86-BMI2-NEXT:    pushl %eax
2838; X86-BMI2-NEXT:    movb $64, %al
2839; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
2840; X86-BMI2-NEXT:    movl $-1, %edi
2841; X86-BMI2-NEXT:    shrxl %eax, %edi, %esi
2842; X86-BMI2-NEXT:    testb $32, %al
2843; X86-BMI2-NEXT:    je .LBB35_2
2844; X86-BMI2-NEXT:  # %bb.1:
2845; X86-BMI2-NEXT:    movl %esi, %edi
2846; X86-BMI2-NEXT:    xorl %esi, %esi
2847; X86-BMI2-NEXT:  .LBB35_2:
2848; X86-BMI2-NEXT:    subl $8, %esp
2849; X86-BMI2-NEXT:    pushl %esi
2850; X86-BMI2-NEXT:    pushl %edi
2851; X86-BMI2-NEXT:    calll use64@PLT
2852; X86-BMI2-NEXT:    addl $16, %esp
2853; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
2854; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
2855; X86-BMI2-NEXT:    movl %edi, %eax
2856; X86-BMI2-NEXT:    movl %esi, %edx
2857; X86-BMI2-NEXT:    addl $4, %esp
2858; X86-BMI2-NEXT:    popl %esi
2859; X86-BMI2-NEXT:    popl %edi
2860; X86-BMI2-NEXT:    retl
2861;
2862; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
2863; X64-NOBMI:       # %bb.0:
2864; X64-NOBMI-NEXT:    pushq %r14
2865; X64-NOBMI-NEXT:    pushq %rbx
2866; X64-NOBMI-NEXT:    pushq %rax
2867; X64-NOBMI-NEXT:    movl %esi, %ecx
2868; X64-NOBMI-NEXT:    movq %rdi, %r14
2869; X64-NOBMI-NEXT:    negb %cl
2870; X64-NOBMI-NEXT:    movq $-1, %rbx
2871; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2872; X64-NOBMI-NEXT:    shrq %cl, %rbx
2873; X64-NOBMI-NEXT:    movq %rbx, %rdi
2874; X64-NOBMI-NEXT:    callq use64@PLT
2875; X64-NOBMI-NEXT:    andq %r14, %rbx
2876; X64-NOBMI-NEXT:    movq %rbx, %rax
2877; X64-NOBMI-NEXT:    addq $8, %rsp
2878; X64-NOBMI-NEXT:    popq %rbx
2879; X64-NOBMI-NEXT:    popq %r14
2880; X64-NOBMI-NEXT:    retq
2881;
2882; X64-BMI1-LABEL: bzhi64_c1_indexzext:
2883; X64-BMI1:       # %bb.0:
2884; X64-BMI1-NEXT:    pushq %r14
2885; X64-BMI1-NEXT:    pushq %rbx
2886; X64-BMI1-NEXT:    pushq %rax
2887; X64-BMI1-NEXT:    movl %esi, %ecx
2888; X64-BMI1-NEXT:    movq %rdi, %r14
2889; X64-BMI1-NEXT:    negb %cl
2890; X64-BMI1-NEXT:    movq $-1, %rbx
2891; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2892; X64-BMI1-NEXT:    shrq %cl, %rbx
2893; X64-BMI1-NEXT:    movq %rbx, %rdi
2894; X64-BMI1-NEXT:    callq use64@PLT
2895; X64-BMI1-NEXT:    andq %r14, %rbx
2896; X64-BMI1-NEXT:    movq %rbx, %rax
2897; X64-BMI1-NEXT:    addq $8, %rsp
2898; X64-BMI1-NEXT:    popq %rbx
2899; X64-BMI1-NEXT:    popq %r14
2900; X64-BMI1-NEXT:    retq
2901;
2902; X64-BMI2-LABEL: bzhi64_c1_indexzext:
2903; X64-BMI2:       # %bb.0:
2904; X64-BMI2-NEXT:    pushq %r14
2905; X64-BMI2-NEXT:    pushq %rbx
2906; X64-BMI2-NEXT:    pushq %rax
2907; X64-BMI2-NEXT:    movl %esi, %ebx
2908; X64-BMI2-NEXT:    movq %rdi, %r14
2909; X64-BMI2-NEXT:    movl %ebx, %eax
2910; X64-BMI2-NEXT:    negb %al
2911; X64-BMI2-NEXT:    movq $-1, %rcx
2912; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
2913; X64-BMI2-NEXT:    callq use64@PLT
2914; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
2915; X64-BMI2-NEXT:    addq $8, %rsp
2916; X64-BMI2-NEXT:    popq %rbx
2917; X64-BMI2-NEXT:    popq %r14
2918; X64-BMI2-NEXT:    retq
2919  %numhighbits = sub i8 64, %numlowbits
2920  %sh_prom = zext i8 %numhighbits to i64
2921  %mask = lshr i64 -1, %sh_prom
2922  call void @use64(i64 %mask)
2923  %masked = and i64 %mask, %val
2924  ret i64 %masked
2925}
2926
; Pattern c, 64-bit, where the value to be masked is loaded from %w instead of
; being passed directly. The mask is also passed to @use64 (extra use).
; On x86-64 with BMI2 the expectations use bzhiq with the load folded in as a
; memory operand.
2927define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
2928; X86-NOBMI-LABEL: bzhi64_c2_load:
2929; X86-NOBMI:       # %bb.0:
2930; X86-NOBMI-NEXT:    pushl %ebx
2931; X86-NOBMI-NEXT:    pushl %edi
2932; X86-NOBMI-NEXT:    pushl %esi
2933; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2934; X86-NOBMI-NEXT:    movb $64, %cl
2935; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2936; X86-NOBMI-NEXT:    movl $-1, %eax
2937; X86-NOBMI-NEXT:    movl $-1, %ebx
2938; X86-NOBMI-NEXT:    shrl %cl, %ebx
2939; X86-NOBMI-NEXT:    testb $32, %cl
2940; X86-NOBMI-NEXT:    je .LBB36_2
2941; X86-NOBMI-NEXT:  # %bb.1:
2942; X86-NOBMI-NEXT:    movl %ebx, %eax
2943; X86-NOBMI-NEXT:    xorl %ebx, %ebx
2944; X86-NOBMI-NEXT:  .LBB36_2:
2945; X86-NOBMI-NEXT:    movl 4(%edx), %esi
2946; X86-NOBMI-NEXT:    andl %ebx, %esi
2947; X86-NOBMI-NEXT:    movl (%edx), %edi
2948; X86-NOBMI-NEXT:    andl %eax, %edi
2949; X86-NOBMI-NEXT:    subl $8, %esp
2950; X86-NOBMI-NEXT:    pushl %ebx
2951; X86-NOBMI-NEXT:    pushl %eax
2952; X86-NOBMI-NEXT:    calll use64@PLT
2953; X86-NOBMI-NEXT:    addl $16, %esp
2954; X86-NOBMI-NEXT:    movl %edi, %eax
2955; X86-NOBMI-NEXT:    movl %esi, %edx
2956; X86-NOBMI-NEXT:    popl %esi
2957; X86-NOBMI-NEXT:    popl %edi
2958; X86-NOBMI-NEXT:    popl %ebx
2959; X86-NOBMI-NEXT:    retl
2960;
2961; X86-BMI1-LABEL: bzhi64_c2_load:
2962; X86-BMI1:       # %bb.0:
2963; X86-BMI1-NEXT:    pushl %ebx
2964; X86-BMI1-NEXT:    pushl %edi
2965; X86-BMI1-NEXT:    pushl %esi
2966; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2967; X86-BMI1-NEXT:    movb $64, %cl
2968; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2969; X86-BMI1-NEXT:    movl $-1, %eax
2970; X86-BMI1-NEXT:    movl $-1, %ebx
2971; X86-BMI1-NEXT:    shrl %cl, %ebx
2972; X86-BMI1-NEXT:    testb $32, %cl
2973; X86-BMI1-NEXT:    je .LBB36_2
2974; X86-BMI1-NEXT:  # %bb.1:
2975; X86-BMI1-NEXT:    movl %ebx, %eax
2976; X86-BMI1-NEXT:    xorl %ebx, %ebx
2977; X86-BMI1-NEXT:  .LBB36_2:
2978; X86-BMI1-NEXT:    movl 4(%edx), %esi
2979; X86-BMI1-NEXT:    andl %ebx, %esi
2980; X86-BMI1-NEXT:    movl (%edx), %edi
2981; X86-BMI1-NEXT:    andl %eax, %edi
2982; X86-BMI1-NEXT:    subl $8, %esp
2983; X86-BMI1-NEXT:    pushl %ebx
2984; X86-BMI1-NEXT:    pushl %eax
2985; X86-BMI1-NEXT:    calll use64@PLT
2986; X86-BMI1-NEXT:    addl $16, %esp
2987; X86-BMI1-NEXT:    movl %edi, %eax
2988; X86-BMI1-NEXT:    movl %esi, %edx
2989; X86-BMI1-NEXT:    popl %esi
2990; X86-BMI1-NEXT:    popl %edi
2991; X86-BMI1-NEXT:    popl %ebx
2992; X86-BMI1-NEXT:    retl
2993;
2994; X86-BMI2-LABEL: bzhi64_c2_load:
2995; X86-BMI2:       # %bb.0:
2996; X86-BMI2-NEXT:    pushl %ebx
2997; X86-BMI2-NEXT:    pushl %edi
2998; X86-BMI2-NEXT:    pushl %esi
2999; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3000; X86-BMI2-NEXT:    movb $64, %bl
3001; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
3002; X86-BMI2-NEXT:    movl $-1, %ecx
3003; X86-BMI2-NEXT:    shrxl %ebx, %ecx, %edx
3004; X86-BMI2-NEXT:    testb $32, %bl
3005; X86-BMI2-NEXT:    je .LBB36_2
3006; X86-BMI2-NEXT:  # %bb.1:
3007; X86-BMI2-NEXT:    movl %edx, %ecx
3008; X86-BMI2-NEXT:    xorl %edx, %edx
3009; X86-BMI2-NEXT:  .LBB36_2:
3010; X86-BMI2-NEXT:    movl 4(%eax), %esi
3011; X86-BMI2-NEXT:    andl %edx, %esi
3012; X86-BMI2-NEXT:    movl (%eax), %edi
3013; X86-BMI2-NEXT:    andl %ecx, %edi
3014; X86-BMI2-NEXT:    subl $8, %esp
3015; X86-BMI2-NEXT:    pushl %edx
3016; X86-BMI2-NEXT:    pushl %ecx
3017; X86-BMI2-NEXT:    calll use64@PLT
3018; X86-BMI2-NEXT:    addl $16, %esp
3019; X86-BMI2-NEXT:    movl %edi, %eax
3020; X86-BMI2-NEXT:    movl %esi, %edx
3021; X86-BMI2-NEXT:    popl %esi
3022; X86-BMI2-NEXT:    popl %edi
3023; X86-BMI2-NEXT:    popl %ebx
3024; X86-BMI2-NEXT:    retl
3025;
3026; X64-NOBMI-LABEL: bzhi64_c2_load:
3027; X64-NOBMI:       # %bb.0:
3028; X64-NOBMI-NEXT:    pushq %rbx
3029; X64-NOBMI-NEXT:    movq %rsi, %rcx
3030; X64-NOBMI-NEXT:    negb %cl
3031; X64-NOBMI-NEXT:    movq $-1, %rax
3032; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3033; X64-NOBMI-NEXT:    shrq %cl, %rax
3034; X64-NOBMI-NEXT:    movq (%rdi), %rbx
3035; X64-NOBMI-NEXT:    andq %rax, %rbx
3036; X64-NOBMI-NEXT:    movq %rax, %rdi
3037; X64-NOBMI-NEXT:    callq use64@PLT
3038; X64-NOBMI-NEXT:    movq %rbx, %rax
3039; X64-NOBMI-NEXT:    popq %rbx
3040; X64-NOBMI-NEXT:    retq
3041;
3042; X64-BMI1-LABEL: bzhi64_c2_load:
3043; X64-BMI1:       # %bb.0:
3044; X64-BMI1-NEXT:    pushq %rbx
3045; X64-BMI1-NEXT:    movq %rsi, %rcx
3046; X64-BMI1-NEXT:    negb %cl
3047; X64-BMI1-NEXT:    movq $-1, %rax
3048; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
3049; X64-BMI1-NEXT:    shrq %cl, %rax
3050; X64-BMI1-NEXT:    movq (%rdi), %rbx
3051; X64-BMI1-NEXT:    andq %rax, %rbx
3052; X64-BMI1-NEXT:    movq %rax, %rdi
3053; X64-BMI1-NEXT:    callq use64@PLT
3054; X64-BMI1-NEXT:    movq %rbx, %rax
3055; X64-BMI1-NEXT:    popq %rbx
3056; X64-BMI1-NEXT:    retq
3057;
3058; X64-BMI2-LABEL: bzhi64_c2_load:
3059; X64-BMI2:       # %bb.0:
3060; X64-BMI2-NEXT:    pushq %rbx
3061; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
3062; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
3063; X64-BMI2-NEXT:    negb %sil
3064; X64-BMI2-NEXT:    movq $-1, %rax
3065; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rdi
3066; X64-BMI2-NEXT:    callq use64@PLT
3067; X64-BMI2-NEXT:    movq %rbx, %rax
3068; X64-BMI2-NEXT:    popq %rbx
3069; X64-BMI2-NEXT:    retq
3070  %val = load i64, i64* %w
3071  %numhighbits = sub i64 64, %numlowbits
3072  %mask = lshr i64 -1, %numhighbits
3073  call void @use64(i64 %mask)
3074  %masked = and i64 %mask, %val
3075  ret i64 %masked
3076}
3077
; Pattern c, 64-bit, combining both variations: the value is loaded from %w,
; and the bit count is an i8 that is subtracted from 64 in i8 and then
; zero-extended to i64 for the shift. The mask is also passed to @use64.
; On x86-64 with BMI2 the expectations use bzhiq with a memory operand.
3078define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
3079; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext:
3080; X86-NOBMI:       # %bb.0:
3081; X86-NOBMI-NEXT:    pushl %ebx
3082; X86-NOBMI-NEXT:    pushl %edi
3083; X86-NOBMI-NEXT:    pushl %esi
3084; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
3085; X86-NOBMI-NEXT:    movb $64, %cl
3086; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3087; X86-NOBMI-NEXT:    movl $-1, %eax
3088; X86-NOBMI-NEXT:    movl $-1, %ebx
3089; X86-NOBMI-NEXT:    shrl %cl, %ebx
3090; X86-NOBMI-NEXT:    testb $32, %cl
3091; X86-NOBMI-NEXT:    je .LBB37_2
3092; X86-NOBMI-NEXT:  # %bb.1:
3093; X86-NOBMI-NEXT:    movl %ebx, %eax
3094; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3095; X86-NOBMI-NEXT:  .LBB37_2:
3096; X86-NOBMI-NEXT:    movl 4(%edx), %esi
3097; X86-NOBMI-NEXT:    andl %ebx, %esi
3098; X86-NOBMI-NEXT:    movl (%edx), %edi
3099; X86-NOBMI-NEXT:    andl %eax, %edi
3100; X86-NOBMI-NEXT:    subl $8, %esp
3101; X86-NOBMI-NEXT:    pushl %ebx
3102; X86-NOBMI-NEXT:    pushl %eax
3103; X86-NOBMI-NEXT:    calll use64@PLT
3104; X86-NOBMI-NEXT:    addl $16, %esp
3105; X86-NOBMI-NEXT:    movl %edi, %eax
3106; X86-NOBMI-NEXT:    movl %esi, %edx
3107; X86-NOBMI-NEXT:    popl %esi
3108; X86-NOBMI-NEXT:    popl %edi
3109; X86-NOBMI-NEXT:    popl %ebx
3110; X86-NOBMI-NEXT:    retl
3111;
3112; X86-BMI1-LABEL: bzhi64_c3_load_indexzext:
3113; X86-BMI1:       # %bb.0:
3114; X86-BMI1-NEXT:    pushl %ebx
3115; X86-BMI1-NEXT:    pushl %edi
3116; X86-BMI1-NEXT:    pushl %esi
3117; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
3118; X86-BMI1-NEXT:    movb $64, %cl
3119; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3120; X86-BMI1-NEXT:    movl $-1, %eax
3121; X86-BMI1-NEXT:    movl $-1, %ebx
3122; X86-BMI1-NEXT:    shrl %cl, %ebx
3123; X86-BMI1-NEXT:    testb $32, %cl
3124; X86-BMI1-NEXT:    je .LBB37_2
3125; X86-BMI1-NEXT:  # %bb.1:
3126; X86-BMI1-NEXT:    movl %ebx, %eax
3127; X86-BMI1-NEXT:    xorl %ebx, %ebx
3128; X86-BMI1-NEXT:  .LBB37_2:
3129; X86-BMI1-NEXT:    movl 4(%edx), %esi
3130; X86-BMI1-NEXT:    andl %ebx, %esi
3131; X86-BMI1-NEXT:    movl (%edx), %edi
3132; X86-BMI1-NEXT:    andl %eax, %edi
3133; X86-BMI1-NEXT:    subl $8, %esp
3134; X86-BMI1-NEXT:    pushl %ebx
3135; X86-BMI1-NEXT:    pushl %eax
3136; X86-BMI1-NEXT:    calll use64@PLT
3137; X86-BMI1-NEXT:    addl $16, %esp
3138; X86-BMI1-NEXT:    movl %edi, %eax
3139; X86-BMI1-NEXT:    movl %esi, %edx
3140; X86-BMI1-NEXT:    popl %esi
3141; X86-BMI1-NEXT:    popl %edi
3142; X86-BMI1-NEXT:    popl %ebx
3143; X86-BMI1-NEXT:    retl
3144;
3145; X86-BMI2-LABEL: bzhi64_c3_load_indexzext:
3146; X86-BMI2:       # %bb.0:
3147; X86-BMI2-NEXT:    pushl %ebx
3148; X86-BMI2-NEXT:    pushl %edi
3149; X86-BMI2-NEXT:    pushl %esi
3150; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3151; X86-BMI2-NEXT:    movb $64, %bl
3152; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
3153; X86-BMI2-NEXT:    movl $-1, %ecx
3154; X86-BMI2-NEXT:    shrxl %ebx, %ecx, %edx
3155; X86-BMI2-NEXT:    testb $32, %bl
3156; X86-BMI2-NEXT:    je .LBB37_2
3157; X86-BMI2-NEXT:  # %bb.1:
3158; X86-BMI2-NEXT:    movl %edx, %ecx
3159; X86-BMI2-NEXT:    xorl %edx, %edx
3160; X86-BMI2-NEXT:  .LBB37_2:
3161; X86-BMI2-NEXT:    movl 4(%eax), %esi
3162; X86-BMI2-NEXT:    andl %edx, %esi
3163; X86-BMI2-NEXT:    movl (%eax), %edi
3164; X86-BMI2-NEXT:    andl %ecx, %edi
3165; X86-BMI2-NEXT:    subl $8, %esp
3166; X86-BMI2-NEXT:    pushl %edx
3167; X86-BMI2-NEXT:    pushl %ecx
3168; X86-BMI2-NEXT:    calll use64@PLT
3169; X86-BMI2-NEXT:    addl $16, %esp
3170; X86-BMI2-NEXT:    movl %edi, %eax
3171; X86-BMI2-NEXT:    movl %esi, %edx
3172; X86-BMI2-NEXT:    popl %esi
3173; X86-BMI2-NEXT:    popl %edi
3174; X86-BMI2-NEXT:    popl %ebx
3175; X86-BMI2-NEXT:    retl
3176;
3177; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
3178; X64-NOBMI:       # %bb.0:
3179; X64-NOBMI-NEXT:    pushq %rbx
3180; X64-NOBMI-NEXT:    movl %esi, %ecx
3181; X64-NOBMI-NEXT:    negb %cl
3182; X64-NOBMI-NEXT:    movq $-1, %rax
3183; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3184; X64-NOBMI-NEXT:    shrq %cl, %rax
3185; X64-NOBMI-NEXT:    movq (%rdi), %rbx
3186; X64-NOBMI-NEXT:    andq %rax, %rbx
3187; X64-NOBMI-NEXT:    movq %rax, %rdi
3188; X64-NOBMI-NEXT:    callq use64@PLT
3189; X64-NOBMI-NEXT:    movq %rbx, %rax
3190; X64-NOBMI-NEXT:    popq %rbx
3191; X64-NOBMI-NEXT:    retq
3192;
3193; X64-BMI1-LABEL: bzhi64_c3_load_indexzext:
3194; X64-BMI1:       # %bb.0:
3195; X64-BMI1-NEXT:    pushq %rbx
3196; X64-BMI1-NEXT:    movl %esi, %ecx
3197; X64-BMI1-NEXT:    negb %cl
3198; X64-BMI1-NEXT:    movq $-1, %rax
3199; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
3200; X64-BMI1-NEXT:    shrq %cl, %rax
3201; X64-BMI1-NEXT:    movq (%rdi), %rbx
3202; X64-BMI1-NEXT:    andq %rax, %rbx
3203; X64-BMI1-NEXT:    movq %rax, %rdi
3204; X64-BMI1-NEXT:    callq use64@PLT
3205; X64-BMI1-NEXT:    movq %rbx, %rax
3206; X64-BMI1-NEXT:    popq %rbx
3207; X64-BMI1-NEXT:    retq
3208;
3209; X64-BMI2-LABEL: bzhi64_c3_load_indexzext:
3210; X64-BMI2:       # %bb.0:
3211; X64-BMI2-NEXT:    pushq %rbx
3212; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
3213; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
3214; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
3215; X64-BMI2-NEXT:    negb %sil
3216; X64-BMI2-NEXT:    movq $-1, %rax
3217; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rdi
3218; X64-BMI2-NEXT:    callq use64@PLT
3219; X64-BMI2-NEXT:    movq %rbx, %rax
3220; X64-BMI2-NEXT:    popq %rbx
3221; X64-BMI2-NEXT:    retq
3222  %val = load i64, i64* %w
3223  %numhighbits = sub i8 64, %numlowbits
3224  %sh_prom = zext i8 %numhighbits to i64
3225  %mask = lshr i64 -1, %sh_prom
3226  call void @use64(i64 %mask)
3227  %masked = and i64 %mask, %val
3228  ret i64 %masked
3229}
3230
; Pattern c, 64-bit, with the 'and' operands swapped: the value is the first
; operand and the mask the second. The mask is also passed to @use64.
; On x86-64 with BMI2 the expectations still use bzhiq for the masking.
3231define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
3232; X86-NOBMI-LABEL: bzhi64_c4_commutative:
3233; X86-NOBMI:       # %bb.0:
3234; X86-NOBMI-NEXT:    pushl %edi
3235; X86-NOBMI-NEXT:    pushl %esi
3236; X86-NOBMI-NEXT:    pushl %eax
3237; X86-NOBMI-NEXT:    movb $64, %cl
3238; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3239; X86-NOBMI-NEXT:    movl $-1, %esi
3240; X86-NOBMI-NEXT:    movl $-1, %edi
3241; X86-NOBMI-NEXT:    shrl %cl, %edi
3242; X86-NOBMI-NEXT:    testb $32, %cl
3243; X86-NOBMI-NEXT:    je .LBB38_2
3244; X86-NOBMI-NEXT:  # %bb.1:
3245; X86-NOBMI-NEXT:    movl %edi, %esi
3246; X86-NOBMI-NEXT:    xorl %edi, %edi
3247; X86-NOBMI-NEXT:  .LBB38_2:
3248; X86-NOBMI-NEXT:    subl $8, %esp
3249; X86-NOBMI-NEXT:    pushl %edi
3250; X86-NOBMI-NEXT:    pushl %esi
3251; X86-NOBMI-NEXT:    calll use64@PLT
3252; X86-NOBMI-NEXT:    addl $16, %esp
3253; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
3254; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
3255; X86-NOBMI-NEXT:    movl %esi, %eax
3256; X86-NOBMI-NEXT:    movl %edi, %edx
3257; X86-NOBMI-NEXT:    addl $4, %esp
3258; X86-NOBMI-NEXT:    popl %esi
3259; X86-NOBMI-NEXT:    popl %edi
3260; X86-NOBMI-NEXT:    retl
3261;
3262; X86-BMI1-LABEL: bzhi64_c4_commutative:
3263; X86-BMI1:       # %bb.0:
3264; X86-BMI1-NEXT:    pushl %edi
3265; X86-BMI1-NEXT:    pushl %esi
3266; X86-BMI1-NEXT:    pushl %eax
3267; X86-BMI1-NEXT:    movb $64, %cl
3268; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3269; X86-BMI1-NEXT:    movl $-1, %esi
3270; X86-BMI1-NEXT:    movl $-1, %edi
3271; X86-BMI1-NEXT:    shrl %cl, %edi
3272; X86-BMI1-NEXT:    testb $32, %cl
3273; X86-BMI1-NEXT:    je .LBB38_2
3274; X86-BMI1-NEXT:  # %bb.1:
3275; X86-BMI1-NEXT:    movl %edi, %esi
3276; X86-BMI1-NEXT:    xorl %edi, %edi
3277; X86-BMI1-NEXT:  .LBB38_2:
3278; X86-BMI1-NEXT:    subl $8, %esp
3279; X86-BMI1-NEXT:    pushl %edi
3280; X86-BMI1-NEXT:    pushl %esi
3281; X86-BMI1-NEXT:    calll use64@PLT
3282; X86-BMI1-NEXT:    addl $16, %esp
3283; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
3284; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
3285; X86-BMI1-NEXT:    movl %esi, %eax
3286; X86-BMI1-NEXT:    movl %edi, %edx
3287; X86-BMI1-NEXT:    addl $4, %esp
3288; X86-BMI1-NEXT:    popl %esi
3289; X86-BMI1-NEXT:    popl %edi
3290; X86-BMI1-NEXT:    retl
3291;
3292; X86-BMI2-LABEL: bzhi64_c4_commutative:
3293; X86-BMI2:       # %bb.0:
3294; X86-BMI2-NEXT:    pushl %edi
3295; X86-BMI2-NEXT:    pushl %esi
3296; X86-BMI2-NEXT:    pushl %eax
3297; X86-BMI2-NEXT:    movb $64, %al
3298; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
3299; X86-BMI2-NEXT:    movl $-1, %edi
3300; X86-BMI2-NEXT:    shrxl %eax, %edi, %esi
3301; X86-BMI2-NEXT:    testb $32, %al
3302; X86-BMI2-NEXT:    je .LBB38_2
3303; X86-BMI2-NEXT:  # %bb.1:
3304; X86-BMI2-NEXT:    movl %esi, %edi
3305; X86-BMI2-NEXT:    xorl %esi, %esi
3306; X86-BMI2-NEXT:  .LBB38_2:
3307; X86-BMI2-NEXT:    subl $8, %esp
3308; X86-BMI2-NEXT:    pushl %esi
3309; X86-BMI2-NEXT:    pushl %edi
3310; X86-BMI2-NEXT:    calll use64@PLT
3311; X86-BMI2-NEXT:    addl $16, %esp
3312; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
3313; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
3314; X86-BMI2-NEXT:    movl %edi, %eax
3315; X86-BMI2-NEXT:    movl %esi, %edx
3316; X86-BMI2-NEXT:    addl $4, %esp
3317; X86-BMI2-NEXT:    popl %esi
3318; X86-BMI2-NEXT:    popl %edi
3319; X86-BMI2-NEXT:    retl
3320;
3321; X64-NOBMI-LABEL: bzhi64_c4_commutative:
3322; X64-NOBMI:       # %bb.0:
3323; X64-NOBMI-NEXT:    pushq %r14
3324; X64-NOBMI-NEXT:    pushq %rbx
3325; X64-NOBMI-NEXT:    pushq %rax
3326; X64-NOBMI-NEXT:    movq %rsi, %rcx
3327; X64-NOBMI-NEXT:    movq %rdi, %r14
3328; X64-NOBMI-NEXT:    negb %cl
3329; X64-NOBMI-NEXT:    movq $-1, %rbx
3330; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3331; X64-NOBMI-NEXT:    shrq %cl, %rbx
3332; X64-NOBMI-NEXT:    movq %rbx, %rdi
3333; X64-NOBMI-NEXT:    callq use64@PLT
3334; X64-NOBMI-NEXT:    andq %r14, %rbx
3335; X64-NOBMI-NEXT:    movq %rbx, %rax
3336; X64-NOBMI-NEXT:    addq $8, %rsp
3337; X64-NOBMI-NEXT:    popq %rbx
3338; X64-NOBMI-NEXT:    popq %r14
3339; X64-NOBMI-NEXT:    retq
3340;
3341; X64-BMI1-LABEL: bzhi64_c4_commutative:
3342; X64-BMI1:       # %bb.0:
3343; X64-BMI1-NEXT:    pushq %r14
3344; X64-BMI1-NEXT:    pushq %rbx
3345; X64-BMI1-NEXT:    pushq %rax
3346; X64-BMI1-NEXT:    movq %rsi, %rcx
3347; X64-BMI1-NEXT:    movq %rdi, %r14
3348; X64-BMI1-NEXT:    negb %cl
3349; X64-BMI1-NEXT:    movq $-1, %rbx
3350; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
3351; X64-BMI1-NEXT:    shrq %cl, %rbx
3352; X64-BMI1-NEXT:    movq %rbx, %rdi
3353; X64-BMI1-NEXT:    callq use64@PLT
3354; X64-BMI1-NEXT:    andq %r14, %rbx
3355; X64-BMI1-NEXT:    movq %rbx, %rax
3356; X64-BMI1-NEXT:    addq $8, %rsp
3357; X64-BMI1-NEXT:    popq %rbx
3358; X64-BMI1-NEXT:    popq %r14
3359; X64-BMI1-NEXT:    retq
3360;
3361; X64-BMI2-LABEL: bzhi64_c4_commutative:
3362; X64-BMI2:       # %bb.0:
3363; X64-BMI2-NEXT:    pushq %r14
3364; X64-BMI2-NEXT:    pushq %rbx
3365; X64-BMI2-NEXT:    pushq %rax
3366; X64-BMI2-NEXT:    movq %rsi, %rbx
3367; X64-BMI2-NEXT:    movq %rdi, %r14
3368; X64-BMI2-NEXT:    movl %ebx, %eax
3369; X64-BMI2-NEXT:    negb %al
3370; X64-BMI2-NEXT:    movq $-1, %rcx
3371; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
3372; X64-BMI2-NEXT:    callq use64@PLT
3373; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
3374; X64-BMI2-NEXT:    addq $8, %rsp
3375; X64-BMI2-NEXT:    popq %rbx
3376; X64-BMI2-NEXT:    popq %r14
3377; X64-BMI2-NEXT:    retq
3378  %numhighbits = sub i64 64, %numlowbits
3379  %mask = lshr i64 -1, %numhighbits
3380  call void @use64(i64 %mask)
3381  %masked = and i64 %val, %mask ; swapped order
3382  ret i64 %masked
3383}
3384
3385; 64-bit, but with 32-bit output
3386
3387; Everything done in 64-bit, truncation happens last.
; Pattern c with an i64 mask and an i64 'and'; only the final result is
; truncated to i32. Unlike the tests that call @use64, the mask here has no
; other use. With BMI on x86-64 the expectations collapse to one 32-bit
; instruction pair at most: shll+bextrl for BMI1, bzhil for BMI2.
3388define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
3389; X86-NOBMI-LABEL: bzhi64_32_c0:
3390; X86-NOBMI:       # %bb.0:
3391; X86-NOBMI-NEXT:    movb $64, %cl
3392; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3393; X86-NOBMI-NEXT:    movl $-1, %eax
3394; X86-NOBMI-NEXT:    shrl %cl, %eax
3395; X86-NOBMI-NEXT:    testb $32, %cl
3396; X86-NOBMI-NEXT:    jne .LBB39_2
3397; X86-NOBMI-NEXT:  # %bb.1:
3398; X86-NOBMI-NEXT:    movl $-1, %eax
3399; X86-NOBMI-NEXT:  .LBB39_2:
3400; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
3401; X86-NOBMI-NEXT:    retl
3402;
3403; X86-BMI1-LABEL: bzhi64_32_c0:
3404; X86-BMI1:       # %bb.0:
3405; X86-BMI1-NEXT:    movb $64, %cl
3406; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3407; X86-BMI1-NEXT:    movl $-1, %eax
3408; X86-BMI1-NEXT:    shrl %cl, %eax
3409; X86-BMI1-NEXT:    testb $32, %cl
3410; X86-BMI1-NEXT:    jne .LBB39_2
3411; X86-BMI1-NEXT:  # %bb.1:
3412; X86-BMI1-NEXT:    movl $-1, %eax
3413; X86-BMI1-NEXT:  .LBB39_2:
3414; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
3415; X86-BMI1-NEXT:    retl
3416;
3417; X86-BMI2-LABEL: bzhi64_32_c0:
3418; X86-BMI2:       # %bb.0:
3419; X86-BMI2-NEXT:    movb $64, %cl
3420; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3421; X86-BMI2-NEXT:    movl $-1, %eax
3422; X86-BMI2-NEXT:    testb $32, %cl
3423; X86-BMI2-NEXT:    je .LBB39_2
3424; X86-BMI2-NEXT:  # %bb.1:
3425; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
3426; X86-BMI2-NEXT:  .LBB39_2:
3427; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
3428; X86-BMI2-NEXT:    retl
3429;
3430; X64-NOBMI-LABEL: bzhi64_32_c0:
3431; X64-NOBMI:       # %bb.0:
3432; X64-NOBMI-NEXT:    movq %rsi, %rcx
3433; X64-NOBMI-NEXT:    negb %cl
3434; X64-NOBMI-NEXT:    movq $-1, %rax
3435; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3436; X64-NOBMI-NEXT:    shrq %cl, %rax
3437; X64-NOBMI-NEXT:    andl %edi, %eax
3438; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3439; X64-NOBMI-NEXT:    retq
3440;
3441; X64-BMI1-LABEL: bzhi64_32_c0:
3442; X64-BMI1:       # %bb.0:
3443; X64-BMI1-NEXT:    shll $8, %esi
3444; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3445; X64-BMI1-NEXT:    retq
3446;
3447; X64-BMI2-LABEL: bzhi64_32_c0:
3448; X64-BMI2:       # %bb.0:
3449; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3450; X64-BMI2-NEXT:    retq
3451  %numhighbits = sub i64 64, %numlowbits
3452  %mask = lshr i64 -1, %numhighbits
3453  %masked = and i64 %mask, %val
3454  %res = trunc i64 %masked to i32
3455  ret i32 %res
3456}
3457
3458; The i64 value is truncated to i32 first. The mask shift and the masking are both 32-bit.
; (The mask is built with 'lshr i32 -1, (32 - %numlowbits)'; no 64-bit shift
; appears in this function's IR.) The mask has no other use. With BMI1 the
; expectations use shll+bextrl and with BMI2 bzhil, on both 32-bit and 64-bit
; targets; without BMI they use a shll/shrl pair by the negated bit count.
3459define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
3460; X86-NOBMI-LABEL: bzhi64_32_c1:
3461; X86-NOBMI:       # %bb.0:
3462; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3463; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3464; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3465; X86-NOBMI-NEXT:    shll %cl, %eax
3466; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3467; X86-NOBMI-NEXT:    shrl %cl, %eax
3468; X86-NOBMI-NEXT:    retl
3469;
3470; X86-BMI1-LABEL: bzhi64_32_c1:
3471; X86-BMI1:       # %bb.0:
3472; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
3473; X86-BMI1-NEXT:    shll $8, %eax
3474; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3475; X86-BMI1-NEXT:    retl
3476;
3477; X86-BMI2-LABEL: bzhi64_32_c1:
3478; X86-BMI2:       # %bb.0:
3479; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
3480; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3481; X86-BMI2-NEXT:    retl
3482;
3483; X64-NOBMI-LABEL: bzhi64_32_c1:
3484; X64-NOBMI:       # %bb.0:
3485; X64-NOBMI-NEXT:    movl %esi, %ecx
3486; X64-NOBMI-NEXT:    movq %rdi, %rax
3487; X64-NOBMI-NEXT:    negb %cl
3488; X64-NOBMI-NEXT:    shll %cl, %eax
3489; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3490; X64-NOBMI-NEXT:    shrl %cl, %eax
3491; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3492; X64-NOBMI-NEXT:    retq
3493;
3494; X64-BMI1-LABEL: bzhi64_32_c1:
3495; X64-BMI1:       # %bb.0:
3496; X64-BMI1-NEXT:    shll $8, %esi
3497; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3498; X64-BMI1-NEXT:    retq
3499;
3500; X64-BMI2-LABEL: bzhi64_32_c1:
3501; X64-BMI2:       # %bb.0:
3502; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3503; X64-BMI2-NEXT:    retq
3504  %truncval = trunc i64 %val to i32
3505  %numhighbits = sub i32 32, %numlowbits
3506  %mask = lshr i32 -1, %numhighbits
3507  %masked = and i32 %mask, %truncval
3508  ret i32 %masked
3509}
3510
3511; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit.
3512; Masking is 64-bit. Then truncation.
; The mask -1 >> (32 - %numlowbits) is built as an i32, zero-extended to i64,
; and'ed with the i64 %val, and the result is truncated back to i32.
; Expected code: a shll/shrl pair without BMI, shll $8 + bextrl with BMI1,
; and a single bzhil with BMI2.
3513define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
3514; X86-NOBMI-LABEL: bzhi64_32_c2:
3515; X86-NOBMI:       # %bb.0:
3516; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3517; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3518; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3519; X86-NOBMI-NEXT:    shll %cl, %eax
3520; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3521; X86-NOBMI-NEXT:    shrl %cl, %eax
3522; X86-NOBMI-NEXT:    retl
3523;
3524; X86-BMI1-LABEL: bzhi64_32_c2:
3525; X86-BMI1:       # %bb.0:
3526; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
3527; X86-BMI1-NEXT:    shll $8, %eax
3528; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3529; X86-BMI1-NEXT:    retl
3530;
3531; X86-BMI2-LABEL: bzhi64_32_c2:
3532; X86-BMI2:       # %bb.0:
3533; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
3534; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3535; X86-BMI2-NEXT:    retl
3536;
3537; X64-NOBMI-LABEL: bzhi64_32_c2:
3538; X64-NOBMI:       # %bb.0:
3539; X64-NOBMI-NEXT:    movl %esi, %ecx
3540; X64-NOBMI-NEXT:    movq %rdi, %rax
3541; X64-NOBMI-NEXT:    negb %cl
3542; X64-NOBMI-NEXT:    shll %cl, %eax
3543; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3544; X64-NOBMI-NEXT:    shrl %cl, %eax
3545; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3546; X64-NOBMI-NEXT:    retq
3547;
3548; X64-BMI1-LABEL: bzhi64_32_c2:
3549; X64-BMI1:       # %bb.0:
3550; X64-BMI1-NEXT:    shll $8, %esi
3551; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3552; X64-BMI1-NEXT:    retq
3553;
3554; X64-BMI2-LABEL: bzhi64_32_c2:
3555; X64-BMI2:       # %bb.0:
3556; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3557; X64-BMI2-NEXT:    retq
3558  %numhighbits = sub i32 32, %numlowbits
3559  %mask = lshr i32 -1, %numhighbits
3560  %zextmask = zext i32 %mask to i64
3561  %masked = and i64 %zextmask, %val
3562  %truncmasked = trunc i64 %masked to i32
3563  ret i32 %truncmasked
3564}
3565
3566; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
3567; Masking is 64-bit. Then truncation.
; The mask is the i64 constant 4294967295 (0xFFFFFFFF) shifted right by
; (64 - %numlowbits). None of the check lines below expect bextr/bzhi for this
; form: the 32-bit target uses shrdl plus a testb $32 branch, and the 64-bit
; target uses shrq (shrxq with BMI2) followed by andl.
3568define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind {
3569; X86-LABEL: bzhi64_32_c3:
3570; X86:       # %bb.0:
3571; X86-NEXT:    movb $64, %cl
3572; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
3573; X86-NEXT:    xorl %eax, %eax
3574; X86-NEXT:    movl $-1, %edx
3575; X86-NEXT:    shrdl %cl, %eax, %edx
3576; X86-NEXT:    testb $32, %cl
3577; X86-NEXT:    jne .LBB42_2
3578; X86-NEXT:  # %bb.1:
3579; X86-NEXT:    movl %edx, %eax
3580; X86-NEXT:  .LBB42_2:
3581; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
3582; X86-NEXT:    retl
3583;
3584; X64-NOBMI-LABEL: bzhi64_32_c3:
3585; X64-NOBMI:       # %bb.0:
3586; X64-NOBMI-NEXT:    movq %rsi, %rcx
3587; X64-NOBMI-NEXT:    negb %cl
3588; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
3589; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3590; X64-NOBMI-NEXT:    shrq %cl, %rax
3591; X64-NOBMI-NEXT:    andl %edi, %eax
3592; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3593; X64-NOBMI-NEXT:    retq
3594;
3595; X64-BMI1-LABEL: bzhi64_32_c3:
3596; X64-BMI1:       # %bb.0:
3597; X64-BMI1-NEXT:    movq %rsi, %rcx
3598; X64-BMI1-NEXT:    negb %cl
3599; X64-BMI1-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
3600; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
3601; X64-BMI1-NEXT:    shrq %cl, %rax
3602; X64-BMI1-NEXT:    andl %edi, %eax
3603; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
3604; X64-BMI1-NEXT:    retq
3605;
3606; X64-BMI2-LABEL: bzhi64_32_c3:
3607; X64-BMI2:       # %bb.0:
3608; X64-BMI2-NEXT:    negb %sil
3609; X64-BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
3610; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
3611; X64-BMI2-NEXT:    andl %edi, %eax
3612; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
3613; X64-BMI2-NEXT:    retq
3614  %numhighbits = sub i64 64, %numlowbits
3615  %mask = lshr i64 4294967295, %numhighbits
3616  %masked = and i64 %mask, %val
3617  %truncmasked = trunc i64 %masked to i32
3618  ret i32 %truncmasked
3619}
3620
3621; ---------------------------------------------------------------------------- ;
3622; Pattern d. 32-bit.
3623; ---------------------------------------------------------------------------- ;
3624
; Pattern d with an i32 value and an i32 bit count: %val is shifted left by
; (32 - %numlowbits) and then logically shifted right by the same amount.
; Expected code: a shll/shrl pair on a negated count without BMI,
; shll $8 + bextrl with BMI1, and a single bzhil with BMI2.
3625define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
3626; X86-NOBMI-LABEL: bzhi32_d0:
3627; X86-NOBMI:       # %bb.0:
3628; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3629; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3630; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3631; X86-NOBMI-NEXT:    shll %cl, %eax
3632; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3633; X86-NOBMI-NEXT:    shrl %cl, %eax
3634; X86-NOBMI-NEXT:    retl
3635;
3636; X86-BMI1-LABEL: bzhi32_d0:
3637; X86-BMI1:       # %bb.0:
3638; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
3639; X86-BMI1-NEXT:    shll $8, %eax
3640; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3641; X86-BMI1-NEXT:    retl
3642;
3643; X86-BMI2-LABEL: bzhi32_d0:
3644; X86-BMI2:       # %bb.0:
3645; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
3646; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3647; X86-BMI2-NEXT:    retl
3648;
3649; X64-NOBMI-LABEL: bzhi32_d0:
3650; X64-NOBMI:       # %bb.0:
3651; X64-NOBMI-NEXT:    movl %esi, %ecx
3652; X64-NOBMI-NEXT:    movl %edi, %eax
3653; X64-NOBMI-NEXT:    negb %cl
3654; X64-NOBMI-NEXT:    shll %cl, %eax
3655; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3656; X64-NOBMI-NEXT:    shrl %cl, %eax
3657; X64-NOBMI-NEXT:    retq
3658;
3659; X64-BMI1-LABEL: bzhi32_d0:
3660; X64-BMI1:       # %bb.0:
3661; X64-BMI1-NEXT:    shll $8, %esi
3662; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3663; X64-BMI1-NEXT:    retq
3664;
3665; X64-BMI2-LABEL: bzhi32_d0:
3666; X64-BMI2:       # %bb.0:
3667; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3668; X64-BMI2-NEXT:    retq
3669  %numhighbits = sub i32 32, %numlowbits
3670  %highbitscleared = shl i32 %val, %numhighbits
3671  %masked = lshr i32 %highbitscleared, %numhighbits
3672  ret i32 %masked
3673}
3674
; Pattern d with an i32 value and an i8 bit count: (32 - %numlowbits) is
; computed in i8 and zero-extended to i32 before being used as the amount for
; both the shl and the lshr.
; Expected code: a shll/shrl pair on a negated count without BMI,
; shll $8 + bextrl with BMI1, and a single bzhil with BMI2.
3675define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
3676; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
3677; X86-NOBMI:       # %bb.0:
3678; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3679; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3680; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3681; X86-NOBMI-NEXT:    shll %cl, %eax
3682; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3683; X86-NOBMI-NEXT:    shrl %cl, %eax
3684; X86-NOBMI-NEXT:    retl
3685;
3686; X86-BMI1-LABEL: bzhi32_d1_indexzext:
3687; X86-BMI1:       # %bb.0:
3688; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
3689; X86-BMI1-NEXT:    shll $8, %eax
3690; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3691; X86-BMI1-NEXT:    retl
3692;
3693; X86-BMI2-LABEL: bzhi32_d1_indexzext:
3694; X86-BMI2:       # %bb.0:
3695; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
3696; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3697; X86-BMI2-NEXT:    retl
3698;
3699; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
3700; X64-NOBMI:       # %bb.0:
3701; X64-NOBMI-NEXT:    movl %esi, %ecx
3702; X64-NOBMI-NEXT:    movl %edi, %eax
3703; X64-NOBMI-NEXT:    negb %cl
3704; X64-NOBMI-NEXT:    shll %cl, %eax
3705; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3706; X64-NOBMI-NEXT:    shrl %cl, %eax
3707; X64-NOBMI-NEXT:    retq
3708;
3709; X64-BMI1-LABEL: bzhi32_d1_indexzext:
3710; X64-BMI1:       # %bb.0:
3711; X64-BMI1-NEXT:    shll $8, %esi
3712; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3713; X64-BMI1-NEXT:    retq
3714;
3715; X64-BMI2-LABEL: bzhi32_d1_indexzext:
3716; X64-BMI2:       # %bb.0:
3717; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3718; X64-BMI2-NEXT:    retq
3719  %numhighbits = sub i8 32, %numlowbits
3720  %sh_prom = zext i8 %numhighbits to i32
3721  %highbitscleared = shl i32 %val, %sh_prom
3722  %masked = lshr i32 %highbitscleared, %sh_prom
3723  ret i32 %masked
3724}
3725
; Pattern d with the i32 value loaded from %w and an i32 bit count.
; Without BMI the expected code loads the value into a register and applies a
; shll/shrl pair; with BMI1/BMI2 the load is expected to be folded into the
; memory operand of bextrl/bzhil.
3726define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
3727; X86-NOBMI-LABEL: bzhi32_d2_load:
3728; X86-NOBMI:       # %bb.0:
3729; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3730; X86-NOBMI-NEXT:    movl (%eax), %eax
3731; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3732; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3733; X86-NOBMI-NEXT:    shll %cl, %eax
3734; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3735; X86-NOBMI-NEXT:    shrl %cl, %eax
3736; X86-NOBMI-NEXT:    retl
3737;
3738; X86-BMI1-LABEL: bzhi32_d2_load:
3739; X86-BMI1:       # %bb.0:
3740; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3741; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
3742; X86-BMI1-NEXT:    shll $8, %ecx
3743; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
3744; X86-BMI1-NEXT:    retl
3745;
3746; X86-BMI2-LABEL: bzhi32_d2_load:
3747; X86-BMI2:       # %bb.0:
3748; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3749; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
3750; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
3751; X86-BMI2-NEXT:    retl
3752;
3753; X64-NOBMI-LABEL: bzhi32_d2_load:
3754; X64-NOBMI:       # %bb.0:
3755; X64-NOBMI-NEXT:    movl %esi, %ecx
3756; X64-NOBMI-NEXT:    movl (%rdi), %eax
3757; X64-NOBMI-NEXT:    negb %cl
3758; X64-NOBMI-NEXT:    shll %cl, %eax
3759; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3760; X64-NOBMI-NEXT:    shrl %cl, %eax
3761; X64-NOBMI-NEXT:    retq
3762;
3763; X64-BMI1-LABEL: bzhi32_d2_load:
3764; X64-BMI1:       # %bb.0:
3765; X64-BMI1-NEXT:    shll $8, %esi
3766; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
3767; X64-BMI1-NEXT:    retq
3768;
3769; X64-BMI2-LABEL: bzhi32_d2_load:
3770; X64-BMI2:       # %bb.0:
3771; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
3772; X64-BMI2-NEXT:    retq
3773  %val = load i32, i32* %w
3774  %numhighbits = sub i32 32, %numlowbits
3775  %highbitscleared = shl i32 %val, %numhighbits
3776  %masked = lshr i32 %highbitscleared, %numhighbits
3777  ret i32 %masked
3778}
3779
; Pattern d with the i32 value loaded from %w and an i8 bit count:
; (32 - %numlowbits) is computed in i8 and zero-extended to i32 before being
; used as the amount for both shifts.
; Without BMI the expected code loads the value and applies a shll/shrl pair;
; with BMI1/BMI2 the load is expected to be folded into bextrl/bzhil.
3780define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
3781; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext:
3782; X86-NOBMI:       # %bb.0:
3783; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3784; X86-NOBMI-NEXT:    movl (%eax), %eax
3785; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3786; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3787; X86-NOBMI-NEXT:    shll %cl, %eax
3788; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3789; X86-NOBMI-NEXT:    shrl %cl, %eax
3790; X86-NOBMI-NEXT:    retl
3791;
3792; X86-BMI1-LABEL: bzhi32_d3_load_indexzext:
3793; X86-BMI1:       # %bb.0:
3794; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3795; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
3796; X86-BMI1-NEXT:    shll $8, %ecx
3797; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
3798; X86-BMI1-NEXT:    retl
3799;
3800; X86-BMI2-LABEL: bzhi32_d3_load_indexzext:
3801; X86-BMI2:       # %bb.0:
3802; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3803; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
3804; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
3805; X86-BMI2-NEXT:    retl
3806;
3807; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
3808; X64-NOBMI:       # %bb.0:
3809; X64-NOBMI-NEXT:    movl %esi, %ecx
3810; X64-NOBMI-NEXT:    movl (%rdi), %eax
3811; X64-NOBMI-NEXT:    negb %cl
3812; X64-NOBMI-NEXT:    shll %cl, %eax
3813; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3814; X64-NOBMI-NEXT:    shrl %cl, %eax
3815; X64-NOBMI-NEXT:    retq
3816;
3817; X64-BMI1-LABEL: bzhi32_d3_load_indexzext:
3818; X64-BMI1:       # %bb.0:
3819; X64-BMI1-NEXT:    shll $8, %esi
3820; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
3821; X64-BMI1-NEXT:    retq
3822;
3823; X64-BMI2-LABEL: bzhi32_d3_load_indexzext:
3824; X64-BMI2:       # %bb.0:
3825; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
3826; X64-BMI2-NEXT:    retq
3827  %val = load i32, i32* %w
3828  %numhighbits = sub i8 32, %numlowbits
3829  %sh_prom = zext i8 %numhighbits to i32
3830  %highbitscleared = shl i32 %val, %sh_prom
3831  %masked = lshr i32 %highbitscleared, %sh_prom
3832  ret i32 %masked
3833}
3834
3835; 64-bit.
3836
; Pattern d with an i64 value and an i64 bit count: %val is shifted left by
; (64 - %numlowbits) and then logically shifted right by the same amount.
; On the 64-bit target the expected code is a shlq/shrq pair without BMI,
; shll $8 + bextrq with BMI1, and a single bzhiq with BMI2.
; On the 32-bit target no bextr/bzhi is expected in any configuration: the
; i64 shifts are expanded into shldl/shrdl sequences with testb $32 branches
; (using shlxl/shrxl when BMI2 is enabled).
3837define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
3838; X86-NOBMI-LABEL: bzhi64_d0:
3839; X86-NOBMI:       # %bb.0:
3840; X86-NOBMI-NEXT:    pushl %ebx
3841; X86-NOBMI-NEXT:    pushl %edi
3842; X86-NOBMI-NEXT:    pushl %esi
3843; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
3844; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3845; X86-NOBMI-NEXT:    movb $64, %cl
3846; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3847; X86-NOBMI-NEXT:    movl %edx, %esi
3848; X86-NOBMI-NEXT:    shll %cl, %esi
3849; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3850; X86-NOBMI-NEXT:    testb $32, %cl
3851; X86-NOBMI-NEXT:    movl %esi, %edi
3852; X86-NOBMI-NEXT:    jne .LBB47_2
3853; X86-NOBMI-NEXT:  # %bb.1:
3854; X86-NOBMI-NEXT:    movl %eax, %edi
3855; X86-NOBMI-NEXT:  .LBB47_2:
3856; X86-NOBMI-NEXT:    movl %edi, %eax
3857; X86-NOBMI-NEXT:    shrl %cl, %eax
3858; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3859; X86-NOBMI-NEXT:    testb $32, %cl
3860; X86-NOBMI-NEXT:    movl $0, %edx
3861; X86-NOBMI-NEXT:    jne .LBB47_4
3862; X86-NOBMI-NEXT:  # %bb.3:
3863; X86-NOBMI-NEXT:    movl %esi, %ebx
3864; X86-NOBMI-NEXT:    movl %eax, %edx
3865; X86-NOBMI-NEXT:  .LBB47_4:
3866; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3867; X86-NOBMI-NEXT:    testb $32, %cl
3868; X86-NOBMI-NEXT:    jne .LBB47_6
3869; X86-NOBMI-NEXT:  # %bb.5:
3870; X86-NOBMI-NEXT:    movl %ebx, %eax
3871; X86-NOBMI-NEXT:  .LBB47_6:
3872; X86-NOBMI-NEXT:    popl %esi
3873; X86-NOBMI-NEXT:    popl %edi
3874; X86-NOBMI-NEXT:    popl %ebx
3875; X86-NOBMI-NEXT:    retl
3876;
3877; X86-BMI1-LABEL: bzhi64_d0:
3878; X86-BMI1:       # %bb.0:
3879; X86-BMI1-NEXT:    pushl %ebx
3880; X86-BMI1-NEXT:    pushl %edi
3881; X86-BMI1-NEXT:    pushl %esi
3882; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
3883; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3884; X86-BMI1-NEXT:    movb $64, %cl
3885; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3886; X86-BMI1-NEXT:    movl %edx, %esi
3887; X86-BMI1-NEXT:    shll %cl, %esi
3888; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3889; X86-BMI1-NEXT:    testb $32, %cl
3890; X86-BMI1-NEXT:    movl %esi, %edi
3891; X86-BMI1-NEXT:    jne .LBB47_2
3892; X86-BMI1-NEXT:  # %bb.1:
3893; X86-BMI1-NEXT:    movl %eax, %edi
3894; X86-BMI1-NEXT:  .LBB47_2:
3895; X86-BMI1-NEXT:    movl %edi, %eax
3896; X86-BMI1-NEXT:    shrl %cl, %eax
3897; X86-BMI1-NEXT:    xorl %ebx, %ebx
3898; X86-BMI1-NEXT:    testb $32, %cl
3899; X86-BMI1-NEXT:    movl $0, %edx
3900; X86-BMI1-NEXT:    jne .LBB47_4
3901; X86-BMI1-NEXT:  # %bb.3:
3902; X86-BMI1-NEXT:    movl %esi, %ebx
3903; X86-BMI1-NEXT:    movl %eax, %edx
3904; X86-BMI1-NEXT:  .LBB47_4:
3905; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3906; X86-BMI1-NEXT:    testb $32, %cl
3907; X86-BMI1-NEXT:    jne .LBB47_6
3908; X86-BMI1-NEXT:  # %bb.5:
3909; X86-BMI1-NEXT:    movl %ebx, %eax
3910; X86-BMI1-NEXT:  .LBB47_6:
3911; X86-BMI1-NEXT:    popl %esi
3912; X86-BMI1-NEXT:    popl %edi
3913; X86-BMI1-NEXT:    popl %ebx
3914; X86-BMI1-NEXT:    retl
3915;
3916; X86-BMI2-LABEL: bzhi64_d0:
3917; X86-BMI2:       # %bb.0:
3918; X86-BMI2-NEXT:    pushl %edi
3919; X86-BMI2-NEXT:    pushl %esi
3920; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3921; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
3922; X86-BMI2-NEXT:    movb $64, %cl
3923; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3924; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
3925; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
3926; X86-BMI2-NEXT:    xorl %edx, %edx
3927; X86-BMI2-NEXT:    testb $32, %cl
3928; X86-BMI2-NEXT:    je .LBB47_2
3929; X86-BMI2-NEXT:  # %bb.1:
3930; X86-BMI2-NEXT:    movl %edi, %esi
3931; X86-BMI2-NEXT:    movl $0, %edi
3932; X86-BMI2-NEXT:  .LBB47_2:
3933; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3934; X86-BMI2-NEXT:    jne .LBB47_4
3935; X86-BMI2-NEXT:  # %bb.3:
3936; X86-BMI2-NEXT:    movl %eax, %edx
3937; X86-BMI2-NEXT:  .LBB47_4:
3938; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3939; X86-BMI2-NEXT:    testb $32, %cl
3940; X86-BMI2-NEXT:    jne .LBB47_6
3941; X86-BMI2-NEXT:  # %bb.5:
3942; X86-BMI2-NEXT:    movl %edi, %eax
3943; X86-BMI2-NEXT:  .LBB47_6:
3944; X86-BMI2-NEXT:    popl %esi
3945; X86-BMI2-NEXT:    popl %edi
3946; X86-BMI2-NEXT:    retl
3947;
3948; X64-NOBMI-LABEL: bzhi64_d0:
3949; X64-NOBMI:       # %bb.0:
3950; X64-NOBMI-NEXT:    movq %rsi, %rcx
3951; X64-NOBMI-NEXT:    movq %rdi, %rax
3952; X64-NOBMI-NEXT:    negb %cl
3953; X64-NOBMI-NEXT:    shlq %cl, %rax
3954; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3955; X64-NOBMI-NEXT:    shrq %cl, %rax
3956; X64-NOBMI-NEXT:    retq
3957;
3958; X64-BMI1-LABEL: bzhi64_d0:
3959; X64-BMI1:       # %bb.0:
3960; X64-BMI1-NEXT:    shll $8, %esi
3961; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
3962; X64-BMI1-NEXT:    retq
3963;
3964; X64-BMI2-LABEL: bzhi64_d0:
3965; X64-BMI2:       # %bb.0:
3966; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
3967; X64-BMI2-NEXT:    retq
3968  %numhighbits = sub i64 64, %numlowbits
3969  %highbitscleared = shl i64 %val, %numhighbits
3970  %masked = lshr i64 %highbitscleared, %numhighbits
3971  ret i64 %masked
3972}
3973
; Pattern d with an i64 value and an i8 bit count: (64 - %numlowbits) is
; computed in i8 and zero-extended to i64 before being used as the amount for
; both the shl and the lshr.
; On the 64-bit target the expected code is a shlq/shrq pair without BMI,
; shll $8 + bextrq with BMI1, and a single bzhiq with BMI2.
; On the 32-bit target no bextr/bzhi is expected in any configuration: the
; i64 shifts are expanded into shldl/shrdl sequences with testb $32 branches
; (using shlxl/shrxl when BMI2 is enabled).
3974define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
3975; X86-NOBMI-LABEL: bzhi64_d1_indexzext:
3976; X86-NOBMI:       # %bb.0:
3977; X86-NOBMI-NEXT:    pushl %ebx
3978; X86-NOBMI-NEXT:    pushl %edi
3979; X86-NOBMI-NEXT:    pushl %esi
3980; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
3981; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3982; X86-NOBMI-NEXT:    movb $64, %cl
3983; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3984; X86-NOBMI-NEXT:    movl %edx, %esi
3985; X86-NOBMI-NEXT:    shll %cl, %esi
3986; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3987; X86-NOBMI-NEXT:    testb $32, %cl
3988; X86-NOBMI-NEXT:    movl %esi, %edi
3989; X86-NOBMI-NEXT:    jne .LBB48_2
3990; X86-NOBMI-NEXT:  # %bb.1:
3991; X86-NOBMI-NEXT:    movl %eax, %edi
3992; X86-NOBMI-NEXT:  .LBB48_2:
3993; X86-NOBMI-NEXT:    movl %edi, %eax
3994; X86-NOBMI-NEXT:    shrl %cl, %eax
3995; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3996; X86-NOBMI-NEXT:    testb $32, %cl
3997; X86-NOBMI-NEXT:    movl $0, %edx
3998; X86-NOBMI-NEXT:    jne .LBB48_4
3999; X86-NOBMI-NEXT:  # %bb.3:
4000; X86-NOBMI-NEXT:    movl %esi, %ebx
4001; X86-NOBMI-NEXT:    movl %eax, %edx
4002; X86-NOBMI-NEXT:  .LBB48_4:
4003; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
4004; X86-NOBMI-NEXT:    testb $32, %cl
4005; X86-NOBMI-NEXT:    jne .LBB48_6
4006; X86-NOBMI-NEXT:  # %bb.5:
4007; X86-NOBMI-NEXT:    movl %ebx, %eax
4008; X86-NOBMI-NEXT:  .LBB48_6:
4009; X86-NOBMI-NEXT:    popl %esi
4010; X86-NOBMI-NEXT:    popl %edi
4011; X86-NOBMI-NEXT:    popl %ebx
4012; X86-NOBMI-NEXT:    retl
4013;
4014; X86-BMI1-LABEL: bzhi64_d1_indexzext:
4015; X86-BMI1:       # %bb.0:
4016; X86-BMI1-NEXT:    pushl %ebx
4017; X86-BMI1-NEXT:    pushl %edi
4018; X86-BMI1-NEXT:    pushl %esi
4019; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
4020; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
4021; X86-BMI1-NEXT:    movb $64, %cl
4022; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
4023; X86-BMI1-NEXT:    movl %edx, %esi
4024; X86-BMI1-NEXT:    shll %cl, %esi
4025; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
4026; X86-BMI1-NEXT:    testb $32, %cl
4027; X86-BMI1-NEXT:    movl %esi, %edi
4028; X86-BMI1-NEXT:    jne .LBB48_2
4029; X86-BMI1-NEXT:  # %bb.1:
4030; X86-BMI1-NEXT:    movl %eax, %edi
4031; X86-BMI1-NEXT:  .LBB48_2:
4032; X86-BMI1-NEXT:    movl %edi, %eax
4033; X86-BMI1-NEXT:    shrl %cl, %eax
4034; X86-BMI1-NEXT:    xorl %ebx, %ebx
4035; X86-BMI1-NEXT:    testb $32, %cl
4036; X86-BMI1-NEXT:    movl $0, %edx
4037; X86-BMI1-NEXT:    jne .LBB48_4
4038; X86-BMI1-NEXT:  # %bb.3:
4039; X86-BMI1-NEXT:    movl %esi, %ebx
4040; X86-BMI1-NEXT:    movl %eax, %edx
4041; X86-BMI1-NEXT:  .LBB48_4:
4042; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
4043; X86-BMI1-NEXT:    testb $32, %cl
4044; X86-BMI1-NEXT:    jne .LBB48_6
4045; X86-BMI1-NEXT:  # %bb.5:
4046; X86-BMI1-NEXT:    movl %ebx, %eax
4047; X86-BMI1-NEXT:  .LBB48_6:
4048; X86-BMI1-NEXT:    popl %esi
4049; X86-BMI1-NEXT:    popl %edi
4050; X86-BMI1-NEXT:    popl %ebx
4051; X86-BMI1-NEXT:    retl
4052;
4053; X86-BMI2-LABEL: bzhi64_d1_indexzext:
4054; X86-BMI2:       # %bb.0:
4055; X86-BMI2-NEXT:    pushl %edi
4056; X86-BMI2-NEXT:    pushl %esi
4057; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
4058; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
4059; X86-BMI2-NEXT:    movb $64, %cl
4060; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
4061; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
4062; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
4063; X86-BMI2-NEXT:    xorl %edx, %edx
4064; X86-BMI2-NEXT:    testb $32, %cl
4065; X86-BMI2-NEXT:    je .LBB48_2
4066; X86-BMI2-NEXT:  # %bb.1:
4067; X86-BMI2-NEXT:    movl %edi, %esi
4068; X86-BMI2-NEXT:    movl $0, %edi
4069; X86-BMI2-NEXT:  .LBB48_2:
4070; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
4071; X86-BMI2-NEXT:    jne .LBB48_4
4072; X86-BMI2-NEXT:  # %bb.3:
4073; X86-BMI2-NEXT:    movl %eax, %edx
4074; X86-BMI2-NEXT:  .LBB48_4:
4075; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
4076; X86-BMI2-NEXT:    testb $32, %cl
4077; X86-BMI2-NEXT:    jne .LBB48_6
4078; X86-BMI2-NEXT:  # %bb.5:
4079; X86-BMI2-NEXT:    movl %edi, %eax
4080; X86-BMI2-NEXT:  .LBB48_6:
4081; X86-BMI2-NEXT:    popl %esi
4082; X86-BMI2-NEXT:    popl %edi
4083; X86-BMI2-NEXT:    retl
4084;
4085; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
4086; X64-NOBMI:       # %bb.0:
4087; X64-NOBMI-NEXT:    movl %esi, %ecx
4088; X64-NOBMI-NEXT:    movq %rdi, %rax
4089; X64-NOBMI-NEXT:    negb %cl
4090; X64-NOBMI-NEXT:    shlq %cl, %rax
4091; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4092; X64-NOBMI-NEXT:    shrq %cl, %rax
4093; X64-NOBMI-NEXT:    retq
4094;
4095; X64-BMI1-LABEL: bzhi64_d1_indexzext:
4096; X64-BMI1:       # %bb.0:
4097; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
4098; X64-BMI1-NEXT:    shll $8, %esi
4099; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
4100; X64-BMI1-NEXT:    retq
4101;
4102; X64-BMI2-LABEL: bzhi64_d1_indexzext:
4103; X64-BMI2:       # %bb.0:
4104; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
4105; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
4106; X64-BMI2-NEXT:    retq
4107  %numhighbits = sub i8 64, %numlowbits
4108  %sh_prom = zext i8 %numhighbits to i64
4109  %highbitscleared = shl i64 %val, %sh_prom
4110  %masked = lshr i64 %highbitscleared, %sh_prom
4111  ret i64 %masked
4112}
4113
; Pattern d with the i64 value loaded from %w and an i64 bit count.
; On the 64-bit target the expected code is a load plus a shlq/shrq pair
; without BMI; with BMI1/BMI2 the load is expected to be folded into the
; memory operand of bextrq/bzhiq.
; On the 32-bit target no bextr/bzhi is expected in any configuration: the
; value is loaded as two 32-bit halves and the i64 shifts are expanded into
; shldl/shrdl sequences with testb $32 branches (using shlxl/shrxl when BMI2
; is enabled).
4114define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
4115; X86-NOBMI-LABEL: bzhi64_d2_load:
4116; X86-NOBMI:       # %bb.0:
4117; X86-NOBMI-NEXT:    pushl %ebx
4118; X86-NOBMI-NEXT:    pushl %edi
4119; X86-NOBMI-NEXT:    pushl %esi
4120; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4121; X86-NOBMI-NEXT:    movl (%eax), %edx
4122; X86-NOBMI-NEXT:    movl 4(%eax), %eax
4123; X86-NOBMI-NEXT:    movb $64, %cl
4124; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4125; X86-NOBMI-NEXT:    movl %edx, %esi
4126; X86-NOBMI-NEXT:    shll %cl, %esi
4127; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
4128; X86-NOBMI-NEXT:    testb $32, %cl
4129; X86-NOBMI-NEXT:    movl %esi, %edi
4130; X86-NOBMI-NEXT:    jne .LBB49_2
4131; X86-NOBMI-NEXT:  # %bb.1:
4132; X86-NOBMI-NEXT:    movl %eax, %edi
4133; X86-NOBMI-NEXT:  .LBB49_2:
4134; X86-NOBMI-NEXT:    movl %edi, %eax
4135; X86-NOBMI-NEXT:    shrl %cl, %eax
4136; X86-NOBMI-NEXT:    xorl %ebx, %ebx
4137; X86-NOBMI-NEXT:    testb $32, %cl
4138; X86-NOBMI-NEXT:    movl $0, %edx
4139; X86-NOBMI-NEXT:    jne .LBB49_4
4140; X86-NOBMI-NEXT:  # %bb.3:
4141; X86-NOBMI-NEXT:    movl %esi, %ebx
4142; X86-NOBMI-NEXT:    movl %eax, %edx
4143; X86-NOBMI-NEXT:  .LBB49_4:
4144; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
4145; X86-NOBMI-NEXT:    testb $32, %cl
4146; X86-NOBMI-NEXT:    jne .LBB49_6
4147; X86-NOBMI-NEXT:  # %bb.5:
4148; X86-NOBMI-NEXT:    movl %ebx, %eax
4149; X86-NOBMI-NEXT:  .LBB49_6:
4150; X86-NOBMI-NEXT:    popl %esi
4151; X86-NOBMI-NEXT:    popl %edi
4152; X86-NOBMI-NEXT:    popl %ebx
4153; X86-NOBMI-NEXT:    retl
4154;
4155; X86-BMI1-LABEL: bzhi64_d2_load:
4156; X86-BMI1:       # %bb.0:
4157; X86-BMI1-NEXT:    pushl %ebx
4158; X86-BMI1-NEXT:    pushl %edi
4159; X86-BMI1-NEXT:    pushl %esi
4160; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
4161; X86-BMI1-NEXT:    movl (%eax), %edx
4162; X86-BMI1-NEXT:    movl 4(%eax), %eax
4163; X86-BMI1-NEXT:    movb $64, %cl
4164; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
4165; X86-BMI1-NEXT:    movl %edx, %esi
4166; X86-BMI1-NEXT:    shll %cl, %esi
4167; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
4168; X86-BMI1-NEXT:    testb $32, %cl
4169; X86-BMI1-NEXT:    movl %esi, %edi
4170; X86-BMI1-NEXT:    jne .LBB49_2
4171; X86-BMI1-NEXT:  # %bb.1:
4172; X86-BMI1-NEXT:    movl %eax, %edi
4173; X86-BMI1-NEXT:  .LBB49_2:
4174; X86-BMI1-NEXT:    movl %edi, %eax
4175; X86-BMI1-NEXT:    shrl %cl, %eax
4176; X86-BMI1-NEXT:    xorl %ebx, %ebx
4177; X86-BMI1-NEXT:    testb $32, %cl
4178; X86-BMI1-NEXT:    movl $0, %edx
4179; X86-BMI1-NEXT:    jne .LBB49_4
4180; X86-BMI1-NEXT:  # %bb.3:
4181; X86-BMI1-NEXT:    movl %esi, %ebx
4182; X86-BMI1-NEXT:    movl %eax, %edx
4183; X86-BMI1-NEXT:  .LBB49_4:
4184; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
4185; X86-BMI1-NEXT:    testb $32, %cl
4186; X86-BMI1-NEXT:    jne .LBB49_6
4187; X86-BMI1-NEXT:  # %bb.5:
4188; X86-BMI1-NEXT:    movl %ebx, %eax
4189; X86-BMI1-NEXT:  .LBB49_6:
4190; X86-BMI1-NEXT:    popl %esi
4191; X86-BMI1-NEXT:    popl %edi
4192; X86-BMI1-NEXT:    popl %ebx
4193; X86-BMI1-NEXT:    retl
4194;
4195; X86-BMI2-LABEL: bzhi64_d2_load:
4196; X86-BMI2:       # %bb.0:
4197; X86-BMI2-NEXT:    pushl %edi
4198; X86-BMI2-NEXT:    pushl %esi
4199; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
4200; X86-BMI2-NEXT:    movl (%eax), %edx
4201; X86-BMI2-NEXT:    movl 4(%eax), %esi
4202; X86-BMI2-NEXT:    movb $64, %cl
4203; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
4204; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
4205; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
4206; X86-BMI2-NEXT:    xorl %edx, %edx
4207; X86-BMI2-NEXT:    testb $32, %cl
4208; X86-BMI2-NEXT:    je .LBB49_2
4209; X86-BMI2-NEXT:  # %bb.1:
4210; X86-BMI2-NEXT:    movl %edi, %esi
4211; X86-BMI2-NEXT:    movl $0, %edi
4212; X86-BMI2-NEXT:  .LBB49_2:
4213; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
4214; X86-BMI2-NEXT:    jne .LBB49_4
4215; X86-BMI2-NEXT:  # %bb.3:
4216; X86-BMI2-NEXT:    movl %eax, %edx
4217; X86-BMI2-NEXT:  .LBB49_4:
4218; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
4219; X86-BMI2-NEXT:    testb $32, %cl
4220; X86-BMI2-NEXT:    jne .LBB49_6
4221; X86-BMI2-NEXT:  # %bb.5:
4222; X86-BMI2-NEXT:    movl %edi, %eax
4223; X86-BMI2-NEXT:  .LBB49_6:
4224; X86-BMI2-NEXT:    popl %esi
4225; X86-BMI2-NEXT:    popl %edi
4226; X86-BMI2-NEXT:    retl
4227;
4228; X64-NOBMI-LABEL: bzhi64_d2_load:
4229; X64-NOBMI:       # %bb.0:
4230; X64-NOBMI-NEXT:    movq %rsi, %rcx
4231; X64-NOBMI-NEXT:    movq (%rdi), %rax
4232; X64-NOBMI-NEXT:    negb %cl
4233; X64-NOBMI-NEXT:    shlq %cl, %rax
4234; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
4235; X64-NOBMI-NEXT:    shrq %cl, %rax
4236; X64-NOBMI-NEXT:    retq
4237;
4238; X64-BMI1-LABEL: bzhi64_d2_load:
4239; X64-BMI1:       # %bb.0:
4240; X64-BMI1-NEXT:    shll $8, %esi
4241; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
4242; X64-BMI1-NEXT:    retq
4243;
4244; X64-BMI2-LABEL: bzhi64_d2_load:
4245; X64-BMI2:       # %bb.0:
4246; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
4247; X64-BMI2-NEXT:    retq
4248  %val = load i64, i64* %w
4249  %numhighbits = sub i64 64, %numlowbits
4250  %highbitscleared = shl i64 %val, %numhighbits
4251  %masked = lshr i64 %highbitscleared, %numhighbits
4252  ret i64 %masked
4253}
4254
4255define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
4256; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext:
4257; X86-NOBMI:       # %bb.0:
4258; X86-NOBMI-NEXT:    pushl %ebx
4259; X86-NOBMI-NEXT:    pushl %edi
4260; X86-NOBMI-NEXT:    pushl %esi
4261; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4262; X86-NOBMI-NEXT:    movl (%eax), %edx
4263; X86-NOBMI-NEXT:    movl 4(%eax), %eax
4264; X86-NOBMI-NEXT:    movb $64, %cl
4265; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4266; X86-NOBMI-NEXT:    movl %edx, %esi
4267; X86-NOBMI-NEXT:    shll %cl, %esi
4268; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
4269; X86-NOBMI-NEXT:    testb $32, %cl
4270; X86-NOBMI-NEXT:    movl %esi, %edi
4271; X86-NOBMI-NEXT:    jne .LBB50_2
4272; X86-NOBMI-NEXT:  # %bb.1:
4273; X86-NOBMI-NEXT:    movl %eax, %edi
4274; X86-NOBMI-NEXT:  .LBB50_2:
4275; X86-NOBMI-NEXT:    movl %edi, %eax
4276; X86-NOBMI-NEXT:    shrl %cl, %eax
4277; X86-NOBMI-NEXT:    xorl %ebx, %ebx
4278; X86-NOBMI-NEXT:    testb $32, %cl
4279; X86-NOBMI-NEXT:    movl $0, %edx
4280; X86-NOBMI-NEXT:    jne .LBB50_4
4281; X86-NOBMI-NEXT:  # %bb.3:
4282; X86-NOBMI-NEXT:    movl %esi, %ebx
4283; X86-NOBMI-NEXT:    movl %eax, %edx
4284; X86-NOBMI-NEXT:  .LBB50_4:
4285; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
4286; X86-NOBMI-NEXT:    testb $32, %cl
4287; X86-NOBMI-NEXT:    jne .LBB50_6
4288; X86-NOBMI-NEXT:  # %bb.5:
4289; X86-NOBMI-NEXT:    movl %ebx, %eax
4290; X86-NOBMI-NEXT:  .LBB50_6:
4291; X86-NOBMI-NEXT:    popl %esi
4292; X86-NOBMI-NEXT:    popl %edi
4293; X86-NOBMI-NEXT:    popl %ebx
4294; X86-NOBMI-NEXT:    retl
4295;
4296; X86-BMI1-LABEL: bzhi64_d3_load_indexzext:
4297; X86-BMI1:       # %bb.0:
4298; X86-BMI1-NEXT:    pushl %ebx
4299; X86-BMI1-NEXT:    pushl %edi
4300; X86-BMI1-NEXT:    pushl %esi
4301; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
4302; X86-BMI1-NEXT:    movl (%eax), %edx
4303; X86-BMI1-NEXT:    movl 4(%eax), %eax
4304; X86-BMI1-NEXT:    movb $64, %cl
4305; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
4306; X86-BMI1-NEXT:    movl %edx, %esi
4307; X86-BMI1-NEXT:    shll %cl, %esi
4308; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
4309; X86-BMI1-NEXT:    testb $32, %cl
4310; X86-BMI1-NEXT:    movl %esi, %edi
4311; X86-BMI1-NEXT:    jne .LBB50_2
4312; X86-BMI1-NEXT:  # %bb.1:
4313; X86-BMI1-NEXT:    movl %eax, %edi
4314; X86-BMI1-NEXT:  .LBB50_2:
4315; X86-BMI1-NEXT:    movl %edi, %eax
4316; X86-BMI1-NEXT:    shrl %cl, %eax
4317; X86-BMI1-NEXT:    xorl %ebx, %ebx
4318; X86-BMI1-NEXT:    testb $32, %cl
4319; X86-BMI1-NEXT:    movl $0, %edx
4320; X86-BMI1-NEXT:    jne .LBB50_4
4321; X86-BMI1-NEXT:  # %bb.3:
4322; X86-BMI1-NEXT:    movl %esi, %ebx
4323; X86-BMI1-NEXT:    movl %eax, %edx
4324; X86-BMI1-NEXT:  .LBB50_4:
4325; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
4326; X86-BMI1-NEXT:    testb $32, %cl
4327; X86-BMI1-NEXT:    jne .LBB50_6
4328; X86-BMI1-NEXT:  # %bb.5:
4329; X86-BMI1-NEXT:    movl %ebx, %eax
4330; X86-BMI1-NEXT:  .LBB50_6:
4331; X86-BMI1-NEXT:    popl %esi
4332; X86-BMI1-NEXT:    popl %edi
4333; X86-BMI1-NEXT:    popl %ebx
4334; X86-BMI1-NEXT:    retl
4335;
4336; X86-BMI2-LABEL: bzhi64_d3_load_indexzext:
4337; X86-BMI2:       # %bb.0:
4338; X86-BMI2-NEXT:    pushl %edi
4339; X86-BMI2-NEXT:    pushl %esi
4340; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
4341; X86-BMI2-NEXT:    movl (%eax), %edx
4342; X86-BMI2-NEXT:    movl 4(%eax), %esi
4343; X86-BMI2-NEXT:    movb $64, %cl
4344; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
4345; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
4346; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
4347; X86-BMI2-NEXT:    xorl %edx, %edx
4348; X86-BMI2-NEXT:    testb $32, %cl
4349; X86-BMI2-NEXT:    je .LBB50_2
4350; X86-BMI2-NEXT:  # %bb.1:
4351; X86-BMI2-NEXT:    movl %edi, %esi
4352; X86-BMI2-NEXT:    movl $0, %edi
4353; X86-BMI2-NEXT:  .LBB50_2:
4354; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
4355; X86-BMI2-NEXT:    jne .LBB50_4
4356; X86-BMI2-NEXT:  # %bb.3:
4357; X86-BMI2-NEXT:    movl %eax, %edx
4358; X86-BMI2-NEXT:  .LBB50_4:
4359; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
4360; X86-BMI2-NEXT:    testb $32, %cl
4361; X86-BMI2-NEXT:    jne .LBB50_6
4362; X86-BMI2-NEXT:  # %bb.5:
4363; X86-BMI2-NEXT:    movl %edi, %eax
4364; X86-BMI2-NEXT:  .LBB50_6:
4365; X86-BMI2-NEXT:    popl %esi
4366; X86-BMI2-NEXT:    popl %edi
4367; X86-BMI2-NEXT:    retl
4368;
4369; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
4370; X64-NOBMI:       # %bb.0:
4371; X64-NOBMI-NEXT:    movl %esi, %ecx
4372; X64-NOBMI-NEXT:    movq (%rdi), %rax
4373; X64-NOBMI-NEXT:    negb %cl
4374; X64-NOBMI-NEXT:    shlq %cl, %rax
4375; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4376; X64-NOBMI-NEXT:    shrq %cl, %rax
4377; X64-NOBMI-NEXT:    retq
4378;
4379; X64-BMI1-LABEL: bzhi64_d3_load_indexzext:
4380; X64-BMI1:       # %bb.0:
4381; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
4382; X64-BMI1-NEXT:    shll $8, %esi
4383; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
4384; X64-BMI1-NEXT:    retq
4385;
4386; X64-BMI2-LABEL: bzhi64_d3_load_indexzext:
4387; X64-BMI2:       # %bb.0:
4388; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
4389; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
4390; X64-BMI2-NEXT:    retq
4391  %val = load i64, i64* %w
4392  %numhighbits = sub i8 64, %numlowbits
4393  %sh_prom = zext i8 %numhighbits to i64
4394  %highbitscleared = shl i64 %val, %sh_prom
4395  %masked = lshr i64 %highbitscleared, %sh_prom
4396  ret i64 %masked
4397}
4398
4399; 64-bit, but with 32-bit output
4400
4401; Everything done in 64-bit, truncation happens last.
; Pattern d on i64: shift %val left and then logically right by
; (64 - %numlowbits) so only the low %numlowbits bits survive, then truncate
; the i64 result to i32.
; Per the assertions below: x86_64 with BMI2 selects a single bzhiq; x86_64
; with BMI1 only builds a bextr control word via `shll $8` (length in bits
; 15:8, start 0) and uses bextrq; x86_64 without BMI keeps a negb/shlq/shrq
; sequence. The i686 runs expand the 64-bit shifts into shld/shrd sequences
; that branch on `testb $32, %cl`.
4402define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
4403; X86-NOBMI-LABEL: bzhi64_32_d0:
4404; X86-NOBMI:       # %bb.0:
4405; X86-NOBMI-NEXT:    pushl %esi
4406; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
4407; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4408; X86-NOBMI-NEXT:    movb $64, %cl
4409; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4410; X86-NOBMI-NEXT:    movl %esi, %edx
4411; X86-NOBMI-NEXT:    shll %cl, %edx
4412; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
4413; X86-NOBMI-NEXT:    testb $32, %cl
4414; X86-NOBMI-NEXT:    je .LBB51_2
4415; X86-NOBMI-NEXT:  # %bb.1:
4416; X86-NOBMI-NEXT:    movl %edx, %eax
4417; X86-NOBMI-NEXT:    xorl %edx, %edx
4418; X86-NOBMI-NEXT:  .LBB51_2:
4419; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
4420; X86-NOBMI-NEXT:    shrl %cl, %eax
4421; X86-NOBMI-NEXT:    testb $32, %cl
4422; X86-NOBMI-NEXT:    jne .LBB51_4
4423; X86-NOBMI-NEXT:  # %bb.3:
4424; X86-NOBMI-NEXT:    movl %edx, %eax
4425; X86-NOBMI-NEXT:  .LBB51_4:
4426; X86-NOBMI-NEXT:    popl %esi
4427; X86-NOBMI-NEXT:    retl
4428;
4429; X86-BMI1-LABEL: bzhi64_32_d0:
4430; X86-BMI1:       # %bb.0:
4431; X86-BMI1-NEXT:    pushl %esi
4432; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
4433; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
4434; X86-BMI1-NEXT:    movb $64, %cl
4435; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
4436; X86-BMI1-NEXT:    movl %esi, %edx
4437; X86-BMI1-NEXT:    shll %cl, %edx
4438; X86-BMI1-NEXT:    shldl %cl, %esi, %eax
4439; X86-BMI1-NEXT:    testb $32, %cl
4440; X86-BMI1-NEXT:    je .LBB51_2
4441; X86-BMI1-NEXT:  # %bb.1:
4442; X86-BMI1-NEXT:    movl %edx, %eax
4443; X86-BMI1-NEXT:    xorl %edx, %edx
4444; X86-BMI1-NEXT:  .LBB51_2:
4445; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
4446; X86-BMI1-NEXT:    shrl %cl, %eax
4447; X86-BMI1-NEXT:    testb $32, %cl
4448; X86-BMI1-NEXT:    jne .LBB51_4
4449; X86-BMI1-NEXT:  # %bb.3:
4450; X86-BMI1-NEXT:    movl %edx, %eax
4451; X86-BMI1-NEXT:  .LBB51_4:
4452; X86-BMI1-NEXT:    popl %esi
4453; X86-BMI1-NEXT:    retl
4454;
4455; X86-BMI2-LABEL: bzhi64_32_d0:
4456; X86-BMI2:       # %bb.0:
4457; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
4458; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
4459; X86-BMI2-NEXT:    movb $64, %cl
4460; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
4461; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
4462; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
4463; X86-BMI2-NEXT:    testb $32, %cl
4464; X86-BMI2-NEXT:    je .LBB51_2
4465; X86-BMI2-NEXT:  # %bb.1:
4466; X86-BMI2-NEXT:    movl %eax, %edx
4467; X86-BMI2-NEXT:    xorl %eax, %eax
4468; X86-BMI2-NEXT:  .LBB51_2:
4469; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
4470; X86-BMI2-NEXT:    testb $32, %cl
4471; X86-BMI2-NEXT:    je .LBB51_4
4472; X86-BMI2-NEXT:  # %bb.3:
4473; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
4474; X86-BMI2-NEXT:  .LBB51_4:
4475; X86-BMI2-NEXT:    retl
4476;
4477; X64-NOBMI-LABEL: bzhi64_32_d0:
4478; X64-NOBMI:       # %bb.0:
4479; X64-NOBMI-NEXT:    movq %rsi, %rcx
4480; X64-NOBMI-NEXT:    movq %rdi, %rax
4481; X64-NOBMI-NEXT:    negb %cl
4482; X64-NOBMI-NEXT:    shlq %cl, %rax
4483; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
4484; X64-NOBMI-NEXT:    shrq %cl, %rax
4485; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
4486; X64-NOBMI-NEXT:    retq
4487;
4488; X64-BMI1-LABEL: bzhi64_32_d0:
4489; X64-BMI1:       # %bb.0:
4490; X64-BMI1-NEXT:    shll $8, %esi
4491; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
4492; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
4493; X64-BMI1-NEXT:    retq
4494;
4495; X64-BMI2-LABEL: bzhi64_32_d0:
4496; X64-BMI2:       # %bb.0:
4497; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
4498; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
4499; X64-BMI2-NEXT:    retq
  ; Both shifts use the same i64 amount, so the bits pushed out by shl are
  ; replaced with zeros by lshr; the trunc then keeps the low 32 bits.
4500  %numhighbits = sub i64 64, %numlowbits
4501  %highbitscleared = shl i64 %val, %numhighbits
4502  %masked = lshr i64 %highbitscleared, %numhighbits
4503  %res = trunc i64 %masked to i32
4504  ret i32 %res
4505}
4506
4507; Truncation happens first; shifting and masking are then done in 32-bit.
; Pattern d on the truncated value: %val is narrowed to i32 up front and the
; shl/lshr pair by (32 - %numlowbits) runs entirely in i32.
; Per the assertions below: BMI2 runs select bzhil, BMI1-only runs select
; `shll $8` + bextrl, and runs without BMI keep a 32-bit shll/shrl pair. On
; i686 the BMI forms take the value operand directly from its stack slot.
4508define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
4509; X86-NOBMI-LABEL: bzhi64_32_d1:
4510; X86-NOBMI:       # %bb.0:
4511; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4512; X86-NOBMI-NEXT:    xorl %ecx, %ecx
4513; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4514; X86-NOBMI-NEXT:    shll %cl, %eax
4515; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4516; X86-NOBMI-NEXT:    shrl %cl, %eax
4517; X86-NOBMI-NEXT:    retl
4518;
4519; X86-BMI1-LABEL: bzhi64_32_d1:
4520; X86-BMI1:       # %bb.0:
4521; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
4522; X86-BMI1-NEXT:    shll $8, %eax
4523; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
4524; X86-BMI1-NEXT:    retl
4525;
4526; X86-BMI2-LABEL: bzhi64_32_d1:
4527; X86-BMI2:       # %bb.0:
4528; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
4529; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
4530; X86-BMI2-NEXT:    retl
4531;
4532; X64-NOBMI-LABEL: bzhi64_32_d1:
4533; X64-NOBMI:       # %bb.0:
4534; X64-NOBMI-NEXT:    movl %esi, %ecx
4535; X64-NOBMI-NEXT:    movq %rdi, %rax
4536; X64-NOBMI-NEXT:    negb %cl
4537; X64-NOBMI-NEXT:    shll %cl, %eax
4538; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4539; X64-NOBMI-NEXT:    shrl %cl, %eax
4540; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
4541; X64-NOBMI-NEXT:    retq
4542;
4543; X64-BMI1-LABEL: bzhi64_32_d1:
4544; X64-BMI1:       # %bb.0:
4545; X64-BMI1-NEXT:    shll $8, %esi
4546; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
4547; X64-BMI1-NEXT:    retq
4548;
4549; X64-BMI2-LABEL: bzhi64_32_d1:
4550; X64-BMI2:       # %bb.0:
4551; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
4552; X64-BMI2-NEXT:    retq
  ; The i64 input is only consumed through its low 32 bits; the shift amount
  ; is computed against a width of 32, not 64.
4553  %truncval = trunc i64 %val to i32
4554  %numhighbits = sub i32 32, %numlowbits
4555  %highbitscleared = shl i32 %truncval, %numhighbits
4556  %masked = lshr i32 %highbitscleared, %numhighbits
4557  ret i32 %masked
4558}
4559
4560; ---------------------------------------------------------------------------- ;
4561; Constant mask
4562; ---------------------------------------------------------------------------- ;
4563
4564; 32-bit
4565
; Constant low-bit mask on an i32 argument: and with 0x7FFFFFFF. Despite the
; `mask32` suffix the constant has 31 set bits.
; All feature combinations share one set of assertions per pointer width, and
; those expect a plain andl with the immediate; no bextr/bzhi is expected.
4566define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
4567; X86-LABEL: bzhi32_constant_mask32:
4568; X86:       # %bb.0:
4569; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4570; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4571; X86-NEXT:    retl
4572;
4573; X64-LABEL: bzhi32_constant_mask32:
4574; X64:       # %bb.0:
4575; X64-NEXT:    movl %edi, %eax
4576; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4577; X64-NEXT:    retq
4578  %masked = and i32 %val, 2147483647
4579  ret i32 %masked
4580}
4581
; Same 0x7FFFFFFF mask (31 set bits), but the i32 value is loaded through the
; pointer argument.
; The assertions expect the constant to be materialized in a register and the
; load folded into the andl memory operand, for every feature combination.
4582define i32 @bzhi32_constant_mask32_load(i32* %val) nounwind {
4583; X86-LABEL: bzhi32_constant_mask32_load:
4584; X86:       # %bb.0:
4585; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4586; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4587; X86-NEXT:    andl (%ecx), %eax
4588; X86-NEXT:    retl
4589;
4590; X64-LABEL: bzhi32_constant_mask32_load:
4591; X64:       # %bb.0:
4592; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4593; X64-NEXT:    andl (%rdi), %eax
4594; X64-NEXT:    retq
4595  %val1 = load i32, i32* %val
4596  %masked = and i32 %val1, 2147483647
4597  ret i32 %masked
4598}
4599
; Constant low-bit mask on an i32 argument: and with 0x7FFF. Despite the
; `mask16` suffix the constant has 15 set bits.
; All feature combinations share one set of assertions per pointer width, and
; those expect a plain andl; no bextr/bzhi is expected.
4600define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
4601; X86-LABEL: bzhi32_constant_mask16:
4602; X86:       # %bb.0:
4603; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4604; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4605; X86-NEXT:    retl
4606;
4607; X64-LABEL: bzhi32_constant_mask16:
4608; X64:       # %bb.0:
4609; X64-NEXT:    movl %edi, %eax
4610; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4611; X64-NEXT:    retq
4612  %masked = and i32 %val, 32767
4613  ret i32 %masked
4614}
4615
; Same 0x7FFF mask (15 set bits), but the i32 value is loaded through the
; pointer argument.
; The assertions expect the constant to be materialized in a register and the
; load folded into the andl memory operand, for every feature combination.
4616define i32 @bzhi32_constant_mask16_load(i32* %val) nounwind {
4617; X86-LABEL: bzhi32_constant_mask16_load:
4618; X86:       # %bb.0:
4619; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4620; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4621; X86-NEXT:    andl (%ecx), %eax
4622; X86-NEXT:    retl
4623;
4624; X64-LABEL: bzhi32_constant_mask16_load:
4625; X64:       # %bb.0:
4626; X64-NEXT:    movl $32767, %eax # imm = 0x7FFF
4627; X64-NEXT:    andl (%rdi), %eax
4628; X64-NEXT:    retq
4629  %val1 = load i32, i32* %val
4630  %masked = and i32 %val1, 32767
4631  ret i32 %masked
4632}
4633
; Constant low-bit mask on an i32 argument: and with 127 (0x7F). Despite the
; `mask8` suffix the constant has 7 set bits.
; All feature combinations share one set of assertions per pointer width:
; the value is moved into the return register and masked with `andl $127`.
4634define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
4635; X86-LABEL: bzhi32_constant_mask8:
4636; X86:       # %bb.0:
4637; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4638; X86-NEXT:    andl $127, %eax
4639; X86-NEXT:    retl
4640;
4641; X64-LABEL: bzhi32_constant_mask8:
4642; X64:       # %bb.0:
4643; X64-NEXT:    movl %edi, %eax
4644; X64-NEXT:    andl $127, %eax
4645; X64-NEXT:    retq
4646  %masked = and i32 %val, 127
4647  ret i32 %masked
4648}
4649
; Same 127 (0x7F, 7 set bits) mask, but the i32 value is loaded through the
; pointer argument.
; Unlike the wider-constant load variants, the assertions here expect a
; separate load followed by `andl $127` on the register.
4650define i32 @bzhi32_constant_mask8_load(i32* %val) nounwind {
4651; X86-LABEL: bzhi32_constant_mask8_load:
4652; X86:       # %bb.0:
4653; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4654; X86-NEXT:    movl (%eax), %eax
4655; X86-NEXT:    andl $127, %eax
4656; X86-NEXT:    retl
4657;
4658; X64-LABEL: bzhi32_constant_mask8_load:
4659; X64:       # %bb.0:
4660; X64-NEXT:    movl (%rdi), %eax
4661; X64-NEXT:    andl $127, %eax
4662; X64-NEXT:    retq
4663  %val1 = load i32, i32* %val
4664  %masked = and i32 %val1, 127
4665  ret i32 %masked
4666}
4667
4668; 64-bit
4669
; Constant low-bit mask on an i64 argument: and with 0x3FFFFFFFFFFFFFFF.
; Despite the `mask64` suffix the constant has 62 set bits.
; This mask does not fit a 32-bit immediate, so the x86_64 assertions differ
; by feature set:
;   - no BMI:            movabsq of the mask + andq
;   - BMI1 without TBM:  bextrq with control 0x3E00 (length 62, start 0) in a
;                        register
;   - TBM (with or without BMI2): bextrq with the same control as an immediate
;   - BMI2 without TBM:  bzhiq with index 62
; On i686 the low half is returned unchanged and only the high half is masked
; with 0x3FFFFFFF.
4670define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
4671; X86-LABEL: bzhi64_constant_mask64:
4672; X86:       # %bb.0:
4673; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4674; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
4675; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
4676; X86-NEXT:    retl
4677;
4678; X64-NOBMI-LABEL: bzhi64_constant_mask64:
4679; X64-NOBMI:       # %bb.0:
4680; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
4681; X64-NOBMI-NEXT:    andq %rdi, %rax
4682; X64-NOBMI-NEXT:    retq
4683;
4684; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64:
4685; X64-BMI1NOTBM:       # %bb.0:
4686; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
4687; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
4688; X64-BMI1NOTBM-NEXT:    retq
4689;
4690; X64-BMI1TBM-LABEL: bzhi64_constant_mask64:
4691; X64-BMI1TBM:       # %bb.0:
4692; X64-BMI1TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
4693; X64-BMI1TBM-NEXT:    retq
4694;
4695; X64-BMI2TBM-LABEL: bzhi64_constant_mask64:
4696; X64-BMI2TBM:       # %bb.0:
4697; X64-BMI2TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
4698; X64-BMI2TBM-NEXT:    retq
4699;
4700; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64:
4701; X64-BMI2NOTBM:       # %bb.0:
4702; X64-BMI2NOTBM-NEXT:    movb $62, %al
4703; X64-BMI2NOTBM-NEXT:    bzhiq %rax, %rdi, %rax
4704; X64-BMI2NOTBM-NEXT:    retq
4705  %masked = and i64 %val, 4611686018427387903
4706  ret i64 %masked
4707}
4708
; Same 0x3FFFFFFFFFFFFFFF mask (62 set bits), but the i64 value is loaded
; through the pointer argument.
; The x86_64 assertions mirror the register variant per feature set (andq,
; bextrq with control 0x3E00, or bzhiq with index 62), each with the load
; folded into the instruction's memory operand. On i686 the low word is loaded
; unchanged and the word at offset 4 is masked with 0x3FFFFFFF.
4709define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind {
4710; X86-LABEL: bzhi64_constant_mask64_load:
4711; X86:       # %bb.0:
4712; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4713; X86-NEXT:    movl (%ecx), %eax
4714; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
4715; X86-NEXT:    andl 4(%ecx), %edx
4716; X86-NEXT:    retl
4717;
4718; X64-NOBMI-LABEL: bzhi64_constant_mask64_load:
4719; X64-NOBMI:       # %bb.0:
4720; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
4721; X64-NOBMI-NEXT:    andq (%rdi), %rax
4722; X64-NOBMI-NEXT:    retq
4723;
4724; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64_load:
4725; X64-BMI1NOTBM:       # %bb.0:
4726; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
4727; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
4728; X64-BMI1NOTBM-NEXT:    retq
4729;
4730; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load:
4731; X64-BMI1TBM:       # %bb.0:
4732; X64-BMI1TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
4733; X64-BMI1TBM-NEXT:    retq
4734;
4735; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load:
4736; X64-BMI2TBM:       # %bb.0:
4737; X64-BMI2TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
4738; X64-BMI2TBM-NEXT:    retq
4739;
4740; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load:
4741; X64-BMI2NOTBM:       # %bb.0:
4742; X64-BMI2NOTBM-NEXT:    movb $62, %al
4743; X64-BMI2NOTBM-NEXT:    bzhiq %rax, (%rdi), %rax
4744; X64-BMI2NOTBM-NEXT:    retq
4745  %val1 = load i64, i64* %val
4746  %masked = and i64 %val1, 4611686018427387903
4747  ret i64 %masked
4748}
4749
; i64 argument masked with 0x7FFFFFFF (31 set bits, all within the low word).
; All feature combinations share one set of assertions per pointer width.
; On x86_64 a 32-bit andl on %eax is expected; writing the 32-bit register
; also zeroes bits 63:32 of the result. On i686 the low word is masked and the
; high word is zeroed with xorl.
4750define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
4751; X86-LABEL: bzhi64_constant_mask32:
4752; X86:       # %bb.0:
4753; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4754; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4755; X86-NEXT:    xorl %edx, %edx
4756; X86-NEXT:    retl
4757;
4758; X64-LABEL: bzhi64_constant_mask32:
4759; X64:       # %bb.0:
4760; X64-NEXT:    movq %rdi, %rax
4761; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4762; X64-NEXT:    retq
4763  %masked = and i64 %val, 2147483647
4764  ret i64 %masked
4765}
4766
; Same 0x7FFFFFFF mask (31 set bits), but the i64 value is loaded through the
; pointer argument.
; On x86_64 the assertions expect a full 64-bit load followed by a 32-bit
; andl. On i686 only the low word is read (folded into andl) and the high
; word of the result is zeroed with xorl.
4767define i64 @bzhi64_constant_mask32_load(i64* %val) nounwind {
4768; X86-LABEL: bzhi64_constant_mask32_load:
4769; X86:       # %bb.0:
4770; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4771; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4772; X86-NEXT:    andl (%ecx), %eax
4773; X86-NEXT:    xorl %edx, %edx
4774; X86-NEXT:    retl
4775;
4776; X64-LABEL: bzhi64_constant_mask32_load:
4777; X64:       # %bb.0:
4778; X64-NEXT:    movq (%rdi), %rax
4779; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4780; X64-NEXT:    retq
4781  %val1 = load i64, i64* %val
4782  %masked = and i64 %val1, 2147483647
4783  ret i64 %masked
4784}
4785
; i64 argument masked with 0x7FFF (15 set bits, all within the low word).
; All feature combinations share one set of assertions per pointer width.
; On x86_64 a 32-bit andl on %eax is expected; on i686 the low word is masked
; and the high word is zeroed with xorl.
4786define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
4787; X86-LABEL: bzhi64_constant_mask16:
4788; X86:       # %bb.0:
4789; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4790; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4791; X86-NEXT:    xorl %edx, %edx
4792; X86-NEXT:    retl
4793;
4794; X64-LABEL: bzhi64_constant_mask16:
4795; X64:       # %bb.0:
4796; X64-NEXT:    movq %rdi, %rax
4797; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4798; X64-NEXT:    retq
4799  %masked = and i64 %val, 32767
4800  ret i64 %masked
4801}
4802
; Same 0x7FFF mask (15 set bits), but the i64 value is loaded through the
; pointer argument.
; On x86_64 the assertions expect a full 64-bit load followed by a 32-bit
; andl. On i686 only the low word is read (folded into andl) and the high
; word of the result is zeroed with xorl.
4803define i64 @bzhi64_constant_mask16_load(i64* %val) nounwind {
4804; X86-LABEL: bzhi64_constant_mask16_load:
4805; X86:       # %bb.0:
4806; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4807; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4808; X86-NEXT:    andl (%ecx), %eax
4809; X86-NEXT:    xorl %edx, %edx
4810; X86-NEXT:    retl
4811;
4812; X64-LABEL: bzhi64_constant_mask16_load:
4813; X64:       # %bb.0:
4814; X64-NEXT:    movq (%rdi), %rax
4815; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4816; X64-NEXT:    retq
4817  %val1 = load i64, i64* %val
4818  %masked = and i64 %val1, 32767
4819  ret i64 %masked
4820}
4821
; i64 argument masked with 127 (0x7F, 7 set bits, all within the low word).
; All feature combinations share one set of assertions per pointer width.
; On x86_64 a 32-bit `andl $127` on %eax is expected; on i686 the low word is
; masked and the high word is zeroed with xorl.
4822define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
4823; X86-LABEL: bzhi64_constant_mask8:
4824; X86:       # %bb.0:
4825; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4826; X86-NEXT:    andl $127, %eax
4827; X86-NEXT:    xorl %edx, %edx
4828; X86-NEXT:    retl
4829;
4830; X64-LABEL: bzhi64_constant_mask8:
4831; X64:       # %bb.0:
4832; X64-NEXT:    movq %rdi, %rax
4833; X64-NEXT:    andl $127, %eax
4834; X64-NEXT:    retq
4835  %masked = and i64 %val, 127
4836  ret i64 %masked
4837}
4838
; Same 127 (0x7F, 7 set bits) mask, but the i64 value is loaded through the
; pointer argument.
; On x86_64 the assertions expect a full 64-bit load followed by a 32-bit
; `andl $127`. On i686 only the low word is loaded and masked, and the high
; word of the result is zeroed with xorl.
4839define i64 @bzhi64_constant_mask8_load(i64* %val) nounwind {
4840; X86-LABEL: bzhi64_constant_mask8_load:
4841; X86:       # %bb.0:
4842; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4843; X86-NEXT:    movl (%eax), %eax
4844; X86-NEXT:    andl $127, %eax
4845; X86-NEXT:    xorl %edx, %edx
4846; X86-NEXT:    retl
4847;
4848; X64-LABEL: bzhi64_constant_mask8_load:
4849; X64:       # %bb.0:
4850; X64-NEXT:    movq (%rdi), %rax
4851; X64-NEXT:    andl $127, %eax
4852; X64-NEXT:    retq
4853  %val1 = load i64, i64* %val
4854  %masked = and i64 %val1, 127
4855  ret i64 %masked
4856}
4857