; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64

; Check that under certain conditions we can factor out a rotate
; from the following idioms:
;   (a*c0) >> s1 | (a*c1)
;   (a/c0) << s1 | (a/c1)
; This targets cases where instcombine has folded a shl/srl/mul/udiv
; with one of the shifts from the rotate idiom.

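; (i << 3) >> 57 | (i << 10) is rotl(i << 3, 7): (i << 10) == ((i << 3) << 7)
; and the shift amounts 57 and 7 sum to the bit width 64.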
define i64 @rolq_extract_shl(i64 %i) nounwind {
; X86-LABEL: rolq_extract_shl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    leal (,%edx,8), %eax
; X86-NEXT:    shldl $10, %ecx, %edx
; X86-NEXT:    shll $10, %ecx
; X86-NEXT:    shrl $25, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rolq_extract_shl:
; X64:       # %bb.0:
; X64-NEXT:    leaq (,%rdi,8), %rax
; X64-NEXT:    rolq $7, %rax
; X64-NEXT:    retq
  %lhs_mul = shl i64 %i, 3
  %rhs_mul = shl i64 %i, 10
  %lhs_shift = lshr i64 %lhs_mul, 57
  %out = or i64 %lhs_shift, %rhs_mul
  ret i64 %out
}

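; (i >> 7) | ((i >> 3) << 12) is rotl(i >> 3, 12): i >> 7 == (i >> 3) >> 4
; and the shift amounts 12 and 4 sum to the bit width 16.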
define i16 @rolw_extract_shrl(i16 %i) nounwind {
; X86-LABEL: rolw_extract_shrl:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $3, %eax
; X86-NEXT:    rolw $12, %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: rolw_extract_shrl:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    shrl $3, %eax
; X64-NEXT:    rolw $12, %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %lhs_div = lshr i16 %i, 7
  %rhs_div = lshr i16 %i, 3
  %rhs_shift = shl i16 %rhs_div, 12
  %out = or i16 %lhs_div, %rhs_shift
  ret i16 %out
}

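; ((i * 9) >> 25) | (i * 1152) is rotl(i * 9, 7): 1152 == 9 << 7 and the shift
; amounts 25 and 7 sum to the bit width 32.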
define i32 @roll_extract_mul(i32 %i) nounwind {
; X86-LABEL: roll_extract_mul:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax,8), %eax
; X86-NEXT:    roll $7, %eax
; X86-NEXT:    retl
;
; X64-LABEL: roll_extract_mul:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi,8), %eax
; X64-NEXT:    roll $7, %eax
; X64-NEXT:    retq
  %lhs_mul = mul i32 %i, 9
  %rhs_mul = mul i32 %i, 1152
  %lhs_shift = lshr i32 %lhs_mul, 25
  %out = or i32 %lhs_shift, %rhs_mul
  ret i32 %out
}

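; ((i / 3) << 4) | (i / 48) is rotl(i / 3, 4): i / 48 == (i / 3) >> 4 (unsigned)
; and the shift amounts 4 and 4 sum to the bit width 8.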
define i8 @rolb_extract_udiv(i8 %i) nounwind {
; X86-LABEL: rolb_extract_udiv:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull $171, %eax, %eax
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    rolb $4, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: rolb_extract_udiv:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    imull $171, %eax, %eax
; X64-NEXT:    shrl $9, %eax
; X64-NEXT:    rolb $4, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %lhs_div = udiv i8 %i, 3
  %rhs_div = udiv i8 %i, 48
  %lhs_shift = shl i8 %lhs_div, 4
  %out = or i8 %lhs_shift, %rhs_div
  ret i8 %out
}

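; A mask on one operand of the or does not block the extraction on x86-64:
; the rotate is still formed and the result is masked afterwards.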
define i64 @rolq_extract_mul_with_mask(i64 %i) nounwind {
; X86-LABEL: rolq_extract_mul_with_mask:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shll $7, %ecx
; X86-NEXT:    leal (%ecx,%ecx,8), %ecx
; X86-NEXT:    movl $9, %edx
; X86-NEXT:    mull %edx
; X86-NEXT:    leal (%esi,%esi,8), %eax
; X86-NEXT:    addl %edx, %eax
; X86-NEXT:    movzbl %cl, %ecx
; X86-NEXT:    shrl $25, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: rolq_extract_mul_with_mask:
; X64:       # %bb.0:
; X64-NEXT:    leaq (%rdi,%rdi,8), %rax
; X64-NEXT:    rolq $7, %rax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %lhs_mul = mul i64 %i, 1152
  %rhs_mul = mul i64 %i, 9
  %lhs_and = and i64 %lhs_mul, 160
  %rhs_shift = lshr i64 %rhs_mul, 57
  %out = or i64 %lhs_and, %rhs_shift
  ret i64 %out
}

; Result would undershift: rotating (i << 5) needs shift amounts summing to 64,
; but here 5 + 57 == 62.
define i64 @no_extract_shl(i64 %i) nounwind {
; X86-LABEL: no_extract_shl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    shll $5, %eax
; X86-NEXT:    shldl $10, %ecx, %edx
; X86-NEXT:    shll $10, %ecx
; X86-NEXT:    shrl $25, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_shl:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $5, %rax
; X64-NEXT:    shlq $10, %rdi
; X64-NEXT:    shrq $57, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
  %lhs_mul = shl i64 %i, 5
  %rhs_mul = shl i64 %i, 10
  %lhs_shift = lshr i64 %lhs_mul, 57
  %out = or i64 %lhs_shift, %rhs_mul
  ret i64 %out
}

; Result would overshift: rotating (i >> 3) needs shift amounts summing to 32,
; but here 28 + 6 == 34.
define i32 @no_extract_shrl(i32 %i) nounwind {
; X86-LABEL: no_extract_shrl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $-8, %ecx
; X86-NEXT:    shll $25, %ecx
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_shrl:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $-8, %eax
; X64-NEXT:    shll $25, %eax
; X64-NEXT:    shrl $9, %edi
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
  %lhs_div = lshr i32 %i, 3
  %rhs_div = lshr i32 %i, 9
  %lhs_shift = shl i32 %lhs_div, 28
  %out = or i32 %lhs_shift, %rhs_div
  ret i32 %out
}

; Can factor 128 from 2304, but the remaining factor is 18 instead of 9, so the
; two multiplies do not describe a rotate of the same value.
define i16 @no_extract_mul(i16 %i) nounwind {
; X86-LABEL: no_extract_mul:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax,8), %ecx
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    leal (%eax,%eax,8), %edx
; X86-NEXT:    movzwl %cx, %eax
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_mul:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi,8), %eax
; X64-NEXT:    # kill: def $edi killed $edi killed $rdi def $rdi
; X64-NEXT:    shll $8, %edi
; X64-NEXT:    leal (%rdi,%rdi,8), %ecx
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl $9, %eax
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %lhs_mul = mul i16 %i, 2304
  %rhs_mul = mul i16 %i, 9
  %rhs_shift = lshr i16 %rhs_mul, 9
  %out = or i16 %lhs_mul, %rhs_shift
  ret i16 %out
}

; Can't evenly factor 16 from 49: a rotate of the i/3 value would need the other
; divisor to be 48 == 3 * 16.
define i8 @no_extract_udiv(i8 %i) nounwind {
; X86-LABEL: no_extract_udiv:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull $171, %eax, %ecx
; X86-NEXT:    imull $79, %eax, %edx
; X86-NEXT:    subb %dh, %al
; X86-NEXT:    shrb %al
; X86-NEXT:    addb %dh, %al
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    shlb $3, %ch
; X86-NEXT:    orb %al, %ch
; X86-NEXT:    andb $-9, %ch
; X86-NEXT:    movb %ch, %al
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_udiv:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %ecx
; X64-NEXT:    imull $171, %ecx, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    imull $79, %ecx, %edx
; X64-NEXT:    shrl $8, %edx
; X64-NEXT:    subb %dl, %cl
; X64-NEXT:    shrb %cl
; X64-NEXT:    addb %dl, %cl
; X64-NEXT:    shrb $5, %cl
; X64-NEXT:    shlb $3, %al
; X64-NEXT:    orb %cl, %al
; X64-NEXT:    andb $-9, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %lhs_div = udiv i8 %i, 3
  %rhs_div = udiv i8 %i, 49
  %lhs_shift = shl i8 %lhs_div, 4
  %out = or i8 %lhs_shift, %rhs_div
  ret i8 %out
}

; DAGCombiner transforms shl X, 1 into add X, X.
define i32 @extract_add_1(i32 %i) nounwind {
; X86-LABEL: extract_add_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %eax
; X86-NEXT:    retl
;
; X64-LABEL: extract_add_1:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    roll %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %rhs = lshr i32 %i, 31
  %out = or i32 %ii, %rhs
  ret i32 %out
}

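; Same as extract_add_1, but with the operands of the or commuted.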
define i32 @extract_add_1_comut(i32 %i) nounwind {
; X86-LABEL: extract_add_1_comut:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %eax
; X86-NEXT:    retl
;
; X64-LABEL: extract_add_1_comut:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    roll %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %lhs = lshr i32 %i, 31
  %out = or i32 %lhs, %ii
  ret i32 %out
}

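; Not a rotate: the add doubles %i (a shift by 1), but 1 + 27 == 28, not 32.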
define i32 @no_extract_add_1(i32 %i) nounwind {
; X86-LABEL: no_extract_add_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax), %ecx
; X86-NEXT:    shrl $27, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_add_1:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi), %eax
; X64-NEXT:    shrl $27, %edi
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %rhs = lshr i32 %i, 27
  %out = or i32 %ii, %rhs
  ret i32 %out
}