; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64

; Check that under certain conditions we can factor out a rotate
; from the following idioms:
;   (a*c0) >> s1 | (a*c1)
;   (a/c0) << s1 | (a/c1)
; This targets cases where instcombine has folded a shl/srl/mul/udiv
; with one of the shifts from the rotate idiom.

; ((i << 3) >> 57) | (i << 10): with x = i << 3 this is (x >> 57) | (x << 7),
; and 57 + 7 == 64, so it is a 64-bit rotate-left of x by 7. The x86-64
; checks expect a single rolq; the i686 checks expect the split shift/or form.
define i64 @rolq_extract_shl(i64 %i) nounwind {
; X86-LABEL: rolq_extract_shl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    leal (,%edx,8), %eax
; X86-NEXT:    shldl $10, %ecx, %edx
; X86-NEXT:    shll $10, %ecx
; X86-NEXT:    shrl $25, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rolq_extract_shl:
; X64:       # %bb.0:
; X64-NEXT:    leaq (,%rdi,8), %rax
; X64-NEXT:    rolq $7, %rax
; X64-NEXT:    retq
  %lhs_mul = shl i64 %i, 3
  %rhs_mul = shl i64 %i, 10
  %lhs_shift = lshr i64 %lhs_mul, 57
  %out = or i64 %lhs_shift, %rhs_mul
  ret i64 %out
}

; (i >> 7) | ((i >> 3) << 12): with x = i >> 3 this is (x >> 4) | (x << 12),
; and 4 + 12 == 16, so it is a 16-bit rotate-left of x by 12.
define i16 @rolw_extract_shrl(i16 %i) nounwind {
; X86-LABEL: rolw_extract_shrl:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $3, %eax
; X86-NEXT:    rolw $12, %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: rolw_extract_shrl:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    shrl $3, %eax
; X64-NEXT:    rolw $12, %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %lhs_div = lshr i16 %i, 7
  %rhs_div = lshr i16 %i, 3
  %rhs_shift = shl i16 %rhs_div, 12
  %out = or i16 %lhs_div, %rhs_shift
  ret i16 %out
}

; ((i * 9) >> 25) | (i * 1152): 1152 == 9 << 7 and 25 + 7 == 32, so this is a
; 32-bit rotate-left of (i * 9) by 7.
define i32 @roll_extract_mul(i32 %i) nounwind {
; X86-LABEL: roll_extract_mul:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax,8), %eax
; X86-NEXT:    roll $7, %eax
; X86-NEXT:    retl
;
; X64-LABEL: roll_extract_mul:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi,8), %eax
; X64-NEXT:    roll $7, %eax
; X64-NEXT:    retq
  %lhs_mul = mul i32 %i, 9
  %rhs_mul = mul i32 %i, 1152
  %lhs_shift = lshr i32 %lhs_mul, 25
  %out = or i32 %lhs_shift, %rhs_mul
  ret i32 %out
}

; ((i / 3) << 4) | (i / 48): 48 == 3 * 16, so with x = i / 3 this is
; (x << 4) | (x >> 4), an 8-bit rotate by 4.
define i8 @rolb_extract_udiv(i8 %i) nounwind {
; X86-LABEL: rolb_extract_udiv:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull $171, %eax, %eax
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    rolb $4, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: rolb_extract_udiv:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    imull $171, %eax, %eax
; X64-NEXT:    shrl $9, %eax
; X64-NEXT:    rolb $4, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %lhs_div = udiv i8 %i, 3
  %rhs_div = udiv i8 %i, 48
  %lhs_shift = shl i8 %lhs_div, 4
  %out = or i8 %lhs_shift, %rhs_div
  ret i8 %out
}

; Same mul-based idiom as roll_extract_mul, but the shifted-left half is
; masked with 160 before the or. The x86-64 checks still expect a rolq by 7
; of (i * 9), followed by a zero-extension of the low byte.
define i64 @rolq_extract_mul_with_mask(i64 %i) nounwind {
; X86-LABEL: rolq_extract_mul_with_mask:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shll $7, %ecx
; X86-NEXT:    leal (%ecx,%ecx,8), %ecx
; X86-NEXT:    movl $9, %edx
; X86-NEXT:    mull %edx
; X86-NEXT:    leal (%esi,%esi,8), %eax
; X86-NEXT:    addl %edx, %eax
; X86-NEXT:    movzbl %cl, %ecx
; X86-NEXT:    shrl $25, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: rolq_extract_mul_with_mask:
; X64:       # %bb.0:
; X64-NEXT:    leaq (%rdi,%rdi,8), %rax
; X64-NEXT:    rolq $7, %rax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %lhs_mul = mul i64 %i, 1152
  %rhs_mul = mul i64 %i, 9
  %lhs_and = and i64 %lhs_mul, 160
  %rhs_shift = lshr i64 %rhs_mul, 57
  %out = or i64 %lhs_and, %rhs_shift
  ret i64 %out
}

; Result would undershift
; (factoring out shl 5 leaves shifts of 57 and 5, which sum to 62, not 64).
define i64 @no_extract_shl(i64 %i) nounwind {
; X86-LABEL: no_extract_shl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    shll $5, %eax
; X86-NEXT:    shldl $10, %ecx, %edx
; X86-NEXT:    shll $10, %ecx
; X86-NEXT:    shrl $25, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_shl:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $5, %rax
; X64-NEXT:    shlq $10, %rdi
; X64-NEXT:    shrq $57, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
  %lhs_mul = shl i64 %i, 5
  %rhs_mul = shl i64 %i, 10
  %lhs_shift = lshr i64 %lhs_mul, 57
  %out = or i64 %lhs_shift, %rhs_mul
  ret i64 %out
}

; Result would overshift
; (factoring out lshr 3 leaves shifts of 28 and 6, which sum to 34, not 32).
define i32 @no_extract_shrl(i32 %i) nounwind {
; X86-LABEL: no_extract_shrl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $-8, %ecx
; X86-NEXT:    shll $25, %ecx
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_shrl:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $-8, %eax
; X64-NEXT:    shll $25, %eax
; X64-NEXT:    shrl $9, %edi
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
  %lhs_div = lshr i32 %i, 3
  %rhs_div = lshr i32 %i, 9
  %lhs_shift = shl i32 %lhs_div, 28
  %out = or i32 %lhs_shift, %rhs_div
  ret i32 %out
}

; Can factor 128 from 2304, but result is 18 instead of 9
define i16 @no_extract_mul(i16 %i) nounwind {
; X86-LABEL: no_extract_mul:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax,8), %ecx
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    leal (%eax,%eax,8), %edx
; X86-NEXT:    movzwl %cx, %eax
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_mul:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi,8), %eax
; X64-NEXT:    # kill: def $edi killed $edi killed $rdi def $rdi
; X64-NEXT:    shll $8, %edi
; X64-NEXT:    leal (%rdi,%rdi,8), %ecx
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl $9, %eax
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %lhs_mul = mul i16 %i, 2304
  %rhs_mul = mul i16 %i, 9
  %rhs_shift = lshr i16 %rhs_mul, 9
  %out = or i16 %lhs_mul, %rhs_shift
  ret i16 %out
}

; Can't evenly factor 16 from 49
define i8 @no_extract_udiv(i8 %i) nounwind {
; X86-LABEL: no_extract_udiv:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull $171, %eax, %ecx
; X86-NEXT:    imull $79, %eax, %edx
; X86-NEXT:    subb %dh, %al
; X86-NEXT:    shrb %al
; X86-NEXT:    addb %dh, %al
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    shlb $3, %ch
; X86-NEXT:    orb %al, %ch
; X86-NEXT:    andb $-9, %ch
; X86-NEXT:    movb %ch, %al
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_udiv:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %ecx
; X64-NEXT:    imull $171, %ecx, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    imull $79, %ecx, %edx
; X64-NEXT:    shrl $8, %edx
; X64-NEXT:    subb %dl, %cl
; X64-NEXT:    shrb %cl
; X64-NEXT:    addb %dl, %cl
; X64-NEXT:    shrb $5, %cl
; X64-NEXT:    shlb $3, %al
; X64-NEXT:    orb %cl, %al
; X64-NEXT:    andb $-9, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %lhs_div = udiv i8 %i, 3
  %rhs_div = udiv i8 %i, 49
  %lhs_shift = shl i8 %lhs_div, 4
  %out = or i8 %lhs_shift, %rhs_div
  ret i8 %out
}

; DAGCombiner transforms shl X, 1 into add X, X.
; (i + i) | (i >> 31) must therefore still be matched as a rotate-left by 1.
define i32 @extract_add_1(i32 %i) nounwind {
; X86-LABEL: extract_add_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %eax
; X86-NEXT:    retl
;
; X64-LABEL: extract_add_1:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    roll %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %rhs = lshr i32 %i, 31
  %out = or i32 %ii, %rhs
  ret i32 %out
}

; Same as extract_add_1 with the operands of the or commuted.
define i32 @extract_add_1_comut(i32 %i) nounwind {
; X86-LABEL: extract_add_1_comut:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %eax
; X86-NEXT:    retl
;
; X64-LABEL: extract_add_1_comut:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    roll %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %lhs = lshr i32 %i, 31
  %out = or i32 %lhs, %ii
  ret i32 %out
}

; Negative case: (i + i) | (i >> 27). The shift amounts 1 and 27 do not sum
; to 32, so no rotate is expected.
define i32 @no_extract_add_1(i32 %i) nounwind {
; X86-LABEL: no_extract_add_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax), %ecx
; X86-NEXT:    shrl $27, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_add_1:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi), %eax
; X64-NEXT:    shrl $27, %edi
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %rhs = lshr i32 %i, 27
  %out = or i32 %ii, %rhs
  ret i32 %out
}