; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=X86XOP

; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.

; Vector case: bitreverse applied to a <2 x i16> argument.
declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone

define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X86-LABEL: test_bitreverse_v2i16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl $3855, %edx # imm = 0xF0F
; X86-NEXT:    shll $4, %edx
; X86-NEXT:    andl $61680, %eax # imm = 0xF0F0
; X86-NEXT:    shrl $4, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl $13107, %edx # imm = 0x3333
; X86-NEXT:    andl $52428, %eax # imm = 0xCCCC
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    leal (%eax,%edx,4), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl $21845, %edx # imm = 0x5555
; X86-NEXT:    andl $43690, %eax # imm = 0xAAAA
; X86-NEXT:    shrl %eax
; X86-NEXT:    leal (%eax,%edx,2), %eax
; X86-NEXT:    rolw $8, %cx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    andl $3855, %edx # imm = 0xF0F
; X86-NEXT:    shll $4, %edx
; X86-NEXT:    andl $61680, %ecx # imm = 0xF0F0
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    andl $13107, %edx # imm = 0x3333
; X86-NEXT:    andl $52428, %ecx # imm = 0xCCCC
; X86-NEXT:    shrl $2, %ecx
; X86-NEXT:    leal (%ecx,%edx,4), %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    andl $21845, %edx # imm = 0x5555
; X86-NEXT:    andl $43690, %ecx # imm = 0xAAAA
; X86-NEXT:    shrl %ecx
; X86-NEXT:    leal (%ecx,%edx,2), %edx
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    # kill: def $dx killed $dx killed $edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_v2i16:
; X64:       # %bb.0:
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlw $8, %xmm1
; X64-NEXT:    psllw $8, %xmm0
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllw $4, %xmm1
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    psrlw $4, %xmm0
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X64-NEXT:    pand %xmm0, %xmm1
; X64-NEXT:    psllw $2, %xmm1
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    psrlw $2, %xmm0
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X64-NEXT:    pand %xmm0, %xmm1
; X64-NEXT:    paddb %xmm1, %xmm1
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    psrlw $1, %xmm0
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    retq
;
; X86XOP-LABEL: test_bitreverse_v2i16:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT:    retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}

; Scalar i64: the i686 expectations reverse two 32-bit halves separately.
declare i64 @llvm.bitreverse.i64(i64) readnone

define i64 @test_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT:    shll $4, %edx
; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT:    shrl $4, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    leal (%eax,%edx,4), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT:    shrl %eax
; X86-NEXT:    leal (%eax,%edx,2), %eax
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT:    shll $4, %edx
; X86-NEXT:    andl $-252645136, %ecx # imm = 0xF0F0F0F0
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NEXT:    andl $-858993460, %ecx # imm = 0xCCCCCCCC
; X86-NEXT:    shrl $2, %ecx
; X86-NEXT:    leal (%ecx,%edx,4), %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT:    andl $-1431655766, %ecx # imm = 0xAAAAAAAA
; X86-NEXT:    shrl %ecx
; X86-NEXT:    leal (%ecx,%edx,2), %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_i64:
; X64:       # %bb.0:
; X64-NEXT:    bswapq %rdi
; X64-NEXT:    movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F
; X64-NEXT:    andq %rdi, %rax
; X64-NEXT:    shlq $4, %rax
; X64-NEXT:    movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0
; X64-NEXT:    andq %rdi, %rcx
; X64-NEXT:    shrq $4, %rcx
; X64-NEXT:    orq %rax, %rcx
; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NEXT:    andq %rcx, %rax
; X64-NEXT:    movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC
; X64-NEXT:    andq %rcx, %rdx
; X64-NEXT:    shrq $2, %rdx
; X64-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NEXT:    andq %rax, %rcx
; X64-NEXT:    movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT:    andq %rax, %rdx
; X64-NEXT:    shrq %rdx
; X64-NEXT:    leaq (%rdx,%rcx,2), %rax
; X64-NEXT:    retq
;
; X86XOP-LABEL: test_bitreverse_i64:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT:    vmovd %xmm0, %eax
; X86XOP-NEXT:    vpextrd $1, %xmm0, %edx
; X86XOP-NEXT:    retl
  %b = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %b
}

; Scalar i32.
declare i32 @llvm.bitreverse.i32(i32) readnone

define i32 @test_bitreverse_i32(i32 %a) nounwind {
; X86-LABEL: test_bitreverse_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT:    shll $4, %ecx
; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT:    shrl $4, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    leal (%eax,%ecx,4), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT:    shrl %eax
; X86-NEXT:    leal (%eax,%ecx,2), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_i32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    bswapl %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT:    shll $4, %eax
; X64-NEXT:    andl $-252645136, %edi # imm = 0xF0F0F0F0
; X64-NEXT:    shrl $4, %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X64-NEXT:    andl $-858993460, %edi # imm = 0xCCCCCCCC
; X64-NEXT:    shrl $2, %edi
; X64-NEXT:    leal (%rdi,%rax,4), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
; X64-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT:    shrl %eax
; X64-NEXT:    leal (%rax,%rcx,2), %eax
; X64-NEXT:    retq
;
; X86XOP-LABEL: test_bitreverse_i32:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT:    vmovd %xmm0, %eax
; X86XOP-NEXT:    retl
  %b = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %b
}

; Scalar i24: the expected code uses the 32-bit sequence and finishes with a
; right shift by 8.
declare i24 @llvm.bitreverse.i24(i24) readnone

define i24 @test_bitreverse_i24(i24 %a) nounwind {
; X86-LABEL: test_bitreverse_i24:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT:    shll $4, %ecx
; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT:    shrl $4, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    leal (%eax,%ecx,4), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $1431655680, %ecx # imm = 0x55555500
; X86-NEXT:    andl $-1431655936, %eax # imm = 0xAAAAAA00
; X86-NEXT:    shrl %eax
; X86-NEXT:    leal (%eax,%ecx,2), %eax
; X86-NEXT:    shrl $8, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_i24:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    bswapl %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT:    shll $4, %eax
; X64-NEXT:    andl $-252645136, %edi # imm = 0xF0F0F0F0
; X64-NEXT:    shrl $4, %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X64-NEXT:    andl $-858993460, %edi # imm = 0xCCCCCCCC
; X64-NEXT:    shrl $2, %edi
; X64-NEXT:    leal (%rdi,%rax,4), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    andl $1431655680, %ecx # imm = 0x55555500
; X64-NEXT:    andl $-1431655936, %eax # imm = 0xAAAAAA00
; X64-NEXT:    shrl %eax
; X64-NEXT:    leal (%rax,%rcx,2), %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    retq
;
; X86XOP-LABEL: test_bitreverse_i24:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT:    vmovd %xmm0, %eax
; X86XOP-NEXT:    shrl $8, %eax
; X86XOP-NEXT:    retl
  %b = call i24 @llvm.bitreverse.i24(i24 %a)
  ret i24 %b
}

; Scalar i16.
declare i16 @llvm.bitreverse.i16(i16) readnone

define i16 @test_bitreverse_i16(i16 %a) nounwind {
; X86-LABEL: test_bitreverse_i16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
; X86-NEXT:    shll $4, %ecx
; X86-NEXT:    andl $61680, %eax # imm = 0xF0F0
; X86-NEXT:    shrl $4, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
; X86-NEXT:    andl $52428, %eax # imm = 0xCCCC
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    leal (%eax,%ecx,4), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
; X86-NEXT:    andl $43690, %eax # imm = 0xAAAA
; X86-NEXT:    shrl %eax
; X86-NEXT:    leal (%eax,%ecx,2), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_i16:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    rolw $8, %di
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
; X64-NEXT:    shll $4, %eax
; X64-NEXT:    andl $61680, %edi # imm = 0xF0F0
; X64-NEXT:    shrl $4, %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $13107, %eax # imm = 0x3333
; X64-NEXT:    andl $52428, %edi # imm = 0xCCCC
; X64-NEXT:    shrl $2, %edi
; X64-NEXT:    leal (%rdi,%rax,4), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    andl $21845, %ecx # imm = 0x5555
; X64-NEXT:    andl $43690, %eax # imm = 0xAAAA
; X64-NEXT:    shrl %eax
; X64-NEXT:    leal (%rax,%rcx,2), %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86XOP-LABEL: test_bitreverse_i16:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT:    vmovd %xmm0, %eax
; X86XOP-NEXT:    # kill: def $ax killed $ax killed $eax
; X86XOP-NEXT:    retl
  %b = call i16 @llvm.bitreverse.i16(i16 %a)
  ret i16 %b
}

; Scalar i8.
declare i8 @llvm.bitreverse.i8(i8) readnone

define i8 @test_bitreverse_i8(i8 %a) {
; X86-LABEL: test_bitreverse_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    rolb $4, %al
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andb $51, %cl
; X86-NEXT:    shlb $2, %cl
; X86-NEXT:    andb $-52, %al
; X86-NEXT:    shrb $2, %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andb $85, %cl
; X86-NEXT:    addb %cl, %cl
; X86-NEXT:    andb $-86, %al
; X86-NEXT:    shrb %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_i8:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    rolb $4, %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $51, %al
; X64-NEXT:    shlb $2, %al
; X64-NEXT:    andb $-52, %dil
; X64-NEXT:    shrb $2, %dil
; X64-NEXT:    orb %al, %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $85, %al
; X64-NEXT:    addb %al, %al
; X64-NEXT:    andb $-86, %dil
; X64-NEXT:    shrb %dil
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86XOP-LABEL: test_bitreverse_i8:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT:    vmovd %xmm0, %eax
; X86XOP-NEXT:    # kill: def $al killed $al killed $eax
; X86XOP-NEXT:    retl
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  ret i8 %b
}

; Scalar i4: the expected code uses the 8-bit sequence and finishes with a
; right shift by 4.
declare i4 @llvm.bitreverse.i4(i4) readnone

define i4 @test_bitreverse_i4(i4 %a) {
; X86-LABEL: test_bitreverse_i4:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    rolb $4, %al
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andb $51, %cl
; X86-NEXT:    shlb $2, %cl
; X86-NEXT:    andb $-52, %al
; X86-NEXT:    shrb $2, %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andb $80, %cl
; X86-NEXT:    addb %cl, %cl
; X86-NEXT:    andb $-96, %al
; X86-NEXT:    shrb %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    shrb $4, %al
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_i4:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    rolb $4, %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $51, %al
; X64-NEXT:    shlb $2, %al
; X64-NEXT:    andb $-52, %dil
; X64-NEXT:    shrb $2, %dil
; X64-NEXT:    orb %al, %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $80, %al
; X64-NEXT:    addb %al, %al
; X64-NEXT:    andb $-96, %dil
; X64-NEXT:    shrb %dil
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    shrb $4, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86XOP-LABEL: test_bitreverse_i4:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT:    vmovd %xmm0, %eax
; X86XOP-NEXT:    shrb $4, %al
; X86XOP-NEXT:    # kill: def $al killed $al killed $eax
; X86XOP-NEXT:    retl
  %b = call i4 @llvm.bitreverse.i4(i4 %a)
  ret i4 %b
}

; These tests check that bitreverse(constant) calls are folded

define <2 x i16> @fold_v2i16() {
; X86-LABEL: fold_v2i16:
; X86:       # %bb.0:
; X86-NEXT:    movw $-4096, %ax # imm = 0xF000
; X86-NEXT:    movw $240, %dx
; X86-NEXT:    retl
;
; X64-LABEL: fold_v2i16:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X64-NEXT:    retq
;
; X86XOP-LABEL: fold_v2i16:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    vmovaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X86XOP-NEXT:    retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
  ret <2 x i16> %b
}

define i24 @fold_i24() {
; X86-LABEL: fold_i24:
; X86:       # %bb.0:
; X86-NEXT:    movl $2048, %eax # imm = 0x800
; X86-NEXT:    retl
;
; X64-LABEL: fold_i24:
; X64:       # %bb.0:
; X64-NEXT:    movl $2048, %eax # imm = 0x800
; X64-NEXT:    retq
;
; X86XOP-LABEL: fold_i24:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    movl $2048, %eax # imm = 0x800
; X86XOP-NEXT:    retl
  %b = call i24 @llvm.bitreverse.i24(i24 4096)
  ret i24 %b
}

define i8 @fold_i8() {
; X86-LABEL: fold_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb $-16, %al
; X86-NEXT:    retl
;
; X64-LABEL: fold_i8:
; X64:       # %bb.0:
; X64-NEXT:    movb $-16, %al
; X64-NEXT:    retq
;
; X86XOP-LABEL: fold_i8:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    movb $-16, %al
; X86XOP-NEXT:    retl
  %b = call i8 @llvm.bitreverse.i8(i8 15)
  ret i8 %b
}

define i4 @fold_i4() {
; X86-LABEL: fold_i4:
; X86:       # %bb.0:
; X86-NEXT:    movb $1, %al
; X86-NEXT:    retl
;
; X64-LABEL: fold_i4:
; X64:       # %bb.0:
; X64-NEXT:    movb $1, %al
; X64-NEXT:    retq
;
; X86XOP-LABEL: fold_i4:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    movb $1, %al
; X86XOP-NEXT:    retl
  %b = call i4 @llvm.bitreverse.i4(i4 8)
  ret i4 %b
}

; These tests check that bitreverse(bitreverse()) calls are removed

define i8 @identity_i8(i8 %a) {
; X86-LABEL: identity_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
;
; X64-LABEL: identity_i8:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86XOP-LABEL: identity_i8:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86XOP-NEXT:    retl
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  %c = call i8 @llvm.bitreverse.i8(i8 %b)
  ret i8 %c
}

define <2 x i16> @identity_v2i16(<2 x i16> %a) {
; X86-LABEL: identity_v2i16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: identity_v2i16:
; X64:       # %bb.0:
; X64-NEXT:    retq
;
; X86XOP-LABEL: identity_v2i16:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  %c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
  ret <2 x i16> %c
}

; These tests check that bitreverse(undef) calls are removed

define i8 @undef_i8() {
; X86-LABEL: undef_i8:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; X64-LABEL: undef_i8:
; X64:       # %bb.0:
; X64-NEXT:    retq
;
; X86XOP-LABEL: undef_i8:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    retl
  %b = call i8 @llvm.bitreverse.i8(i8 undef)
  ret i8 %b
}

define <2 x i16> @undef_v2i16() {
; X86-LABEL: undef_v2i16:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; X64-LABEL: undef_v2i16:
; X64:       # %bb.0:
; X64-NEXT:    retq
;
; X86XOP-LABEL: undef_v2i16:
; X86XOP:       # %bb.0:
; X86XOP-NEXT:    retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
  ret <2 x i16> %b
}

; Make sure we don't assert during type legalization promoting a large
; bitreverse due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
606define i528 @large_promotion(i528 %A) nounwind { 607; X86-LABEL: large_promotion: 608; X86: # %bb.0: 609; X86-NEXT: pushl %ebp 610; X86-NEXT: pushl %ebx 611; X86-NEXT: pushl %edi 612; X86-NEXT: pushl %esi 613; X86-NEXT: subl $56, %esp 614; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 615; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 616; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 617; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 618; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 619; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 620; X86-NEXT: bswapl %ebx 621; X86-NEXT: movl %ebx, %ebp 622; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F 623; X86-NEXT: shll $4, %ebp 624; X86-NEXT: andl $-252645136, %ebx # imm = 0xF0F0F0F0 625; X86-NEXT: shrl $4, %ebx 626; X86-NEXT: orl %ebp, %ebx 627; X86-NEXT: movl %ebx, %ebp 628; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333 629; X86-NEXT: andl $-858993460, %ebx # imm = 0xCCCCCCCC 630; X86-NEXT: shrl $2, %ebx 631; X86-NEXT: leal (%ebx,%ebp,4), %ebx 632; X86-NEXT: movl %ebx, %ebp 633; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000 634; X86-NEXT: andl $-1431699456, %ebx # imm = 0xAAAA0000 635; X86-NEXT: shrl %ebx 636; X86-NEXT: leal (%ebx,%ebp,2), %ebx 637; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill 638; X86-NEXT: bswapl %edi 639; X86-NEXT: movl %edi, %ebx 640; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F 641; X86-NEXT: shll $4, %ebx 642; X86-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 643; X86-NEXT: shrl $4, %edi 644; X86-NEXT: orl %ebx, %edi 645; X86-NEXT: movl %edi, %ebx 646; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333 647; X86-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC 648; X86-NEXT: shrl $2, %edi 649; X86-NEXT: leal (%edi,%ebx,4), %edi 650; X86-NEXT: movl %edi, %ebx 651; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555 652; X86-NEXT: andl $-1431655766, %edi # imm = 0xAAAAAAAA 653; X86-NEXT: shrl %edi 654; X86-NEXT: leal (%edi,%ebx,2), %edi 655; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 656; X86-NEXT: bswapl 
%esi 657; X86-NEXT: movl %esi, %edi 658; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F 659; X86-NEXT: shll $4, %edi 660; X86-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0 661; X86-NEXT: shrl $4, %esi 662; X86-NEXT: orl %edi, %esi 663; X86-NEXT: movl %esi, %edi 664; X86-NEXT: andl $858993459, %edi # imm = 0x33333333 665; X86-NEXT: andl $-858993460, %esi # imm = 0xCCCCCCCC 666; X86-NEXT: shrl $2, %esi 667; X86-NEXT: leal (%esi,%edi,4), %esi 668; X86-NEXT: movl %esi, %edi 669; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555 670; X86-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA 671; X86-NEXT: shrl %esi 672; X86-NEXT: leal (%esi,%edi,2), %ebx 673; X86-NEXT: bswapl %edx 674; X86-NEXT: movl %edx, %esi 675; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F 676; X86-NEXT: shll $4, %esi 677; X86-NEXT: andl $-252645136, %edx # imm = 0xF0F0F0F0 678; X86-NEXT: shrl $4, %edx 679; X86-NEXT: orl %esi, %edx 680; X86-NEXT: movl %edx, %esi 681; X86-NEXT: andl $858993459, %esi # imm = 0x33333333 682; X86-NEXT: andl $-858993460, %edx # imm = 0xCCCCCCCC 683; X86-NEXT: shrl $2, %edx 684; X86-NEXT: leal (%edx,%esi,4), %edx 685; X86-NEXT: movl %edx, %esi 686; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555 687; X86-NEXT: andl $-1431655766, %edx # imm = 0xAAAAAAAA 688; X86-NEXT: shrl %edx 689; X86-NEXT: leal (%edx,%esi,2), %edx 690; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 691; X86-NEXT: bswapl %ecx 692; X86-NEXT: movl %ecx, %edx 693; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 694; X86-NEXT: shll $4, %edx 695; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0 696; X86-NEXT: shrl $4, %ecx 697; X86-NEXT: orl %edx, %ecx 698; X86-NEXT: movl %ecx, %edx 699; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 700; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC 701; X86-NEXT: shrl $2, %ecx 702; X86-NEXT: leal (%ecx,%edx,4), %ecx 703; X86-NEXT: movl %ecx, %edx 704; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 705; X86-NEXT: andl 
$-1431655766, %ecx # imm = 0xAAAAAAAA 706; X86-NEXT: shrl %ecx 707; X86-NEXT: leal (%ecx,%edx,2), %ecx 708; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 709; X86-NEXT: bswapl %eax 710; X86-NEXT: movl %eax, %ecx 711; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 712; X86-NEXT: shll $4, %ecx 713; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 714; X86-NEXT: shrl $4, %eax 715; X86-NEXT: orl %ecx, %eax 716; X86-NEXT: movl %eax, %ecx 717; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 718; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 719; X86-NEXT: shrl $2, %eax 720; X86-NEXT: leal (%eax,%ecx,4), %eax 721; X86-NEXT: movl %eax, %ecx 722; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 723; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 724; X86-NEXT: shrl %eax 725; X86-NEXT: leal (%eax,%ecx,2), %eax 726; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 727; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 728; X86-NEXT: bswapl %eax 729; X86-NEXT: movl %eax, %ecx 730; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 731; X86-NEXT: shll $4, %ecx 732; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 733; X86-NEXT: shrl $4, %eax 734; X86-NEXT: orl %ecx, %eax 735; X86-NEXT: movl %eax, %ecx 736; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 737; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 738; X86-NEXT: shrl $2, %eax 739; X86-NEXT: leal (%eax,%ecx,4), %eax 740; X86-NEXT: movl %eax, %ecx 741; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 742; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 743; X86-NEXT: shrl %eax 744; X86-NEXT: leal (%eax,%ecx,2), %eax 745; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 746; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 747; X86-NEXT: bswapl %eax 748; X86-NEXT: movl %eax, %ecx 749; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 750; X86-NEXT: shll $4, %ecx 751; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 752; X86-NEXT: shrl $4, %eax 753; X86-NEXT: orl 
%ecx, %eax 754; X86-NEXT: movl %eax, %ecx 755; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 756; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 757; X86-NEXT: shrl $2, %eax 758; X86-NEXT: leal (%eax,%ecx,4), %eax 759; X86-NEXT: movl %eax, %ecx 760; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 761; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 762; X86-NEXT: shrl %eax 763; X86-NEXT: leal (%eax,%ecx,2), %eax 764; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 765; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 766; X86-NEXT: bswapl %eax 767; X86-NEXT: movl %eax, %ecx 768; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 769; X86-NEXT: shll $4, %ecx 770; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 771; X86-NEXT: shrl $4, %eax 772; X86-NEXT: orl %ecx, %eax 773; X86-NEXT: movl %eax, %ecx 774; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 775; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 776; X86-NEXT: shrl $2, %eax 777; X86-NEXT: leal (%eax,%ecx,4), %eax 778; X86-NEXT: movl %eax, %ecx 779; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 780; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 781; X86-NEXT: shrl %eax 782; X86-NEXT: leal (%eax,%ecx,2), %eax 783; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 784; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 785; X86-NEXT: bswapl %eax 786; X86-NEXT: movl %eax, %ecx 787; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 788; X86-NEXT: shll $4, %ecx 789; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 790; X86-NEXT: shrl $4, %eax 791; X86-NEXT: orl %ecx, %eax 792; X86-NEXT: movl %eax, %ecx 793; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 794; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 795; X86-NEXT: shrl $2, %eax 796; X86-NEXT: leal (%eax,%ecx,4), %eax 797; X86-NEXT: movl %eax, %ecx 798; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 799; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 800; X86-NEXT: shrl %eax 801; X86-NEXT: leal 
(%eax,%ecx,2), %eax 802; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 803; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 804; X86-NEXT: bswapl %eax 805; X86-NEXT: movl %eax, %ecx 806; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 807; X86-NEXT: shll $4, %ecx 808; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 809; X86-NEXT: shrl $4, %eax 810; X86-NEXT: orl %ecx, %eax 811; X86-NEXT: movl %eax, %ecx 812; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 813; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 814; X86-NEXT: shrl $2, %eax 815; X86-NEXT: leal (%eax,%ecx,4), %eax 816; X86-NEXT: movl %eax, %ecx 817; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 818; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 819; X86-NEXT: shrl %eax 820; X86-NEXT: leal (%eax,%ecx,2), %eax 821; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 822; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 823; X86-NEXT: bswapl %eax 824; X86-NEXT: movl %eax, %ecx 825; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 826; X86-NEXT: shll $4, %ecx 827; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 828; X86-NEXT: shrl $4, %eax 829; X86-NEXT: orl %ecx, %eax 830; X86-NEXT: movl %eax, %ecx 831; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 832; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 833; X86-NEXT: shrl $2, %eax 834; X86-NEXT: leal (%eax,%ecx,4), %eax 835; X86-NEXT: movl %eax, %ecx 836; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 837; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 838; X86-NEXT: shrl %eax 839; X86-NEXT: leal (%eax,%ecx,2), %eax 840; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 841; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 842; X86-NEXT: bswapl %eax 843; X86-NEXT: movl %eax, %ecx 844; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 845; X86-NEXT: shll $4, %ecx 846; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 847; X86-NEXT: shrl $4, %eax 848; X86-NEXT: orl %ecx, %eax 849; X86-NEXT: movl %eax, %ecx 
850; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 851; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 852; X86-NEXT: shrl $2, %eax 853; X86-NEXT: leal (%eax,%ecx,4), %eax 854; X86-NEXT: movl %eax, %ecx 855; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 856; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 857; X86-NEXT: shrl %eax 858; X86-NEXT: leal (%eax,%ecx,2), %eax 859; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 860; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 861; X86-NEXT: bswapl %eax 862; X86-NEXT: movl %eax, %ecx 863; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 864; X86-NEXT: shll $4, %ecx 865; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 866; X86-NEXT: shrl $4, %eax 867; X86-NEXT: orl %ecx, %eax 868; X86-NEXT: movl %eax, %ecx 869; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 870; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 871; X86-NEXT: shrl $2, %eax 872; X86-NEXT: leal (%eax,%ecx,4), %eax 873; X86-NEXT: movl %eax, %ecx 874; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 875; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 876; X86-NEXT: shrl %eax 877; X86-NEXT: leal (%eax,%ecx,2), %eax 878; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 879; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 880; X86-NEXT: bswapl %eax 881; X86-NEXT: movl %eax, %ecx 882; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 883; X86-NEXT: shll $4, %ecx 884; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 885; X86-NEXT: shrl $4, %eax 886; X86-NEXT: orl %ecx, %eax 887; X86-NEXT: movl %eax, %ecx 888; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 889; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 890; X86-NEXT: shrl $2, %eax 891; X86-NEXT: leal (%eax,%ecx,4), %eax 892; X86-NEXT: movl %eax, %ecx 893; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 894; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 895; X86-NEXT: shrl %eax 896; X86-NEXT: leal (%eax,%ecx,2), %eax 897; X86-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 898; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 899; X86-NEXT: bswapl %eax 900; X86-NEXT: movl %eax, %ecx 901; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 902; X86-NEXT: shll $4, %ecx 903; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 904; X86-NEXT: shrl $4, %eax 905; X86-NEXT: orl %ecx, %eax 906; X86-NEXT: movl %eax, %ecx 907; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 908; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 909; X86-NEXT: shrl $2, %eax 910; X86-NEXT: leal (%eax,%ecx,4), %eax 911; X86-NEXT: movl %eax, %ecx 912; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 913; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 914; X86-NEXT: shrl %eax 915; X86-NEXT: leal (%eax,%ecx,2), %edi 916; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 917; X86-NEXT: bswapl %eax 918; X86-NEXT: movl %eax, %ecx 919; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 920; X86-NEXT: shll $4, %ecx 921; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 922; X86-NEXT: shrl $4, %eax 923; X86-NEXT: orl %ecx, %eax 924; X86-NEXT: movl %eax, %ecx 925; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 926; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 927; X86-NEXT: shrl $2, %eax 928; X86-NEXT: leal (%eax,%ecx,4), %eax 929; X86-NEXT: movl %eax, %ecx 930; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 931; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 932; X86-NEXT: shrl %eax 933; X86-NEXT: leal (%eax,%ecx,2), %edx 934; X86-NEXT: movl (%esp), %esi # 4-byte Reload 935; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 936; X86-NEXT: shrdl $16, %eax, %esi 937; X86-NEXT: shrdl $16, %ebx, %eax 938; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 939; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 940; X86-NEXT: shrdl $16, %ecx, %ebx 941; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill 942; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 943; X86-NEXT: shrdl $16, 
%eax, %ecx 944; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 945; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 946; X86-NEXT: shrdl $16, %ecx, %eax 947; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 948; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 949; X86-NEXT: shrdl $16, %eax, %ecx 950; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 951; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 952; X86-NEXT: shrdl $16, %ecx, %eax 953; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 954; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 955; X86-NEXT: shrdl $16, %eax, %ecx 956; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 957; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 958; X86-NEXT: shrdl $16, %ecx, %eax 959; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 960; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 961; X86-NEXT: shrdl $16, %eax, %ecx 962; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 963; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 964; X86-NEXT: shrdl $16, %ecx, %eax 965; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 966; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload 967; X86-NEXT: shrdl $16, %ebp, %ecx 968; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 969; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload 970; X86-NEXT: shrdl $16, %ebx, %ebp 971; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 972; X86-NEXT: shrdl $16, %eax, %ebx 973; X86-NEXT: shrdl $16, %edi, %eax 974; X86-NEXT: movl %eax, %ecx 975; X86-NEXT: shrdl $16, %edx, %edi 976; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 977; X86-NEXT: movl %edi, 60(%eax) 978; X86-NEXT: movl %ecx, 56(%eax) 979; X86-NEXT: movl %ebx, 52(%eax) 980; X86-NEXT: movl %ebp, 48(%eax) 981; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 
982; X86-NEXT: movl %ecx, 44(%eax) 983; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 984; X86-NEXT: movl %ecx, 40(%eax) 985; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 986; X86-NEXT: movl %ecx, 36(%eax) 987; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 988; X86-NEXT: movl %ecx, 32(%eax) 989; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 990; X86-NEXT: movl %ecx, 28(%eax) 991; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 992; X86-NEXT: movl %ecx, 24(%eax) 993; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 994; X86-NEXT: movl %ecx, 20(%eax) 995; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 996; X86-NEXT: movl %ecx, 16(%eax) 997; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 998; X86-NEXT: movl %ecx, 12(%eax) 999; X86-NEXT: movl (%esp), %ecx # 4-byte Reload 1000; X86-NEXT: movl %ecx, 8(%eax) 1001; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1002; X86-NEXT: movl %ecx, 4(%eax) 1003; X86-NEXT: movl %esi, (%eax) 1004; X86-NEXT: shrl $16, %edx 1005; X86-NEXT: movw %dx, 64(%eax) 1006; X86-NEXT: addl $56, %esp 1007; X86-NEXT: popl %esi 1008; X86-NEXT: popl %edi 1009; X86-NEXT: popl %ebx 1010; X86-NEXT: popl %ebp 1011; X86-NEXT: retl $4 1012; 1013; X64-LABEL: large_promotion: 1014; X64: # %bb.0: 1015; X64-NEXT: pushq %rbp 1016; X64-NEXT: pushq %r15 1017; X64-NEXT: pushq %r14 1018; X64-NEXT: pushq %r13 1019; X64-NEXT: pushq %r12 1020; X64-NEXT: pushq %rbx 1021; X64-NEXT: movq %rdi, %r12 1022; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp 1023; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx 1024; X64-NEXT: bswapq %rbx 1025; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F 1026; X64-NEXT: movq %rbx, %r10 1027; X64-NEXT: andq %r13, %r10 1028; X64-NEXT: shlq $4, %r10 1029; X64-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0 1030; X64-NEXT: andq %rax, %rbx 1031; X64-NEXT: shrq $4, %rbx 1032; X64-NEXT: orq 
%r10, %rbx 1033; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333 1034; X64-NEXT: movq %rbx, %r10 1035; X64-NEXT: andq %r11, %r10 1036; X64-NEXT: movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC 1037; X64-NEXT: andq %r14, %rbx 1038; X64-NEXT: shrq $2, %rbx 1039; X64-NEXT: leaq (%rbx,%r10,4), %r10 1040; X64-NEXT: movabsq $6148820866244280320, %rbx # imm = 0x5555000000000000 1041; X64-NEXT: andq %r10, %rbx 1042; X64-NEXT: movabsq $-6149102341220990976, %rdi # imm = 0xAAAA000000000000 1043; X64-NEXT: andq %r10, %rdi 1044; X64-NEXT: shrq %rdi 1045; X64-NEXT: leaq (%rdi,%rbx,2), %rdi 1046; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1047; X64-NEXT: bswapq %rbp 1048; X64-NEXT: movq %rbp, %rdi 1049; X64-NEXT: andq %r13, %rdi 1050; X64-NEXT: shlq $4, %rdi 1051; X64-NEXT: andq %rax, %rbp 1052; X64-NEXT: shrq $4, %rbp 1053; X64-NEXT: orq %rdi, %rbp 1054; X64-NEXT: movq %rbp, %rdi 1055; X64-NEXT: andq %r11, %rdi 1056; X64-NEXT: andq %r14, %rbp 1057; X64-NEXT: shrq $2, %rbp 1058; X64-NEXT: leaq (%rbp,%rdi,4), %rbp 1059; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555 1060; X64-NEXT: movq %rbp, %r10 1061; X64-NEXT: andq %rbx, %r10 1062; X64-NEXT: movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA 1063; X64-NEXT: andq %rdi, %rbp 1064; X64-NEXT: shrq %rbp 1065; X64-NEXT: leaq (%rbp,%r10,2), %rbp 1066; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1067; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp 1068; X64-NEXT: bswapq %rbp 1069; X64-NEXT: movq %rbp, %r10 1070; X64-NEXT: andq %r13, %r10 1071; X64-NEXT: shlq $4, %r10 1072; X64-NEXT: andq %rax, %rbp 1073; X64-NEXT: movq %rax, %r15 1074; X64-NEXT: shrq $4, %rbp 1075; X64-NEXT: orq %r10, %rbp 1076; X64-NEXT: movq %rbp, %r10 1077; X64-NEXT: andq %r11, %r10 1078; X64-NEXT: andq %r14, %rbp 1079; X64-NEXT: shrq $2, %rbp 1080; X64-NEXT: leaq (%rbp,%r10,4), %rbp 1081; X64-NEXT: movq %rbp, %r10 1082; X64-NEXT: andq %rbx, %r10 1083; X64-NEXT: 
andq %rdi, %rbp 1084; X64-NEXT: shrq %rbp 1085; X64-NEXT: leaq (%rbp,%r10,2), %rbp 1086; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 1087; X64-NEXT: bswapq %r10 1088; X64-NEXT: movq %r10, %rax 1089; X64-NEXT: andq %r13, %rax 1090; X64-NEXT: shlq $4, %rax 1091; X64-NEXT: movq %r15, %rdi 1092; X64-NEXT: andq %r15, %r10 1093; X64-NEXT: shrq $4, %r10 1094; X64-NEXT: orq %rax, %r10 1095; X64-NEXT: movq %r10, %rax 1096; X64-NEXT: andq %r11, %rax 1097; X64-NEXT: andq %r14, %r10 1098; X64-NEXT: shrq $2, %r10 1099; X64-NEXT: leaq (%r10,%rax,4), %rax 1100; X64-NEXT: movq %rax, %r10 1101; X64-NEXT: andq %rbx, %r10 1102; X64-NEXT: movabsq $-6148914691236517206, %r15 # imm = 0xAAAAAAAAAAAAAAAA 1103; X64-NEXT: andq %r15, %rax 1104; X64-NEXT: shrq %rax 1105; X64-NEXT: leaq (%rax,%r10,2), %r10 1106; X64-NEXT: bswapq %r9 1107; X64-NEXT: movq %r9, %rax 1108; X64-NEXT: andq %r13, %rax 1109; X64-NEXT: shlq $4, %rax 1110; X64-NEXT: andq %rdi, %r9 1111; X64-NEXT: shrq $4, %r9 1112; X64-NEXT: orq %rax, %r9 1113; X64-NEXT: movq %r9, %rax 1114; X64-NEXT: andq %r11, %rax 1115; X64-NEXT: andq %r14, %r9 1116; X64-NEXT: shrq $2, %r9 1117; X64-NEXT: leaq (%r9,%rax,4), %rax 1118; X64-NEXT: movq %rax, %r9 1119; X64-NEXT: andq %rbx, %r9 1120; X64-NEXT: andq %r15, %rax 1121; X64-NEXT: shrq %rax 1122; X64-NEXT: leaq (%rax,%r9,2), %r9 1123; X64-NEXT: bswapq %r8 1124; X64-NEXT: movq %r8, %rax 1125; X64-NEXT: andq %r13, %rax 1126; X64-NEXT: shlq $4, %rax 1127; X64-NEXT: andq %rdi, %r8 1128; X64-NEXT: shrq $4, %r8 1129; X64-NEXT: orq %rax, %r8 1130; X64-NEXT: movq %r8, %rax 1131; X64-NEXT: andq %r11, %rax 1132; X64-NEXT: andq %r14, %r8 1133; X64-NEXT: shrq $2, %r8 1134; X64-NEXT: leaq (%r8,%rax,4), %rax 1135; X64-NEXT: movq %rax, %r8 1136; X64-NEXT: andq %rbx, %r8 1137; X64-NEXT: andq %r15, %rax 1138; X64-NEXT: shrq %rax 1139; X64-NEXT: leaq (%rax,%r8,2), %r8 1140; X64-NEXT: bswapq %rcx 1141; X64-NEXT: movq %rcx, %rax 1142; X64-NEXT: andq %r13, %rax 1143; X64-NEXT: shlq $4, %rax 1144; X64-NEXT: andq %rdi, 
%rcx 1145; X64-NEXT: shrq $4, %rcx 1146; X64-NEXT: orq %rax, %rcx 1147; X64-NEXT: movq %rcx, %rax 1148; X64-NEXT: andq %r11, %rax 1149; X64-NEXT: andq %r14, %rcx 1150; X64-NEXT: shrq $2, %rcx 1151; X64-NEXT: leaq (%rcx,%rax,4), %rax 1152; X64-NEXT: movq %rax, %rcx 1153; X64-NEXT: andq %rbx, %rcx 1154; X64-NEXT: andq %r15, %rax 1155; X64-NEXT: shrq %rax 1156; X64-NEXT: leaq (%rax,%rcx,2), %rcx 1157; X64-NEXT: bswapq %rdx 1158; X64-NEXT: movq %rdx, %rax 1159; X64-NEXT: andq %r13, %rax 1160; X64-NEXT: shlq $4, %rax 1161; X64-NEXT: andq %rdi, %rdx 1162; X64-NEXT: shrq $4, %rdx 1163; X64-NEXT: orq %rax, %rdx 1164; X64-NEXT: movq %rdx, %rax 1165; X64-NEXT: andq %r11, %rax 1166; X64-NEXT: andq %r14, %rdx 1167; X64-NEXT: shrq $2, %rdx 1168; X64-NEXT: leaq (%rdx,%rax,4), %rax 1169; X64-NEXT: movq %rax, %rdx 1170; X64-NEXT: andq %rbx, %rdx 1171; X64-NEXT: andq %r15, %rax 1172; X64-NEXT: shrq %rax 1173; X64-NEXT: leaq (%rax,%rdx,2), %rax 1174; X64-NEXT: bswapq %rsi 1175; X64-NEXT: andq %rsi, %r13 1176; X64-NEXT: andq %rdi, %rsi 1177; X64-NEXT: shlq $4, %r13 1178; X64-NEXT: shrq $4, %rsi 1179; X64-NEXT: orq %r13, %rsi 1180; X64-NEXT: andq %rsi, %r11 1181; X64-NEXT: andq %r14, %rsi 1182; X64-NEXT: shrq $2, %rsi 1183; X64-NEXT: leaq (%rsi,%r11,4), %rdx 1184; X64-NEXT: andq %rdx, %rbx 1185; X64-NEXT: andq %r15, %rdx 1186; X64-NEXT: shrq %rdx 1187; X64-NEXT: leaq (%rdx,%rbx,2), %rdx 1188; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload 1189; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 1190; X64-NEXT: shrdq $48, %rdi, %rsi 1191; X64-NEXT: shrdq $48, %rbp, %rdi 1192; X64-NEXT: shrdq $48, %r10, %rbp 1193; X64-NEXT: shrdq $48, %r9, %r10 1194; X64-NEXT: shrdq $48, %r8, %r9 1195; X64-NEXT: shrdq $48, %rcx, %r8 1196; X64-NEXT: shrdq $48, %rax, %rcx 1197; X64-NEXT: shrdq $48, %rdx, %rax 1198; X64-NEXT: movq %rax, 56(%r12) 1199; X64-NEXT: movq %rcx, 48(%r12) 1200; X64-NEXT: movq %r8, 40(%r12) 1201; X64-NEXT: movq %r9, 32(%r12) 1202; X64-NEXT: movq %r10, 
24(%r12) 1203; X64-NEXT: movq %rbp, 16(%r12) 1204; X64-NEXT: movq %rdi, 8(%r12) 1205; X64-NEXT: movq %rsi, (%r12) 1206; X64-NEXT: shrq $48, %rdx 1207; X64-NEXT: movw %dx, 64(%r12) 1208; X64-NEXT: movq %r12, %rax 1209; X64-NEXT: popq %rbx 1210; X64-NEXT: popq %r12 1211; X64-NEXT: popq %r13 1212; X64-NEXT: popq %r14 1213; X64-NEXT: popq %r15 1214; X64-NEXT: popq %rbp 1215; X64-NEXT: retq 1216; 1217; X86XOP-LABEL: large_promotion: 1218; X86XOP: # %bb.0: 1219; X86XOP-NEXT: pushl %ebp 1220; X86XOP-NEXT: pushl %ebx 1221; X86XOP-NEXT: pushl %edi 1222; X86XOP-NEXT: pushl %esi 1223; X86XOP-NEXT: subl $44, %esp 1224; X86XOP-NEXT: vmovdqa {{.*#+}} xmm0 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88] 1225; X86XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1226; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1227; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1228; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1229; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1230; X86XOP-NEXT: vmovd %xmm1, %ecx 1231; X86XOP-NEXT: shrdl $16, %ecx, %eax 1232; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1233; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1234; X86XOP-NEXT: shrdl $16, %eax, %ecx 1235; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1236; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1237; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1238; X86XOP-NEXT: vmovd %xmm1, %ecx 1239; X86XOP-NEXT: shrdl $16, %ecx, %eax 1240; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1241; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1242; X86XOP-NEXT: shrdl $16, %eax, %ecx 1243; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1244; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1245; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1246; X86XOP-NEXT: vmovd %xmm1, %ecx 1247; X86XOP-NEXT: shrdl $16, %ecx, %eax 1248; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1249; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1250; X86XOP-NEXT: shrdl 
$16, %eax, %ecx 1251; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1252; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1253; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1254; X86XOP-NEXT: vmovd %xmm1, %ecx 1255; X86XOP-NEXT: shrdl $16, %ecx, %eax 1256; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1257; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1258; X86XOP-NEXT: shrdl $16, %eax, %ecx 1259; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1260; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1261; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1262; X86XOP-NEXT: vmovd %xmm1, %ecx 1263; X86XOP-NEXT: shrdl $16, %ecx, %eax 1264; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1265; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1266; X86XOP-NEXT: shrdl $16, %eax, %ecx 1267; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1268; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1269; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1270; X86XOP-NEXT: vmovd %xmm1, %ebp 1271; X86XOP-NEXT: shrdl $16, %ebp, %eax 1272; X86XOP-NEXT: movl %eax, (%esp) # 4-byte Spill 1273; X86XOP-NEXT: vpextrd $1, %xmm1, %ebx 1274; X86XOP-NEXT: shrdl $16, %ebx, %ebp 1275; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1276; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1277; X86XOP-NEXT: vmovd %xmm1, %esi 1278; X86XOP-NEXT: shrdl $16, %esi, %ebx 1279; X86XOP-NEXT: vpextrd $1, %xmm1, %edx 1280; X86XOP-NEXT: shrdl $16, %edx, %esi 1281; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1282; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm0 1283; X86XOP-NEXT: vmovd %xmm0, %ecx 1284; X86XOP-NEXT: shrdl $16, %ecx, %edx 1285; X86XOP-NEXT: vpextrd $1, %xmm0, %edi 1286; X86XOP-NEXT: shrdl $16, %edi, %ecx 1287; X86XOP-NEXT: movl {{[0-9]+}}(%esp), %eax 1288; X86XOP-NEXT: movl %ecx, 60(%eax) 1289; X86XOP-NEXT: movl %edx, 56(%eax) 1290; X86XOP-NEXT: movl %esi, 52(%eax) 1291; X86XOP-NEXT: movl %ebx, 48(%eax) 1292; X86XOP-NEXT: movl %ebp, 44(%eax) 
1293; X86XOP-NEXT: movl (%esp), %ecx # 4-byte Reload 1294; X86XOP-NEXT: movl %ecx, 40(%eax) 1295; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1296; X86XOP-NEXT: movl %ecx, 36(%eax) 1297; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1298; X86XOP-NEXT: movl %ecx, 32(%eax) 1299; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1300; X86XOP-NEXT: movl %ecx, 28(%eax) 1301; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1302; X86XOP-NEXT: movl %ecx, 24(%eax) 1303; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1304; X86XOP-NEXT: movl %ecx, 20(%eax) 1305; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1306; X86XOP-NEXT: movl %ecx, 16(%eax) 1307; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1308; X86XOP-NEXT: movl %ecx, 12(%eax) 1309; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1310; X86XOP-NEXT: movl %ecx, 8(%eax) 1311; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1312; X86XOP-NEXT: movl %ecx, 4(%eax) 1313; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1314; X86XOP-NEXT: movl %ecx, (%eax) 1315; X86XOP-NEXT: shrl $16, %edi 1316; X86XOP-NEXT: movw %di, 64(%eax) 1317; X86XOP-NEXT: addl $44, %esp 1318; X86XOP-NEXT: popl %esi 1319; X86XOP-NEXT: popl %edi 1320; X86XOP-NEXT: popl %ebx 1321; X86XOP-NEXT: popl %ebp 1322; X86XOP-NEXT: retl $4 1323 %Z = call i528 @llvm.bitreverse.i528(i528 %A) 1324 ret i528 %Z 1325} 1326declare i528 @llvm.bitreverse.i528(i528) 1327