; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3
;
; Codegen checks for the llvm.ctpop intrinsic on x86. Every function below is
; compiled once per llc invocation listed above (32-bit and 64-bit triples,
; with and without the popcnt feature, plus 32-bit runs with sse2 and ssse3
; enabled) and the emitted assembly is matched against the assertion group
; selected by that invocation's check prefix.
;
; The assertion groups are generated output; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; ctpop on i8. The runs without popcnt expect a shift/mask/add bit-counting
; sequence built from the byte masks 85, 51 and 15. The runs with popcnt expect
; the argument to be zero-extended with movzbl and counted by a 32-bit popcntl.
define i8 @cnt8(i8 %x) nounwind readnone {
; X86-LABEL: cnt8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb %al
; X86-NEXT:    andb $85, %al
; X86-NEXT:    subb %al, %cl
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    andb $51, %al
; X86-NEXT:    shrb $2, %cl
; X86-NEXT:    andb $51, %cl
; X86-NEXT:    addb %al, %cl
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $4, %al
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    andb $15, %al
; X86-NEXT:    retl
;
; X64-LABEL: cnt8:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrb %al
; X64-NEXT:    andb $85, %al
; X64-NEXT:    subb %al, %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $51, %al
; X64-NEXT:    shrb $2, %dil
; X64-NEXT:    andb $51, %dil
; X64-NEXT:    addb %al, %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrb $4, %al
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    andb $15, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt8:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl %eax, %eax
; X86-POPCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt8:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    movzbl %dil, %eax
; X64-POPCNT-NEXT:    popcntl %eax, %eax
; X64-POPCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-POPCNT-NEXT:    retq
  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
  ret i8 %cnt
}

; ctpop on i16. The runs without popcnt expect the shift/mask/add pattern with
; the masks 0x5555, 0x3333 and 0xF0F, followed by shll $8, addl and a movzbl of
; the high-byte register (%ah or %ch). The runs with popcnt expect one popcntw.
define i16 @cnt16(i16 %x) nounwind readnone {
; X86-LABEL: cnt16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl %ecx
; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $13107, %eax # imm = 0x3333
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movzbl %ah, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cnt16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    andl $21845, %eax # imm = 0x5555
; X64-NEXT:    subl %eax, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $13107, %eax # imm = 0x3333
; X64-NEXT:    shrl $2, %edi
; X64-NEXT:    andl $13107, %edi # imm = 0x3333
; X64-NEXT:    addl %eax, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $4, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shll $8, %ecx
; X64-NEXT:    addl %eax, %ecx
; X64-NEXT:    movzbl %ch, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt16:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntw {{[0-9]+}}(%esp), %ax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt16:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntw %di, %ax
; X64-POPCNT-NEXT:    retq
  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
  ret i16 %cnt
}

123define i32 @cnt32(i32 %x) nounwind readnone { 124; X86-LABEL: cnt32: 125; X86: # %bb.0: 126; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 127; X86-NEXT: movl %eax, %ecx 128; X86-NEXT: shrl %ecx 129; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 130; X86-NEXT: subl %ecx, %eax 131; X86-NEXT: movl %eax, %ecx 132; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 133; X86-NEXT: shrl $2, %eax 134; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 135; X86-NEXT: addl %ecx, %eax 136; X86-NEXT: movl %eax, %ecx 137; X86-NEXT: shrl $4, %ecx 138; X86-NEXT: addl %eax, %ecx 139; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 140; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 141; X86-NEXT: shrl $24, %eax 142; X86-NEXT: retl 143; 144; X64-LABEL: cnt32: 145; X64: # %bb.0: 146; X64-NEXT: movl %edi, %eax 147; X64-NEXT: shrl %eax 148; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 149; X64-NEXT: subl %eax, %edi 150; X64-NEXT: movl %edi, %eax 151; X64-NEXT: andl $858993459, %eax # imm = 0x33333333 152; X64-NEXT: shrl $2, %edi 153; X64-NEXT: andl $858993459, %edi # imm = 0x33333333 154; X64-NEXT: addl %eax, %edi 155; X64-NEXT: movl %edi, %eax 156; X64-NEXT: shrl $4, %eax 157; X64-NEXT: addl %edi, %eax 158; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 159; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 160; X64-NEXT: shrl $24, %eax 161; X64-NEXT: retq 162; 163; X86-POPCNT-LABEL: cnt32: 164; X86-POPCNT: # %bb.0: 165; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax 166; X86-POPCNT-NEXT: retl 167; 168; X64-POPCNT-LABEL: cnt32: 169; X64-POPCNT: # %bb.0: 170; X64-POPCNT-NEXT: popcntl %edi, %eax 171; X64-POPCNT-NEXT: retq 172 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) 173 ret i32 %cnt 174} 175 176define i64 @cnt64(i64 %x) nounwind readnone { 177; X86-NOSSE-LABEL: cnt64: 178; X86-NOSSE: # %bb.0: 179; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 180; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 181; X86-NOSSE-NEXT: movl %ecx, %edx 182; X86-NOSSE-NEXT: shrl 
%edx 183; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 184; X86-NOSSE-NEXT: subl %edx, %ecx 185; X86-NOSSE-NEXT: movl %ecx, %edx 186; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 187; X86-NOSSE-NEXT: shrl $2, %ecx 188; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 189; X86-NOSSE-NEXT: addl %edx, %ecx 190; X86-NOSSE-NEXT: movl %ecx, %edx 191; X86-NOSSE-NEXT: shrl $4, %edx 192; X86-NOSSE-NEXT: addl %ecx, %edx 193; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 194; X86-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 195; X86-NOSSE-NEXT: shrl $24, %ecx 196; X86-NOSSE-NEXT: movl %eax, %edx 197; X86-NOSSE-NEXT: shrl %edx 198; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 199; X86-NOSSE-NEXT: subl %edx, %eax 200; X86-NOSSE-NEXT: movl %eax, %edx 201; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 202; X86-NOSSE-NEXT: shrl $2, %eax 203; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333 204; X86-NOSSE-NEXT: addl %edx, %eax 205; X86-NOSSE-NEXT: movl %eax, %edx 206; X86-NOSSE-NEXT: shrl $4, %edx 207; X86-NOSSE-NEXT: addl %eax, %edx 208; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 209; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 210; X86-NOSSE-NEXT: shrl $24, %eax 211; X86-NOSSE-NEXT: addl %ecx, %eax 212; X86-NOSSE-NEXT: xorl %edx, %edx 213; X86-NOSSE-NEXT: retl 214; 215; X64-LABEL: cnt64: 216; X64: # %bb.0: 217; X64-NEXT: movq %rdi, %rax 218; X64-NEXT: shrq %rax 219; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 220; X64-NEXT: andq %rax, %rcx 221; X64-NEXT: subq %rcx, %rdi 222; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 223; X64-NEXT: movq %rdi, %rcx 224; X64-NEXT: andq %rax, %rcx 225; X64-NEXT: shrq $2, %rdi 226; X64-NEXT: andq %rax, %rdi 227; X64-NEXT: addq %rcx, %rdi 228; X64-NEXT: movq %rdi, %rax 229; X64-NEXT: shrq $4, %rax 230; X64-NEXT: addq %rdi, %rax 231; X64-NEXT: movabsq $1085102592571150095, %rcx # 
imm = 0xF0F0F0F0F0F0F0F 232; X64-NEXT: andq %rax, %rcx 233; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 234; X64-NEXT: imulq %rcx, %rax 235; X64-NEXT: shrq $56, %rax 236; X64-NEXT: retq 237; 238; X86-POPCNT-LABEL: cnt64: 239; X86-POPCNT: # %bb.0: 240; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 241; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax 242; X86-POPCNT-NEXT: addl %ecx, %eax 243; X86-POPCNT-NEXT: xorl %edx, %edx 244; X86-POPCNT-NEXT: retl 245; 246; X64-POPCNT-LABEL: cnt64: 247; X64-POPCNT: # %bb.0: 248; X64-POPCNT-NEXT: popcntq %rdi, %rax 249; X64-POPCNT-NEXT: retq 250; 251; X86-SSE2-LABEL: cnt64: 252; X86-SSE2: # %bb.0: 253; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 254; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 255; X86-SSE2-NEXT: psrlw $1, %xmm1 256; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 257; X86-SSE2-NEXT: psubb %xmm1, %xmm0 258; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 259; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 260; X86-SSE2-NEXT: pand %xmm1, %xmm2 261; X86-SSE2-NEXT: psrlw $2, %xmm0 262; X86-SSE2-NEXT: pand %xmm1, %xmm0 263; X86-SSE2-NEXT: paddb %xmm2, %xmm0 264; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 265; X86-SSE2-NEXT: psrlw $4, %xmm1 266; X86-SSE2-NEXT: paddb %xmm0, %xmm1 267; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 268; X86-SSE2-NEXT: pxor %xmm0, %xmm0 269; X86-SSE2-NEXT: psadbw %xmm1, %xmm0 270; X86-SSE2-NEXT: movd %xmm0, %eax 271; X86-SSE2-NEXT: xorl %edx, %edx 272; X86-SSE2-NEXT: retl 273; 274; X86-SSSE3-LABEL: cnt64: 275; X86-SSSE3: # %bb.0: 276; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 277; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 278; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 279; X86-SSSE3-NEXT: pand %xmm0, %xmm2 280; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 281; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 282; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 283; X86-SSSE3-NEXT: psrlw $4, %xmm1 284; 
X86-SSSE3-NEXT: pand %xmm0, %xmm1 285; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3 286; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 287; X86-SSSE3-NEXT: pxor %xmm0, %xmm0 288; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0 289; X86-SSSE3-NEXT: movd %xmm0, %eax 290; X86-SSSE3-NEXT: xorl %edx, %edx 291; X86-SSSE3-NEXT: retl 292 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) 293 ret i64 %cnt 294} 295 296define i128 @cnt128(i128 %x) nounwind readnone { 297; X86-NOSSE-LABEL: cnt128: 298; X86-NOSSE: # %bb.0: 299; X86-NOSSE-NEXT: pushl %ebx 300; X86-NOSSE-NEXT: pushl %edi 301; X86-NOSSE-NEXT: pushl %esi 302; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 303; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 304; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 305; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 306; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi 307; X86-NOSSE-NEXT: movl %edi, %ebx 308; X86-NOSSE-NEXT: shrl %ebx 309; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 310; X86-NOSSE-NEXT: subl %ebx, %edi 311; X86-NOSSE-NEXT: movl %edi, %ebx 312; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 313; X86-NOSSE-NEXT: shrl $2, %edi 314; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 315; X86-NOSSE-NEXT: addl %ebx, %edi 316; X86-NOSSE-NEXT: movl %edi, %ebx 317; X86-NOSSE-NEXT: shrl $4, %ebx 318; X86-NOSSE-NEXT: addl %edi, %ebx 319; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F 320; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 321; X86-NOSSE-NEXT: shrl $24, %edi 322; X86-NOSSE-NEXT: movl %esi, %ebx 323; X86-NOSSE-NEXT: shrl %ebx 324; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 325; X86-NOSSE-NEXT: subl %ebx, %esi 326; X86-NOSSE-NEXT: movl %esi, %ebx 327; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 328; X86-NOSSE-NEXT: shrl $2, %esi 329; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 330; X86-NOSSE-NEXT: addl %ebx, %esi 331; X86-NOSSE-NEXT: movl %esi, %ebx 332; X86-NOSSE-NEXT: shrl $4, %ebx 333; X86-NOSSE-NEXT: addl %esi, 
%ebx 334; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F 335; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 336; X86-NOSSE-NEXT: shrl $24, %esi 337; X86-NOSSE-NEXT: addl %edi, %esi 338; X86-NOSSE-NEXT: movl %edx, %edi 339; X86-NOSSE-NEXT: shrl %edi 340; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 341; X86-NOSSE-NEXT: subl %edi, %edx 342; X86-NOSSE-NEXT: movl %edx, %edi 343; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 344; X86-NOSSE-NEXT: shrl $2, %edx 345; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 346; X86-NOSSE-NEXT: addl %edi, %edx 347; X86-NOSSE-NEXT: movl %edx, %edi 348; X86-NOSSE-NEXT: shrl $4, %edi 349; X86-NOSSE-NEXT: addl %edx, %edi 350; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F 351; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 352; X86-NOSSE-NEXT: shrl $24, %edx 353; X86-NOSSE-NEXT: movl %ecx, %edi 354; X86-NOSSE-NEXT: shrl %edi 355; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 356; X86-NOSSE-NEXT: subl %edi, %ecx 357; X86-NOSSE-NEXT: movl %ecx, %edi 358; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 359; X86-NOSSE-NEXT: shrl $2, %ecx 360; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 361; X86-NOSSE-NEXT: addl %edi, %ecx 362; X86-NOSSE-NEXT: movl %ecx, %edi 363; X86-NOSSE-NEXT: shrl $4, %edi 364; X86-NOSSE-NEXT: addl %ecx, %edi 365; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F 366; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 367; X86-NOSSE-NEXT: shrl $24, %ecx 368; X86-NOSSE-NEXT: addl %edx, %ecx 369; X86-NOSSE-NEXT: addl %esi, %ecx 370; X86-NOSSE-NEXT: movl %ecx, (%eax) 371; X86-NOSSE-NEXT: movl $0, 12(%eax) 372; X86-NOSSE-NEXT: movl $0, 8(%eax) 373; X86-NOSSE-NEXT: movl $0, 4(%eax) 374; X86-NOSSE-NEXT: popl %esi 375; X86-NOSSE-NEXT: popl %edi 376; X86-NOSSE-NEXT: popl %ebx 377; X86-NOSSE-NEXT: retl $4 378; 379; X64-LABEL: cnt128: 380; X64: # %bb.0: 381; X64-NEXT: movq %rsi, %rax 382; X64-NEXT: 
shrq %rax 383; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 384; X64-NEXT: andq %r8, %rax 385; X64-NEXT: subq %rax, %rsi 386; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 387; X64-NEXT: movq %rsi, %rcx 388; X64-NEXT: andq %rax, %rcx 389; X64-NEXT: shrq $2, %rsi 390; X64-NEXT: andq %rax, %rsi 391; X64-NEXT: addq %rcx, %rsi 392; X64-NEXT: movq %rsi, %rcx 393; X64-NEXT: shrq $4, %rcx 394; X64-NEXT: addq %rsi, %rcx 395; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F 396; X64-NEXT: andq %r9, %rcx 397; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101 398; X64-NEXT: imulq %rdx, %rcx 399; X64-NEXT: shrq $56, %rcx 400; X64-NEXT: movq %rdi, %rsi 401; X64-NEXT: shrq %rsi 402; X64-NEXT: andq %r8, %rsi 403; X64-NEXT: subq %rsi, %rdi 404; X64-NEXT: movq %rdi, %rsi 405; X64-NEXT: andq %rax, %rsi 406; X64-NEXT: shrq $2, %rdi 407; X64-NEXT: andq %rax, %rdi 408; X64-NEXT: addq %rsi, %rdi 409; X64-NEXT: movq %rdi, %rax 410; X64-NEXT: shrq $4, %rax 411; X64-NEXT: addq %rdi, %rax 412; X64-NEXT: andq %r9, %rax 413; X64-NEXT: imulq %rdx, %rax 414; X64-NEXT: shrq $56, %rax 415; X64-NEXT: addq %rcx, %rax 416; X64-NEXT: xorl %edx, %edx 417; X64-NEXT: retq 418; 419; X86-POPCNT-LABEL: cnt128: 420; X86-POPCNT: # %bb.0: 421; X86-POPCNT-NEXT: pushl %esi 422; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax 423; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 424; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx 425; X86-POPCNT-NEXT: addl %ecx, %edx 426; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 427; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi 428; X86-POPCNT-NEXT: addl %ecx, %esi 429; X86-POPCNT-NEXT: addl %edx, %esi 430; X86-POPCNT-NEXT: movl %esi, (%eax) 431; X86-POPCNT-NEXT: movl $0, 12(%eax) 432; X86-POPCNT-NEXT: movl $0, 8(%eax) 433; X86-POPCNT-NEXT: movl $0, 4(%eax) 434; X86-POPCNT-NEXT: popl %esi 435; X86-POPCNT-NEXT: retl $4 436; 437; X64-POPCNT-LABEL: cnt128: 438; X64-POPCNT: # %bb.0: 
439; X64-POPCNT-NEXT: popcntq %rsi, %rcx 440; X64-POPCNT-NEXT: popcntq %rdi, %rax 441; X64-POPCNT-NEXT: addq %rcx, %rax 442; X64-POPCNT-NEXT: xorl %edx, %edx 443; X64-POPCNT-NEXT: retq 444; 445; X86-SSE2-LABEL: cnt128: 446; X86-SSE2: # %bb.0: 447; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 448; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 449; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 450; X86-SSE2-NEXT: psrlw $1, %xmm1 451; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] 452; X86-SSE2-NEXT: pand %xmm2, %xmm1 453; X86-SSE2-NEXT: psubb %xmm1, %xmm0 454; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 455; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 456; X86-SSE2-NEXT: pand %xmm1, %xmm3 457; X86-SSE2-NEXT: psrlw $2, %xmm0 458; X86-SSE2-NEXT: pand %xmm1, %xmm0 459; X86-SSE2-NEXT: paddb %xmm3, %xmm0 460; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 461; X86-SSE2-NEXT: psrlw $4, %xmm3 462; X86-SSE2-NEXT: paddb %xmm0, %xmm3 463; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 464; X86-SSE2-NEXT: pand %xmm0, %xmm3 465; X86-SSE2-NEXT: pxor %xmm4, %xmm4 466; X86-SSE2-NEXT: psadbw %xmm4, %xmm3 467; X86-SSE2-NEXT: movd %xmm3, %ecx 468; X86-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero 469; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 470; X86-SSE2-NEXT: psrlw $1, %xmm5 471; X86-SSE2-NEXT: pand %xmm2, %xmm5 472; X86-SSE2-NEXT: psubb %xmm5, %xmm3 473; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 474; X86-SSE2-NEXT: pand %xmm1, %xmm2 475; X86-SSE2-NEXT: psrlw $2, %xmm3 476; X86-SSE2-NEXT: pand %xmm1, %xmm3 477; X86-SSE2-NEXT: paddb %xmm2, %xmm3 478; X86-SSE2-NEXT: movdqa %xmm3, %xmm1 479; X86-SSE2-NEXT: psrlw $4, %xmm1 480; X86-SSE2-NEXT: paddb %xmm3, %xmm1 481; X86-SSE2-NEXT: pand %xmm0, %xmm1 482; X86-SSE2-NEXT: psadbw %xmm4, %xmm1 483; X86-SSE2-NEXT: movd %xmm1, %edx 484; X86-SSE2-NEXT: addl %ecx, %edx 485; X86-SSE2-NEXT: movl %edx, (%eax) 486; X86-SSE2-NEXT: movl $0, 12(%eax) 487; X86-SSE2-NEXT: movl 
$0, 8(%eax) 488; X86-SSE2-NEXT: movl $0, 4(%eax) 489; X86-SSE2-NEXT: retl $4 490; 491; X86-SSSE3-LABEL: cnt128: 492; X86-SSSE3: # %bb.0: 493; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax 494; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 495; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 496; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 497; X86-SSSE3-NEXT: pand %xmm0, %xmm2 498; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 499; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 500; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 501; X86-SSSE3-NEXT: psrlw $4, %xmm1 502; X86-SSSE3-NEXT: pand %xmm0, %xmm1 503; X86-SSSE3-NEXT: movdqa %xmm3, %xmm2 504; X86-SSSE3-NEXT: pshufb %xmm1, %xmm2 505; X86-SSSE3-NEXT: paddb %xmm4, %xmm2 506; X86-SSSE3-NEXT: pxor %xmm1, %xmm1 507; X86-SSSE3-NEXT: psadbw %xmm1, %xmm2 508; X86-SSSE3-NEXT: movd %xmm2, %ecx 509; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero 510; X86-SSSE3-NEXT: movdqa %xmm2, %xmm4 511; X86-SSSE3-NEXT: pand %xmm0, %xmm4 512; X86-SSSE3-NEXT: movdqa %xmm3, %xmm5 513; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5 514; X86-SSSE3-NEXT: psrlw $4, %xmm2 515; X86-SSSE3-NEXT: pand %xmm0, %xmm2 516; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3 517; X86-SSSE3-NEXT: paddb %xmm5, %xmm3 518; X86-SSSE3-NEXT: psadbw %xmm1, %xmm3 519; X86-SSSE3-NEXT: movd %xmm3, %edx 520; X86-SSSE3-NEXT: addl %ecx, %edx 521; X86-SSSE3-NEXT: movl %edx, (%eax) 522; X86-SSSE3-NEXT: movl $0, 12(%eax) 523; X86-SSSE3-NEXT: movl $0, 8(%eax) 524; X86-SSSE3-NEXT: movl $0, 4(%eax) 525; X86-SSSE3-NEXT: retl $4 526 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) 527 ret i128 %cnt 528} 529 530define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat { 531; X86-LABEL: cnt64_noimplicitfloat: 532; X86: # %bb.0: 533; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 534; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 535; X86-NEXT: movl %ecx, %edx 536; X86-NEXT: shrl %edx 537; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 538; X86-NEXT: subl 
%edx, %ecx 539; X86-NEXT: movl %ecx, %edx 540; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 541; X86-NEXT: shrl $2, %ecx 542; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 543; X86-NEXT: addl %edx, %ecx 544; X86-NEXT: movl %ecx, %edx 545; X86-NEXT: shrl $4, %edx 546; X86-NEXT: addl %ecx, %edx 547; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 548; X86-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 549; X86-NEXT: shrl $24, %ecx 550; X86-NEXT: movl %eax, %edx 551; X86-NEXT: shrl %edx 552; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 553; X86-NEXT: subl %edx, %eax 554; X86-NEXT: movl %eax, %edx 555; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 556; X86-NEXT: shrl $2, %eax 557; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 558; X86-NEXT: addl %edx, %eax 559; X86-NEXT: movl %eax, %edx 560; X86-NEXT: shrl $4, %edx 561; X86-NEXT: addl %eax, %edx 562; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 563; X86-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 564; X86-NEXT: shrl $24, %eax 565; X86-NEXT: addl %ecx, %eax 566; X86-NEXT: xorl %edx, %edx 567; X86-NEXT: retl 568; 569; X64-LABEL: cnt64_noimplicitfloat: 570; X64: # %bb.0: 571; X64-NEXT: movq %rdi, %rax 572; X64-NEXT: shrq %rax 573; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 574; X64-NEXT: andq %rax, %rcx 575; X64-NEXT: subq %rcx, %rdi 576; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 577; X64-NEXT: movq %rdi, %rcx 578; X64-NEXT: andq %rax, %rcx 579; X64-NEXT: shrq $2, %rdi 580; X64-NEXT: andq %rax, %rdi 581; X64-NEXT: addq %rcx, %rdi 582; X64-NEXT: movq %rdi, %rax 583; X64-NEXT: shrq $4, %rax 584; X64-NEXT: addq %rdi, %rax 585; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F 586; X64-NEXT: andq %rax, %rcx 587; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 588; X64-NEXT: imulq %rcx, %rax 589; X64-NEXT: shrq $56, %rax 590; X64-NEXT: retq 591; 592; X86-POPCNT-LABEL: 
cnt64_noimplicitfloat: 593; X86-POPCNT: # %bb.0: 594; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 595; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax 596; X86-POPCNT-NEXT: addl %ecx, %eax 597; X86-POPCNT-NEXT: xorl %edx, %edx 598; X86-POPCNT-NEXT: retl 599; 600; X64-POPCNT-LABEL: cnt64_noimplicitfloat: 601; X64-POPCNT: # %bb.0: 602; X64-POPCNT-NEXT: popcntq %rdi, %rax 603; X64-POPCNT-NEXT: retq 604 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) 605 ret i64 %cnt 606} 607 608define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize { 609; X86-LABEL: cnt32_optsize: 610; X86: # %bb.0: 611; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 612; X86-NEXT: movl %eax, %ecx 613; X86-NEXT: shrl %ecx 614; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 615; X86-NEXT: subl %ecx, %eax 616; X86-NEXT: movl $858993459, %ecx # imm = 0x33333333 617; X86-NEXT: movl %eax, %edx 618; X86-NEXT: andl %ecx, %edx 619; X86-NEXT: shrl $2, %eax 620; X86-NEXT: andl %ecx, %eax 621; X86-NEXT: addl %edx, %eax 622; X86-NEXT: movl %eax, %ecx 623; X86-NEXT: shrl $4, %ecx 624; X86-NEXT: addl %eax, %ecx 625; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 626; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 627; X86-NEXT: shrl $24, %eax 628; X86-NEXT: retl 629; 630; X64-LABEL: cnt32_optsize: 631; X64: # %bb.0: 632; X64-NEXT: movl %edi, %eax 633; X64-NEXT: shrl %eax 634; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 635; X64-NEXT: subl %eax, %edi 636; X64-NEXT: movl $858993459, %eax # imm = 0x33333333 637; X64-NEXT: movl %edi, %ecx 638; X64-NEXT: andl %eax, %ecx 639; X64-NEXT: shrl $2, %edi 640; X64-NEXT: andl %eax, %edi 641; X64-NEXT: addl %ecx, %edi 642; X64-NEXT: movl %edi, %eax 643; X64-NEXT: shrl $4, %eax 644; X64-NEXT: addl %edi, %eax 645; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 646; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 647; X64-NEXT: shrl $24, %eax 648; X64-NEXT: retq 649; 650; X86-POPCNT-LABEL: cnt32_optsize: 651; X86-POPCNT: # %bb.0: 652; 
X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax 653; X86-POPCNT-NEXT: retl 654; 655; X64-POPCNT-LABEL: cnt32_optsize: 656; X64-POPCNT: # %bb.0: 657; X64-POPCNT-NEXT: popcntl %edi, %eax 658; X64-POPCNT-NEXT: retq 659 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) 660 ret i32 %cnt 661} 662 663define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize { 664; X86-NOSSE-LABEL: cnt64_optsize: 665; X86-NOSSE: # %bb.0: 666; X86-NOSSE-NEXT: pushl %ebx 667; X86-NOSSE-NEXT: pushl %edi 668; X86-NOSSE-NEXT: pushl %esi 669; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 670; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 671; X86-NOSSE-NEXT: movl %ecx, %edx 672; X86-NOSSE-NEXT: shrl %edx 673; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 674; X86-NOSSE-NEXT: andl %esi, %edx 675; X86-NOSSE-NEXT: subl %edx, %ecx 676; X86-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333 677; X86-NOSSE-NEXT: movl %ecx, %edi 678; X86-NOSSE-NEXT: andl %edx, %edi 679; X86-NOSSE-NEXT: shrl $2, %ecx 680; X86-NOSSE-NEXT: andl %edx, %ecx 681; X86-NOSSE-NEXT: addl %edi, %ecx 682; X86-NOSSE-NEXT: movl %ecx, %edi 683; X86-NOSSE-NEXT: shrl $4, %edi 684; X86-NOSSE-NEXT: addl %ecx, %edi 685; X86-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F 686; X86-NOSSE-NEXT: andl %ecx, %edi 687; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 688; X86-NOSSE-NEXT: shrl $24, %edi 689; X86-NOSSE-NEXT: movl %eax, %ebx 690; X86-NOSSE-NEXT: shrl %ebx 691; X86-NOSSE-NEXT: andl %esi, %ebx 692; X86-NOSSE-NEXT: subl %ebx, %eax 693; X86-NOSSE-NEXT: movl %eax, %esi 694; X86-NOSSE-NEXT: andl %edx, %esi 695; X86-NOSSE-NEXT: shrl $2, %eax 696; X86-NOSSE-NEXT: andl %edx, %eax 697; X86-NOSSE-NEXT: addl %esi, %eax 698; X86-NOSSE-NEXT: movl %eax, %edx 699; X86-NOSSE-NEXT: shrl $4, %edx 700; X86-NOSSE-NEXT: addl %eax, %edx 701; X86-NOSSE-NEXT: andl %ecx, %edx 702; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 703; X86-NOSSE-NEXT: shrl $24, %eax 704; X86-NOSSE-NEXT: addl %edi, %eax 705; X86-NOSSE-NEXT: 
xorl %edx, %edx 706; X86-NOSSE-NEXT: popl %esi 707; X86-NOSSE-NEXT: popl %edi 708; X86-NOSSE-NEXT: popl %ebx 709; X86-NOSSE-NEXT: retl 710; 711; X64-LABEL: cnt64_optsize: 712; X64: # %bb.0: 713; X64-NEXT: movq %rdi, %rax 714; X64-NEXT: shrq %rax 715; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 716; X64-NEXT: andq %rax, %rcx 717; X64-NEXT: subq %rcx, %rdi 718; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 719; X64-NEXT: movq %rdi, %rcx 720; X64-NEXT: andq %rax, %rcx 721; X64-NEXT: shrq $2, %rdi 722; X64-NEXT: andq %rax, %rdi 723; X64-NEXT: addq %rcx, %rdi 724; X64-NEXT: movq %rdi, %rax 725; X64-NEXT: shrq $4, %rax 726; X64-NEXT: addq %rdi, %rax 727; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F 728; X64-NEXT: andq %rax, %rcx 729; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 730; X64-NEXT: imulq %rcx, %rax 731; X64-NEXT: shrq $56, %rax 732; X64-NEXT: retq 733; 734; X86-POPCNT-LABEL: cnt64_optsize: 735; X86-POPCNT: # %bb.0: 736; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 737; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax 738; X86-POPCNT-NEXT: addl %ecx, %eax 739; X86-POPCNT-NEXT: xorl %edx, %edx 740; X86-POPCNT-NEXT: retl 741; 742; X64-POPCNT-LABEL: cnt64_optsize: 743; X64-POPCNT: # %bb.0: 744; X64-POPCNT-NEXT: popcntq %rdi, %rax 745; X64-POPCNT-NEXT: retq 746; 747; X86-SSE2-LABEL: cnt64_optsize: 748; X86-SSE2: # %bb.0: 749; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 750; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 751; X86-SSE2-NEXT: psrlw $1, %xmm1 752; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 753; X86-SSE2-NEXT: psubb %xmm1, %xmm0 754; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 755; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 756; X86-SSE2-NEXT: pand %xmm1, %xmm2 757; X86-SSE2-NEXT: psrlw $2, %xmm0 758; X86-SSE2-NEXT: pand %xmm1, %xmm0 759; X86-SSE2-NEXT: paddb %xmm2, %xmm0 760; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 761; 
X86-SSE2-NEXT: psrlw $4, %xmm1 762; X86-SSE2-NEXT: paddb %xmm0, %xmm1 763; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 764; X86-SSE2-NEXT: pxor %xmm0, %xmm0 765; X86-SSE2-NEXT: psadbw %xmm1, %xmm0 766; X86-SSE2-NEXT: movd %xmm0, %eax 767; X86-SSE2-NEXT: xorl %edx, %edx 768; X86-SSE2-NEXT: retl 769; 770; X86-SSSE3-LABEL: cnt64_optsize: 771; X86-SSSE3: # %bb.0: 772; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 773; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 774; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 775; X86-SSSE3-NEXT: pand %xmm0, %xmm2 776; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 777; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 778; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 779; X86-SSSE3-NEXT: psrlw $4, %xmm1 780; X86-SSSE3-NEXT: pand %xmm0, %xmm1 781; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3 782; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 783; X86-SSSE3-NEXT: pxor %xmm0, %xmm0 784; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0 785; X86-SSSE3-NEXT: movd %xmm0, %eax 786; X86-SSSE3-NEXT: xorl %edx, %edx 787; X86-SSSE3-NEXT: retl 788 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) 789 ret i64 %cnt 790} 791 792define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { 793; X86-NOSSE-LABEL: cnt128_optsize: 794; X86-NOSSE: # %bb.0: 795; X86-NOSSE-NEXT: pushl %ebp 796; X86-NOSSE-NEXT: pushl %ebx 797; X86-NOSSE-NEXT: pushl %edi 798; X86-NOSSE-NEXT: pushl %esi 799; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 800; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 801; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 802; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx 803; X86-NOSSE-NEXT: movl %ebx, %ecx 804; X86-NOSSE-NEXT: shrl %ecx 805; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 806; X86-NOSSE-NEXT: andl %edi, %ecx 807; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 808; X86-NOSSE-NEXT: subl %ecx, %ebx 809; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 810; X86-NOSSE-NEXT: movl %ebx, %ebp 811; 
X86-NOSSE-NEXT: andl %ecx, %ebp 812; X86-NOSSE-NEXT: shrl $2, %ebx 813; X86-NOSSE-NEXT: andl %ecx, %ebx 814; X86-NOSSE-NEXT: addl %ebp, %ebx 815; X86-NOSSE-NEXT: movl %ebx, %ebp 816; X86-NOSSE-NEXT: shrl $4, %ebp 817; X86-NOSSE-NEXT: addl %ebx, %ebp 818; X86-NOSSE-NEXT: movl %eax, %ebx 819; X86-NOSSE-NEXT: shrl %ebx 820; X86-NOSSE-NEXT: andl %edi, %ebx 821; X86-NOSSE-NEXT: subl %ebx, %eax 822; X86-NOSSE-NEXT: movl %eax, %ebx 823; X86-NOSSE-NEXT: andl %ecx, %ebx 824; X86-NOSSE-NEXT: shrl $2, %eax 825; X86-NOSSE-NEXT: andl %ecx, %eax 826; X86-NOSSE-NEXT: addl %ebx, %eax 827; X86-NOSSE-NEXT: movl %eax, %edi 828; X86-NOSSE-NEXT: shrl $4, %edi 829; X86-NOSSE-NEXT: addl %eax, %edi 830; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F 831; X86-NOSSE-NEXT: andl %ebx, %ebp 832; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 833; X86-NOSSE-NEXT: shrl $24, %eax 834; X86-NOSSE-NEXT: andl %ebx, %edi 835; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 836; X86-NOSSE-NEXT: shrl $24, %edi 837; X86-NOSSE-NEXT: addl %eax, %edi 838; X86-NOSSE-NEXT: movl %esi, %eax 839; X86-NOSSE-NEXT: shrl %eax 840; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 841; X86-NOSSE-NEXT: andl %ebp, %eax 842; X86-NOSSE-NEXT: subl %eax, %esi 843; X86-NOSSE-NEXT: movl %esi, %eax 844; X86-NOSSE-NEXT: andl %ecx, %eax 845; X86-NOSSE-NEXT: shrl $2, %esi 846; X86-NOSSE-NEXT: andl %ecx, %esi 847; X86-NOSSE-NEXT: addl %eax, %esi 848; X86-NOSSE-NEXT: movl %esi, %eax 849; X86-NOSSE-NEXT: shrl $4, %eax 850; X86-NOSSE-NEXT: addl %esi, %eax 851; X86-NOSSE-NEXT: movl %edx, %esi 852; X86-NOSSE-NEXT: shrl %esi 853; X86-NOSSE-NEXT: andl %ebp, %esi 854; X86-NOSSE-NEXT: subl %esi, %edx 855; X86-NOSSE-NEXT: movl %edx, %esi 856; X86-NOSSE-NEXT: andl %ecx, %esi 857; X86-NOSSE-NEXT: shrl $2, %edx 858; X86-NOSSE-NEXT: andl %ecx, %edx 859; X86-NOSSE-NEXT: addl %esi, %edx 860; X86-NOSSE-NEXT: movl %edx, %ecx 861; X86-NOSSE-NEXT: shrl $4, %ecx 862; X86-NOSSE-NEXT: addl %edx, %ecx 863; 
X86-NOSSE-NEXT: andl %ebx, %eax 864; X86-NOSSE-NEXT: andl %ebx, %ecx 865; X86-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 866; X86-NOSSE-NEXT: shrl $24, %eax 867; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 868; X86-NOSSE-NEXT: shrl $24, %ecx 869; X86-NOSSE-NEXT: addl %eax, %ecx 870; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 871; X86-NOSSE-NEXT: addl %edi, %ecx 872; X86-NOSSE-NEXT: xorl %edx, %edx 873; X86-NOSSE-NEXT: movl %edx, 12(%eax) 874; X86-NOSSE-NEXT: movl %edx, 8(%eax) 875; X86-NOSSE-NEXT: movl %edx, 4(%eax) 876; X86-NOSSE-NEXT: movl %ecx, (%eax) 877; X86-NOSSE-NEXT: popl %esi 878; X86-NOSSE-NEXT: popl %edi 879; X86-NOSSE-NEXT: popl %ebx 880; X86-NOSSE-NEXT: popl %ebp 881; X86-NOSSE-NEXT: retl $4 882; 883; X64-LABEL: cnt128_optsize: 884; X64: # %bb.0: 885; X64-NEXT: movq %rsi, %rax 886; X64-NEXT: shrq %rax 887; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 888; X64-NEXT: andq %r8, %rax 889; X64-NEXT: subq %rax, %rsi 890; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 891; X64-NEXT: movq %rsi, %rcx 892; X64-NEXT: andq %rax, %rcx 893; X64-NEXT: shrq $2, %rsi 894; X64-NEXT: andq %rax, %rsi 895; X64-NEXT: addq %rcx, %rsi 896; X64-NEXT: movq %rsi, %rcx 897; X64-NEXT: shrq $4, %rcx 898; X64-NEXT: addq %rsi, %rcx 899; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F 900; X64-NEXT: andq %r9, %rcx 901; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101 902; X64-NEXT: imulq %rdx, %rcx 903; X64-NEXT: shrq $56, %rcx 904; X64-NEXT: movq %rdi, %rsi 905; X64-NEXT: shrq %rsi 906; X64-NEXT: andq %r8, %rsi 907; X64-NEXT: subq %rsi, %rdi 908; X64-NEXT: movq %rdi, %rsi 909; X64-NEXT: andq %rax, %rsi 910; X64-NEXT: shrq $2, %rdi 911; X64-NEXT: andq %rax, %rdi 912; X64-NEXT: addq %rsi, %rdi 913; X64-NEXT: movq %rdi, %rax 914; X64-NEXT: shrq $4, %rax 915; X64-NEXT: addq %rdi, %rax 916; X64-NEXT: andq %r9, %rax 917; X64-NEXT: imulq %rdx, %rax 918; X64-NEXT: 
shrq $56, %rax 919; X64-NEXT: addq %rcx, %rax 920; X64-NEXT: xorl %edx, %edx 921; X64-NEXT: retq 922; 923; X86-POPCNT-LABEL: cnt128_optsize: 924; X86-POPCNT: # %bb.0: 925; X86-POPCNT-NEXT: pushl %esi 926; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax 927; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 928; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx 929; X86-POPCNT-NEXT: addl %ecx, %edx 930; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 931; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi 932; X86-POPCNT-NEXT: addl %ecx, %esi 933; X86-POPCNT-NEXT: addl %edx, %esi 934; X86-POPCNT-NEXT: xorl %ecx, %ecx 935; X86-POPCNT-NEXT: movl %ecx, 12(%eax) 936; X86-POPCNT-NEXT: movl %ecx, 8(%eax) 937; X86-POPCNT-NEXT: movl %ecx, 4(%eax) 938; X86-POPCNT-NEXT: movl %esi, (%eax) 939; X86-POPCNT-NEXT: popl %esi 940; X86-POPCNT-NEXT: retl $4 941; 942; X64-POPCNT-LABEL: cnt128_optsize: 943; X64-POPCNT: # %bb.0: 944; X64-POPCNT-NEXT: popcntq %rsi, %rcx 945; X64-POPCNT-NEXT: popcntq %rdi, %rax 946; X64-POPCNT-NEXT: addq %rcx, %rax 947; X64-POPCNT-NEXT: xorl %edx, %edx 948; X64-POPCNT-NEXT: retq 949; 950; X86-SSE2-LABEL: cnt128_optsize: 951; X86-SSE2: # %bb.0: 952; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 953; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 954; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 955; X86-SSE2-NEXT: psrlw $1, %xmm1 956; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] 957; X86-SSE2-NEXT: pand %xmm2, %xmm1 958; X86-SSE2-NEXT: psubb %xmm1, %xmm0 959; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 960; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 961; X86-SSE2-NEXT: pand %xmm1, %xmm3 962; X86-SSE2-NEXT: psrlw $2, %xmm0 963; X86-SSE2-NEXT: pand %xmm1, %xmm0 964; X86-SSE2-NEXT: paddb %xmm3, %xmm0 965; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 966; X86-SSE2-NEXT: psrlw $4, %xmm3 967; X86-SSE2-NEXT: paddb %xmm0, %xmm3 968; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 
969; X86-SSE2-NEXT: pand %xmm0, %xmm3 970; X86-SSE2-NEXT: pxor %xmm4, %xmm4 971; X86-SSE2-NEXT: psadbw %xmm4, %xmm3 972; X86-SSE2-NEXT: movd %xmm3, %ecx 973; X86-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero 974; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 975; X86-SSE2-NEXT: psrlw $1, %xmm5 976; X86-SSE2-NEXT: pand %xmm2, %xmm5 977; X86-SSE2-NEXT: psubb %xmm5, %xmm3 978; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 979; X86-SSE2-NEXT: pand %xmm1, %xmm2 980; X86-SSE2-NEXT: psrlw $2, %xmm3 981; X86-SSE2-NEXT: pand %xmm1, %xmm3 982; X86-SSE2-NEXT: paddb %xmm2, %xmm3 983; X86-SSE2-NEXT: movdqa %xmm3, %xmm1 984; X86-SSE2-NEXT: psrlw $4, %xmm1 985; X86-SSE2-NEXT: paddb %xmm3, %xmm1 986; X86-SSE2-NEXT: pand %xmm0, %xmm1 987; X86-SSE2-NEXT: psadbw %xmm4, %xmm1 988; X86-SSE2-NEXT: movd %xmm1, %edx 989; X86-SSE2-NEXT: addl %ecx, %edx 990; X86-SSE2-NEXT: xorl %ecx, %ecx 991; X86-SSE2-NEXT: movl %ecx, 12(%eax) 992; X86-SSE2-NEXT: movl %ecx, 8(%eax) 993; X86-SSE2-NEXT: movl %ecx, 4(%eax) 994; X86-SSE2-NEXT: movl %edx, (%eax) 995; X86-SSE2-NEXT: retl $4 996; 997; X86-SSSE3-LABEL: cnt128_optsize: 998; X86-SSSE3: # %bb.0: 999; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax 1000; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1001; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1002; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 1003; X86-SSSE3-NEXT: pand %xmm0, %xmm2 1004; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1005; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 1006; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 1007; X86-SSSE3-NEXT: psrlw $4, %xmm1 1008; X86-SSSE3-NEXT: pand %xmm0, %xmm1 1009; X86-SSSE3-NEXT: movdqa %xmm3, %xmm2 1010; X86-SSSE3-NEXT: pshufb %xmm1, %xmm2 1011; X86-SSSE3-NEXT: paddb %xmm4, %xmm2 1012; X86-SSSE3-NEXT: pxor %xmm1, %xmm1 1013; X86-SSSE3-NEXT: psadbw %xmm1, %xmm2 1014; X86-SSSE3-NEXT: movd %xmm2, %ecx 1015; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero 1016; X86-SSSE3-NEXT: movdqa %xmm2, %xmm4 1017; X86-SSSE3-NEXT: pand 
%xmm0, %xmm4 1018; X86-SSSE3-NEXT: movdqa %xmm3, %xmm5 1019; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5 1020; X86-SSSE3-NEXT: psrlw $4, %xmm2 1021; X86-SSSE3-NEXT: pand %xmm0, %xmm2 1022; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3 1023; X86-SSSE3-NEXT: paddb %xmm5, %xmm3 1024; X86-SSSE3-NEXT: psadbw %xmm1, %xmm3 1025; X86-SSSE3-NEXT: movd %xmm3, %edx 1026; X86-SSSE3-NEXT: addl %ecx, %edx 1027; X86-SSSE3-NEXT: xorl %ecx, %ecx 1028; X86-SSSE3-NEXT: movl %ecx, 12(%eax) 1029; X86-SSSE3-NEXT: movl %ecx, 8(%eax) 1030; X86-SSSE3-NEXT: movl %ecx, 4(%eax) 1031; X86-SSSE3-NEXT: movl %edx, (%eax) 1032; X86-SSSE3-NEXT: retl $4 1033 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) 1034 ret i128 %cnt 1035} 1036
; cnt32_pgso returns the result of @llvm.ctpop.i32 applied to its i32 argument.
; The function is tagged !prof !14, whose function_entry_count is 0.
; NOTE(review): the "pgso" suffix together with the zero entry count suggests
; this case exercises profile-guided size optimization of the ctpop lowering;
; confirm against the commit that introduced it.
; The assertions that follow are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file);
; regenerate them with that script instead of editing them by hand.
1037define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 { 1038; X86-LABEL: cnt32_pgso: 1039; X86: # %bb.0: 1040; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1041; X86-NEXT: movl %eax, %ecx 1042; X86-NEXT: shrl %ecx 1043; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 1044; X86-NEXT: subl %ecx, %eax 1045; X86-NEXT: movl $858993459, %ecx # imm = 0x33333333 1046; X86-NEXT: movl %eax, %edx 1047; X86-NEXT: andl %ecx, %edx 1048; X86-NEXT: shrl $2, %eax 1049; X86-NEXT: andl %ecx, %eax 1050; X86-NEXT: addl %edx, %eax 1051; X86-NEXT: movl %eax, %ecx 1052; X86-NEXT: shrl $4, %ecx 1053; X86-NEXT: addl %eax, %ecx 1054; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 1055; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 1056; X86-NEXT: shrl $24, %eax 1057; X86-NEXT: retl 1058; 1059; X64-LABEL: cnt32_pgso: 1060; X64: # %bb.0: 1061; X64-NEXT: movl %edi, %eax 1062; X64-NEXT: shrl %eax 1063; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 1064; X64-NEXT: subl %eax, %edi 1065; X64-NEXT: movl $858993459, %eax # imm = 0x33333333 1066; X64-NEXT: movl %edi, %ecx 1067; X64-NEXT: andl %eax, %ecx 1068; X64-NEXT: shrl $2, %edi 1069; X64-NEXT: andl %eax, %edi 1070; X64-NEXT: addl %ecx, %edi 1071; X64-NEXT: movl %edi, %eax 1072; X64-NEXT: shrl $4, %eax 1073; X64-NEXT: addl %edi, %eax 1074; 
X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 1075; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 1076; X64-NEXT: shrl $24, %eax 1077; X64-NEXT: retq 1078; 1079; X86-POPCNT-LABEL: cnt32_pgso: 1080; X86-POPCNT: # %bb.0: 1081; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax 1082; X86-POPCNT-NEXT: retl 1083; 1084; X64-POPCNT-LABEL: cnt32_pgso: 1085; X64-POPCNT: # %bb.0: 1086; X64-POPCNT-NEXT: popcntl %edi, %eax 1087; X64-POPCNT-NEXT: retq 1088 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) 1089 ret i32 %cnt 1090} 1091
; cnt64_pgso returns the result of @llvm.ctpop.i64 applied to its i64 argument.
; The function is tagged !prof !14, whose function_entry_count is 0.
; NOTE(review): the "pgso" suffix together with the zero entry count suggests
; this case exercises profile-guided size optimization of the ctpop lowering;
; confirm against the commit that introduced it.
; The assertions that follow are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file);
; regenerate them with that script instead of editing them by hand.
1092define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 { 1093; X86-NOSSE-LABEL: cnt64_pgso: 1094; X86-NOSSE: # %bb.0: 1095; X86-NOSSE-NEXT: pushl %ebx 1096; X86-NOSSE-NEXT: pushl %edi 1097; X86-NOSSE-NEXT: pushl %esi 1098; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1099; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 1100; X86-NOSSE-NEXT: movl %ecx, %edx 1101; X86-NOSSE-NEXT: shrl %edx 1102; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 1103; X86-NOSSE-NEXT: andl %esi, %edx 1104; X86-NOSSE-NEXT: subl %edx, %ecx 1105; X86-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333 1106; X86-NOSSE-NEXT: movl %ecx, %edi 1107; X86-NOSSE-NEXT: andl %edx, %edi 1108; X86-NOSSE-NEXT: shrl $2, %ecx 1109; X86-NOSSE-NEXT: andl %edx, %ecx 1110; X86-NOSSE-NEXT: addl %edi, %ecx 1111; X86-NOSSE-NEXT: movl %ecx, %edi 1112; X86-NOSSE-NEXT: shrl $4, %edi 1113; X86-NOSSE-NEXT: addl %ecx, %edi 1114; X86-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F 1115; X86-NOSSE-NEXT: andl %ecx, %edi 1116; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 1117; X86-NOSSE-NEXT: shrl $24, %edi 1118; X86-NOSSE-NEXT: movl %eax, %ebx 1119; X86-NOSSE-NEXT: shrl %ebx 1120; X86-NOSSE-NEXT: andl %esi, %ebx 1121; X86-NOSSE-NEXT: subl %ebx, %eax 1122; X86-NOSSE-NEXT: movl %eax, %esi 1123; X86-NOSSE-NEXT: andl %edx, %esi 1124; X86-NOSSE-NEXT: shrl $2, %eax 1125; X86-NOSSE-NEXT: andl %edx, %eax 1126; X86-NOSSE-NEXT: addl %esi, %eax 1127; X86-NOSSE-NEXT: movl 
%eax, %edx 1128; X86-NOSSE-NEXT: shrl $4, %edx 1129; X86-NOSSE-NEXT: addl %eax, %edx 1130; X86-NOSSE-NEXT: andl %ecx, %edx 1131; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 1132; X86-NOSSE-NEXT: shrl $24, %eax 1133; X86-NOSSE-NEXT: addl %edi, %eax 1134; X86-NOSSE-NEXT: xorl %edx, %edx 1135; X86-NOSSE-NEXT: popl %esi 1136; X86-NOSSE-NEXT: popl %edi 1137; X86-NOSSE-NEXT: popl %ebx 1138; X86-NOSSE-NEXT: retl 1139; 1140; X64-LABEL: cnt64_pgso: 1141; X64: # %bb.0: 1142; X64-NEXT: movq %rdi, %rax 1143; X64-NEXT: shrq %rax 1144; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 1145; X64-NEXT: andq %rax, %rcx 1146; X64-NEXT: subq %rcx, %rdi 1147; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 1148; X64-NEXT: movq %rdi, %rcx 1149; X64-NEXT: andq %rax, %rcx 1150; X64-NEXT: shrq $2, %rdi 1151; X64-NEXT: andq %rax, %rdi 1152; X64-NEXT: addq %rcx, %rdi 1153; X64-NEXT: movq %rdi, %rax 1154; X64-NEXT: shrq $4, %rax 1155; X64-NEXT: addq %rdi, %rax 1156; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F 1157; X64-NEXT: andq %rax, %rcx 1158; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 1159; X64-NEXT: imulq %rcx, %rax 1160; X64-NEXT: shrq $56, %rax 1161; X64-NEXT: retq 1162; 1163; X86-POPCNT-LABEL: cnt64_pgso: 1164; X86-POPCNT: # %bb.0: 1165; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 1166; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax 1167; X86-POPCNT-NEXT: addl %ecx, %eax 1168; X86-POPCNT-NEXT: xorl %edx, %edx 1169; X86-POPCNT-NEXT: retl 1170; 1171; X64-POPCNT-LABEL: cnt64_pgso: 1172; X64-POPCNT: # %bb.0: 1173; X64-POPCNT-NEXT: popcntq %rdi, %rax 1174; X64-POPCNT-NEXT: retq 1175; 1176; X86-SSE2-LABEL: cnt64_pgso: 1177; X86-SSE2: # %bb.0: 1178; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1179; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1180; X86-SSE2-NEXT: psrlw $1, %xmm1 1181; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 1182; X86-SSE2-NEXT: psubb %xmm1, %xmm0 1183; 
X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1184; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1185; X86-SSE2-NEXT: pand %xmm1, %xmm2 1186; X86-SSE2-NEXT: psrlw $2, %xmm0 1187; X86-SSE2-NEXT: pand %xmm1, %xmm0 1188; X86-SSE2-NEXT: paddb %xmm2, %xmm0 1189; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1190; X86-SSE2-NEXT: psrlw $4, %xmm1 1191; X86-SSE2-NEXT: paddb %xmm0, %xmm1 1192; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 1193; X86-SSE2-NEXT: pxor %xmm0, %xmm0 1194; X86-SSE2-NEXT: psadbw %xmm1, %xmm0 1195; X86-SSE2-NEXT: movd %xmm0, %eax 1196; X86-SSE2-NEXT: xorl %edx, %edx 1197; X86-SSE2-NEXT: retl 1198; 1199; X86-SSSE3-LABEL: cnt64_pgso: 1200; X86-SSSE3: # %bb.0: 1201; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1202; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1203; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 1204; X86-SSSE3-NEXT: pand %xmm0, %xmm2 1205; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1206; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 1207; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 1208; X86-SSSE3-NEXT: psrlw $4, %xmm1 1209; X86-SSSE3-NEXT: pand %xmm0, %xmm1 1210; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3 1211; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 1212; X86-SSSE3-NEXT: pxor %xmm0, %xmm0 1213; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0 1214; X86-SSSE3-NEXT: movd %xmm0, %eax 1215; X86-SSSE3-NEXT: xorl %edx, %edx 1216; X86-SSSE3-NEXT: retl 1217 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) 1218 ret i64 %cnt 1219} 1220
; cnt128_pgso returns the result of @llvm.ctpop.i128 applied to its i128
; argument. The function is tagged !prof !14, whose function_entry_count is 0.
; NOTE(review): the "pgso" suffix together with the zero entry count suggests
; this case exercises profile-guided size optimization of the ctpop lowering;
; confirm against the commit that introduced it.
; The assertions that follow are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file);
; regenerate them with that script instead of editing them by hand.
1221define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { 1222; X86-NOSSE-LABEL: cnt128_pgso: 1223; X86-NOSSE: # %bb.0: 1224; X86-NOSSE-NEXT: pushl %ebp 1225; X86-NOSSE-NEXT: pushl %ebx 1226; X86-NOSSE-NEXT: pushl %edi 1227; X86-NOSSE-NEXT: pushl %esi 1228; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 1229; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 1230; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1231; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx 1232; 
X86-NOSSE-NEXT: movl %ebx, %ecx 1233; X86-NOSSE-NEXT: shrl %ecx 1234; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 1235; X86-NOSSE-NEXT: andl %edi, %ecx 1236; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 1237; X86-NOSSE-NEXT: subl %ecx, %ebx 1238; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 1239; X86-NOSSE-NEXT: movl %ebx, %ebp 1240; X86-NOSSE-NEXT: andl %ecx, %ebp 1241; X86-NOSSE-NEXT: shrl $2, %ebx 1242; X86-NOSSE-NEXT: andl %ecx, %ebx 1243; X86-NOSSE-NEXT: addl %ebp, %ebx 1244; X86-NOSSE-NEXT: movl %ebx, %ebp 1245; X86-NOSSE-NEXT: shrl $4, %ebp 1246; X86-NOSSE-NEXT: addl %ebx, %ebp 1247; X86-NOSSE-NEXT: movl %eax, %ebx 1248; X86-NOSSE-NEXT: shrl %ebx 1249; X86-NOSSE-NEXT: andl %edi, %ebx 1250; X86-NOSSE-NEXT: subl %ebx, %eax 1251; X86-NOSSE-NEXT: movl %eax, %ebx 1252; X86-NOSSE-NEXT: andl %ecx, %ebx 1253; X86-NOSSE-NEXT: shrl $2, %eax 1254; X86-NOSSE-NEXT: andl %ecx, %eax 1255; X86-NOSSE-NEXT: addl %ebx, %eax 1256; X86-NOSSE-NEXT: movl %eax, %edi 1257; X86-NOSSE-NEXT: shrl $4, %edi 1258; X86-NOSSE-NEXT: addl %eax, %edi 1259; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F 1260; X86-NOSSE-NEXT: andl %ebx, %ebp 1261; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 1262; X86-NOSSE-NEXT: shrl $24, %eax 1263; X86-NOSSE-NEXT: andl %ebx, %edi 1264; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 1265; X86-NOSSE-NEXT: shrl $24, %edi 1266; X86-NOSSE-NEXT: addl %eax, %edi 1267; X86-NOSSE-NEXT: movl %esi, %eax 1268; X86-NOSSE-NEXT: shrl %eax 1269; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 1270; X86-NOSSE-NEXT: andl %ebp, %eax 1271; X86-NOSSE-NEXT: subl %eax, %esi 1272; X86-NOSSE-NEXT: movl %esi, %eax 1273; X86-NOSSE-NEXT: andl %ecx, %eax 1274; X86-NOSSE-NEXT: shrl $2, %esi 1275; X86-NOSSE-NEXT: andl %ecx, %esi 1276; X86-NOSSE-NEXT: addl %eax, %esi 1277; X86-NOSSE-NEXT: movl %esi, %eax 1278; X86-NOSSE-NEXT: shrl $4, %eax 1279; X86-NOSSE-NEXT: addl %esi, %eax 1280; X86-NOSSE-NEXT: movl 
%edx, %esi 1281; X86-NOSSE-NEXT: shrl %esi 1282; X86-NOSSE-NEXT: andl %ebp, %esi 1283; X86-NOSSE-NEXT: subl %esi, %edx 1284; X86-NOSSE-NEXT: movl %edx, %esi 1285; X86-NOSSE-NEXT: andl %ecx, %esi 1286; X86-NOSSE-NEXT: shrl $2, %edx 1287; X86-NOSSE-NEXT: andl %ecx, %edx 1288; X86-NOSSE-NEXT: addl %esi, %edx 1289; X86-NOSSE-NEXT: movl %edx, %ecx 1290; X86-NOSSE-NEXT: shrl $4, %ecx 1291; X86-NOSSE-NEXT: addl %edx, %ecx 1292; X86-NOSSE-NEXT: andl %ebx, %eax 1293; X86-NOSSE-NEXT: andl %ebx, %ecx 1294; X86-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 1295; X86-NOSSE-NEXT: shrl $24, %eax 1296; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 1297; X86-NOSSE-NEXT: shrl $24, %ecx 1298; X86-NOSSE-NEXT: addl %eax, %ecx 1299; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1300; X86-NOSSE-NEXT: addl %edi, %ecx 1301; X86-NOSSE-NEXT: xorl %edx, %edx 1302; X86-NOSSE-NEXT: movl %edx, 12(%eax) 1303; X86-NOSSE-NEXT: movl %edx, 8(%eax) 1304; X86-NOSSE-NEXT: movl %edx, 4(%eax) 1305; X86-NOSSE-NEXT: movl %ecx, (%eax) 1306; X86-NOSSE-NEXT: popl %esi 1307; X86-NOSSE-NEXT: popl %edi 1308; X86-NOSSE-NEXT: popl %ebx 1309; X86-NOSSE-NEXT: popl %ebp 1310; X86-NOSSE-NEXT: retl $4 1311; 1312; X64-LABEL: cnt128_pgso: 1313; X64: # %bb.0: 1314; X64-NEXT: movq %rsi, %rax 1315; X64-NEXT: shrq %rax 1316; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 1317; X64-NEXT: andq %r8, %rax 1318; X64-NEXT: subq %rax, %rsi 1319; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 1320; X64-NEXT: movq %rsi, %rcx 1321; X64-NEXT: andq %rax, %rcx 1322; X64-NEXT: shrq $2, %rsi 1323; X64-NEXT: andq %rax, %rsi 1324; X64-NEXT: addq %rcx, %rsi 1325; X64-NEXT: movq %rsi, %rcx 1326; X64-NEXT: shrq $4, %rcx 1327; X64-NEXT: addq %rsi, %rcx 1328; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F 1329; X64-NEXT: andq %r9, %rcx 1330; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101 1331; X64-NEXT: imulq %rdx, %rcx 1332; 
X64-NEXT: shrq $56, %rcx 1333; X64-NEXT: movq %rdi, %rsi 1334; X64-NEXT: shrq %rsi 1335; X64-NEXT: andq %r8, %rsi 1336; X64-NEXT: subq %rsi, %rdi 1337; X64-NEXT: movq %rdi, %rsi 1338; X64-NEXT: andq %rax, %rsi 1339; X64-NEXT: shrq $2, %rdi 1340; X64-NEXT: andq %rax, %rdi 1341; X64-NEXT: addq %rsi, %rdi 1342; X64-NEXT: movq %rdi, %rax 1343; X64-NEXT: shrq $4, %rax 1344; X64-NEXT: addq %rdi, %rax 1345; X64-NEXT: andq %r9, %rax 1346; X64-NEXT: imulq %rdx, %rax 1347; X64-NEXT: shrq $56, %rax 1348; X64-NEXT: addq %rcx, %rax 1349; X64-NEXT: xorl %edx, %edx 1350; X64-NEXT: retq 1351; 1352; X86-POPCNT-LABEL: cnt128_pgso: 1353; X86-POPCNT: # %bb.0: 1354; X86-POPCNT-NEXT: pushl %esi 1355; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax 1356; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 1357; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx 1358; X86-POPCNT-NEXT: addl %ecx, %edx 1359; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx 1360; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi 1361; X86-POPCNT-NEXT: addl %ecx, %esi 1362; X86-POPCNT-NEXT: addl %edx, %esi 1363; X86-POPCNT-NEXT: xorl %ecx, %ecx 1364; X86-POPCNT-NEXT: movl %ecx, 12(%eax) 1365; X86-POPCNT-NEXT: movl %ecx, 8(%eax) 1366; X86-POPCNT-NEXT: movl %ecx, 4(%eax) 1367; X86-POPCNT-NEXT: movl %esi, (%eax) 1368; X86-POPCNT-NEXT: popl %esi 1369; X86-POPCNT-NEXT: retl $4 1370; 1371; X64-POPCNT-LABEL: cnt128_pgso: 1372; X64-POPCNT: # %bb.0: 1373; X64-POPCNT-NEXT: popcntq %rsi, %rcx 1374; X64-POPCNT-NEXT: popcntq %rdi, %rax 1375; X64-POPCNT-NEXT: addq %rcx, %rax 1376; X64-POPCNT-NEXT: xorl %edx, %edx 1377; X64-POPCNT-NEXT: retq 1378; 1379; X86-SSE2-LABEL: cnt128_pgso: 1380; X86-SSE2: # %bb.0: 1381; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1382; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1383; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1384; X86-SSE2-NEXT: psrlw $1, %xmm1 1385; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] 1386; X86-SSE2-NEXT: pand %xmm2, %xmm1 1387; X86-SSE2-NEXT: 
psubb %xmm1, %xmm0 1388; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 1389; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 1390; X86-SSE2-NEXT: pand %xmm1, %xmm3 1391; X86-SSE2-NEXT: psrlw $2, %xmm0 1392; X86-SSE2-NEXT: pand %xmm1, %xmm0 1393; X86-SSE2-NEXT: paddb %xmm3, %xmm0 1394; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 1395; X86-SSE2-NEXT: psrlw $4, %xmm3 1396; X86-SSE2-NEXT: paddb %xmm0, %xmm3 1397; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1398; X86-SSE2-NEXT: pand %xmm0, %xmm3 1399; X86-SSE2-NEXT: pxor %xmm4, %xmm4 1400; X86-SSE2-NEXT: psadbw %xmm4, %xmm3 1401; X86-SSE2-NEXT: movd %xmm3, %ecx 1402; X86-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero 1403; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 1404; X86-SSE2-NEXT: psrlw $1, %xmm5 1405; X86-SSE2-NEXT: pand %xmm2, %xmm5 1406; X86-SSE2-NEXT: psubb %xmm5, %xmm3 1407; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 1408; X86-SSE2-NEXT: pand %xmm1, %xmm2 1409; X86-SSE2-NEXT: psrlw $2, %xmm3 1410; X86-SSE2-NEXT: pand %xmm1, %xmm3 1411; X86-SSE2-NEXT: paddb %xmm2, %xmm3 1412; X86-SSE2-NEXT: movdqa %xmm3, %xmm1 1413; X86-SSE2-NEXT: psrlw $4, %xmm1 1414; X86-SSE2-NEXT: paddb %xmm3, %xmm1 1415; X86-SSE2-NEXT: pand %xmm0, %xmm1 1416; X86-SSE2-NEXT: psadbw %xmm4, %xmm1 1417; X86-SSE2-NEXT: movd %xmm1, %edx 1418; X86-SSE2-NEXT: addl %ecx, %edx 1419; X86-SSE2-NEXT: xorl %ecx, %ecx 1420; X86-SSE2-NEXT: movl %ecx, 12(%eax) 1421; X86-SSE2-NEXT: movl %ecx, 8(%eax) 1422; X86-SSE2-NEXT: movl %ecx, 4(%eax) 1423; X86-SSE2-NEXT: movl %edx, (%eax) 1424; X86-SSE2-NEXT: retl $4 1425; 1426; X86-SSSE3-LABEL: cnt128_pgso: 1427; X86-SSSE3: # %bb.0: 1428; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax 1429; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1430; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1431; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 1432; X86-SSSE3-NEXT: pand %xmm0, %xmm2 1433; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 1434; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 1435; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 1436; X86-SSSE3-NEXT: psrlw $4, %xmm1 1437; X86-SSSE3-NEXT: pand %xmm0, %xmm1 1438; X86-SSSE3-NEXT: movdqa %xmm3, %xmm2 1439; X86-SSSE3-NEXT: pshufb %xmm1, %xmm2 1440; X86-SSSE3-NEXT: paddb %xmm4, %xmm2 1441; X86-SSSE3-NEXT: pxor %xmm1, %xmm1 1442; X86-SSSE3-NEXT: psadbw %xmm1, %xmm2 1443; X86-SSSE3-NEXT: movd %xmm2, %ecx 1444; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero 1445; X86-SSSE3-NEXT: movdqa %xmm2, %xmm4 1446; X86-SSSE3-NEXT: pand %xmm0, %xmm4 1447; X86-SSSE3-NEXT: movdqa %xmm3, %xmm5 1448; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5 1449; X86-SSSE3-NEXT: psrlw $4, %xmm2 1450; X86-SSSE3-NEXT: pand %xmm0, %xmm2 1451; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3 1452; X86-SSSE3-NEXT: paddb %xmm5, %xmm3 1453; X86-SSSE3-NEXT: psadbw %xmm1, %xmm3 1454; X86-SSSE3-NEXT: movd %xmm3, %edx 1455; X86-SSSE3-NEXT: addl %ecx, %edx 1456; X86-SSSE3-NEXT: xorl %ecx, %ecx 1457; X86-SSSE3-NEXT: movl %ecx, 12(%eax) 1458; X86-SSSE3-NEXT: movl %ecx, 8(%eax) 1459; X86-SSSE3-NEXT: movl %ecx, 4(%eax) 1460; X86-SSSE3-NEXT: movl %edx, (%eax) 1461; X86-SSSE3-NEXT: retl $4 1462 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) 1463 ret i128 %cnt 1464} 1465
; Declarations of the llvm.ctpop intrinsic, one per integer width from i8 to
; i128, each marked nounwind readnone.
1466declare i8 @llvm.ctpop.i8(i8) nounwind readnone 1467declare i16 @llvm.ctpop.i16(i16) nounwind readnone 1468declare i32 @llvm.ctpop.i32(i32) nounwind readnone 1469declare i64 @llvm.ctpop.i64(i64) nounwind readnone 1470declare i128 @llvm.ctpop.i128(i128) nounwind readnone 1471
; Module flag !0 attaches a "ProfileSummary" (!1 through !13, ProfileFormat
; "InstrProf") to the module. !14 is a function_entry_count of 0 and is the
; node referenced through !prof by cnt32_pgso, cnt64_pgso and cnt128_pgso.
; NOTE(review): presumably the summary plus the zero entry count is what makes
; those functions count as cold for size optimization; confirm before changing
; any of the counts below.
1472!llvm.module.flags = !{!0} 1473!0 = !{i32 1, !"ProfileSummary", !1} 1474!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} 1475!2 = !{!"ProfileFormat", !"InstrProf"} 1476!3 = !{!"TotalCount", i64 10000} 1477!4 = !{!"MaxCount", i64 10} 1478!5 = !{!"MaxInternalCount", i64 1} 1479!6 = !{!"MaxFunctionCount", i64 1000} 1480!7 = !{!"NumCounts", i64 3} 1481!8 = !{!"NumFunctions", i64 3} 1482!9 = !{!"DetailedSummary", !10} 1483!10 = !{!11, !12, !13} 1484!11 = !{i32 10000, 
i64 100, i32 1} 1485!12 = !{i32 999000, i64 100, i32 1} 1486!13 = !{i32 999999, i64 1, i32 2} 1487!14 = !{!"function_entry_count", i64 0} 1488