1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
3; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
4; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
6; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
7; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3
8
; cnt8 - population count of an i8 through @llvm.ctpop.i8.
; Without +popcnt the expected code is an in-register bit-twiddling
; expansion on byte registers using the masks 85, 51 and 15. With +popcnt
; the argument is zero-extended (movzbl) and counted by a single popcntl.
; The check lines are autogenerated (see the NOTE header of this file);
; regenerate them with that script instead of editing them by hand.
9define i8 @cnt8(i8 %x) nounwind readnone {
10; X86-LABEL: cnt8:
11; X86:       # %bb.0:
12; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
13; X86-NEXT:    movl %ecx, %eax
14; X86-NEXT:    shrb %al
15; X86-NEXT:    andb $85, %al
16; X86-NEXT:    subb %al, %cl
17; X86-NEXT:    movl %ecx, %eax
18; X86-NEXT:    andb $51, %al
19; X86-NEXT:    shrb $2, %cl
20; X86-NEXT:    andb $51, %cl
21; X86-NEXT:    addb %al, %cl
22; X86-NEXT:    movl %ecx, %eax
23; X86-NEXT:    shrb $4, %al
24; X86-NEXT:    addb %cl, %al
25; X86-NEXT:    andb $15, %al
26; X86-NEXT:    retl
27;
28; X64-LABEL: cnt8:
29; X64:       # %bb.0:
30; X64-NEXT:    # kill: def $edi killed $edi def $rdi
31; X64-NEXT:    movl %edi, %eax
32; X64-NEXT:    shrb %al
33; X64-NEXT:    andb $85, %al
34; X64-NEXT:    subb %al, %dil
35; X64-NEXT:    movl %edi, %eax
36; X64-NEXT:    andb $51, %al
37; X64-NEXT:    shrb $2, %dil
38; X64-NEXT:    andb $51, %dil
39; X64-NEXT:    addb %al, %dil
40; X64-NEXT:    movl %edi, %eax
41; X64-NEXT:    shrb $4, %al
42; X64-NEXT:    addl %edi, %eax
43; X64-NEXT:    andb $15, %al
44; X64-NEXT:    # kill: def $al killed $al killed $eax
45; X64-NEXT:    retq
46;
47; X86-POPCNT-LABEL: cnt8:
48; X86-POPCNT:       # %bb.0:
49; X86-POPCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
50; X86-POPCNT-NEXT:    popcntl %eax, %eax
51; X86-POPCNT-NEXT:    # kill: def $al killed $al killed $eax
52; X86-POPCNT-NEXT:    retl
53;
54; X64-POPCNT-LABEL: cnt8:
55; X64-POPCNT:       # %bb.0:
56; X64-POPCNT-NEXT:    movzbl %dil, %eax
57; X64-POPCNT-NEXT:    popcntl %eax, %eax
58; X64-POPCNT-NEXT:    # kill: def $al killed $al killed $eax
59; X64-POPCNT-NEXT:    retq
60  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
61  ret i8 %cnt
62}
63
; cnt16 - population count of an i16 through @llvm.ctpop.i16.
; Without +popcnt the expected code is the bit-twiddling expansion on 32-bit
; registers with the masks 0x5555, 0x3333 and 0xF0F; the per-byte counts are
; then summed with a shift-left-by-8 plus add, and the result is read from
; the second byte (movzbl of %ah / %ch). With +popcnt a single popcntw is
; expected on both the i686 and x86_64 runs.
64define i16 @cnt16(i16 %x) nounwind readnone {
65; X86-LABEL: cnt16:
66; X86:       # %bb.0:
67; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
68; X86-NEXT:    movl %eax, %ecx
69; X86-NEXT:    shrl %ecx
70; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
71; X86-NEXT:    subl %ecx, %eax
72; X86-NEXT:    movl %eax, %ecx
73; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
74; X86-NEXT:    shrl $2, %eax
75; X86-NEXT:    andl $13107, %eax # imm = 0x3333
76; X86-NEXT:    addl %ecx, %eax
77; X86-NEXT:    movl %eax, %ecx
78; X86-NEXT:    shrl $4, %ecx
79; X86-NEXT:    addl %eax, %ecx
80; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
81; X86-NEXT:    movl %ecx, %eax
82; X86-NEXT:    shll $8, %eax
83; X86-NEXT:    addl %ecx, %eax
84; X86-NEXT:    movzbl %ah, %eax
85; X86-NEXT:    # kill: def $ax killed $ax killed $eax
86; X86-NEXT:    retl
87;
88; X64-LABEL: cnt16:
89; X64:       # %bb.0:
90; X64-NEXT:    movl %edi, %eax
91; X64-NEXT:    shrl %eax
92; X64-NEXT:    andl $21845, %eax # imm = 0x5555
93; X64-NEXT:    subl %eax, %edi
94; X64-NEXT:    movl %edi, %eax
95; X64-NEXT:    andl $13107, %eax # imm = 0x3333
96; X64-NEXT:    shrl $2, %edi
97; X64-NEXT:    andl $13107, %edi # imm = 0x3333
98; X64-NEXT:    addl %eax, %edi
99; X64-NEXT:    movl %edi, %eax
100; X64-NEXT:    shrl $4, %eax
101; X64-NEXT:    addl %edi, %eax
102; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
103; X64-NEXT:    movl %eax, %ecx
104; X64-NEXT:    shll $8, %ecx
105; X64-NEXT:    addl %eax, %ecx
106; X64-NEXT:    movzbl %ch, %eax
107; X64-NEXT:    # kill: def $ax killed $ax killed $eax
108; X64-NEXT:    retq
109;
110; X86-POPCNT-LABEL: cnt16:
111; X86-POPCNT:       # %bb.0:
112; X86-POPCNT-NEXT:    popcntw {{[0-9]+}}(%esp), %ax
113; X86-POPCNT-NEXT:    retl
114;
115; X64-POPCNT-LABEL: cnt16:
116; X64-POPCNT:       # %bb.0:
117; X64-POPCNT-NEXT:    popcntw %di, %ax
118; X64-POPCNT-NEXT:    retq
119  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
120  ret i16 %cnt
121}
122
; cnt32 - population count of an i32 through @llvm.ctpop.i32.
; Without +popcnt the expected code is the bit-twiddling expansion with the
; immediates 0x55555555, 0x33333333 and 0xF0F0F0F, followed by a multiply
; by 0x1010101 and a right shift by 24 to extract the total. With +popcnt a
; single popcntl is expected (from the stack on i686, from %edi on x86_64).
123define i32 @cnt32(i32 %x) nounwind readnone {
124; X86-LABEL: cnt32:
125; X86:       # %bb.0:
126; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
127; X86-NEXT:    movl %eax, %ecx
128; X86-NEXT:    shrl %ecx
129; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
130; X86-NEXT:    subl %ecx, %eax
131; X86-NEXT:    movl %eax, %ecx
132; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
133; X86-NEXT:    shrl $2, %eax
134; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
135; X86-NEXT:    addl %ecx, %eax
136; X86-NEXT:    movl %eax, %ecx
137; X86-NEXT:    shrl $4, %ecx
138; X86-NEXT:    addl %eax, %ecx
139; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
140; X86-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
141; X86-NEXT:    shrl $24, %eax
142; X86-NEXT:    retl
143;
144; X64-LABEL: cnt32:
145; X64:       # %bb.0:
146; X64-NEXT:    movl %edi, %eax
147; X64-NEXT:    shrl %eax
148; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
149; X64-NEXT:    subl %eax, %edi
150; X64-NEXT:    movl %edi, %eax
151; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
152; X64-NEXT:    shrl $2, %edi
153; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
154; X64-NEXT:    addl %eax, %edi
155; X64-NEXT:    movl %edi, %eax
156; X64-NEXT:    shrl $4, %eax
157; X64-NEXT:    addl %edi, %eax
158; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
159; X64-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
160; X64-NEXT:    shrl $24, %eax
161; X64-NEXT:    retq
162;
163; X86-POPCNT-LABEL: cnt32:
164; X86-POPCNT:       # %bb.0:
165; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
166; X86-POPCNT-NEXT:    retl
167;
168; X64-POPCNT-LABEL: cnt32:
169; X64-POPCNT:       # %bb.0:
170; X64-POPCNT-NEXT:    popcntl %edi, %eax
171; X64-POPCNT-NEXT:    retq
172  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
173  ret i32 %cnt
174}
175
; cnt64 - population count of an i64 through @llvm.ctpop.i64.
; Expected lowering per run configuration:
;   * i686 without SSE: two independent 32-bit bit-twiddling expansions
;     whose results are added; %edx (high half of the result) is zeroed.
;   * x86_64 without popcnt: one 64-bit expansion with movabsq constants,
;     an imulq by 0x101010101010101 and a right shift by 56.
;   * +popcnt: two popcntl plus an add on i686, a single popcntq on x86_64.
;   * i686 with SSE2: byte-wise vector expansion in xmm registers, with the
;     bytes summed by psadbw against a zeroed register.
;   * i686 with SSSE3: nibble lookup through pshufb using the table
;     [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4], again summed by psadbw.
176define i64 @cnt64(i64 %x) nounwind readnone {
177; X86-NOSSE-LABEL: cnt64:
178; X86-NOSSE:       # %bb.0:
179; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
180; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
181; X86-NOSSE-NEXT:    movl %ecx, %edx
182; X86-NOSSE-NEXT:    shrl %edx
183; X86-NOSSE-NEXT:    andl $1431655765, %edx # imm = 0x55555555
184; X86-NOSSE-NEXT:    subl %edx, %ecx
185; X86-NOSSE-NEXT:    movl %ecx, %edx
186; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
187; X86-NOSSE-NEXT:    shrl $2, %ecx
188; X86-NOSSE-NEXT:    andl $858993459, %ecx # imm = 0x33333333
189; X86-NOSSE-NEXT:    addl %edx, %ecx
190; X86-NOSSE-NEXT:    movl %ecx, %edx
191; X86-NOSSE-NEXT:    shrl $4, %edx
192; X86-NOSSE-NEXT:    addl %ecx, %edx
193; X86-NOSSE-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
194; X86-NOSSE-NEXT:    imull $16843009, %edx, %ecx # imm = 0x1010101
195; X86-NOSSE-NEXT:    shrl $24, %ecx
196; X86-NOSSE-NEXT:    movl %eax, %edx
197; X86-NOSSE-NEXT:    shrl %edx
198; X86-NOSSE-NEXT:    andl $1431655765, %edx # imm = 0x55555555
199; X86-NOSSE-NEXT:    subl %edx, %eax
200; X86-NOSSE-NEXT:    movl %eax, %edx
201; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
202; X86-NOSSE-NEXT:    shrl $2, %eax
203; X86-NOSSE-NEXT:    andl $858993459, %eax # imm = 0x33333333
204; X86-NOSSE-NEXT:    addl %edx, %eax
205; X86-NOSSE-NEXT:    movl %eax, %edx
206; X86-NOSSE-NEXT:    shrl $4, %edx
207; X86-NOSSE-NEXT:    addl %eax, %edx
208; X86-NOSSE-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
209; X86-NOSSE-NEXT:    imull $16843009, %edx, %eax # imm = 0x1010101
210; X86-NOSSE-NEXT:    shrl $24, %eax
211; X86-NOSSE-NEXT:    addl %ecx, %eax
212; X86-NOSSE-NEXT:    xorl %edx, %edx
213; X86-NOSSE-NEXT:    retl
214;
215; X64-LABEL: cnt64:
216; X64:       # %bb.0:
217; X64-NEXT:    movq %rdi, %rax
218; X64-NEXT:    shrq %rax
219; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
220; X64-NEXT:    andq %rax, %rcx
221; X64-NEXT:    subq %rcx, %rdi
222; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
223; X64-NEXT:    movq %rdi, %rcx
224; X64-NEXT:    andq %rax, %rcx
225; X64-NEXT:    shrq $2, %rdi
226; X64-NEXT:    andq %rax, %rdi
227; X64-NEXT:    addq %rcx, %rdi
228; X64-NEXT:    movq %rdi, %rax
229; X64-NEXT:    shrq $4, %rax
230; X64-NEXT:    addq %rdi, %rax
231; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
232; X64-NEXT:    andq %rax, %rcx
233; X64-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
234; X64-NEXT:    imulq %rcx, %rax
235; X64-NEXT:    shrq $56, %rax
236; X64-NEXT:    retq
237;
238; X86-POPCNT-LABEL: cnt64:
239; X86-POPCNT:       # %bb.0:
240; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
241; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
242; X86-POPCNT-NEXT:    addl %ecx, %eax
243; X86-POPCNT-NEXT:    xorl %edx, %edx
244; X86-POPCNT-NEXT:    retl
245;
246; X64-POPCNT-LABEL: cnt64:
247; X64-POPCNT:       # %bb.0:
248; X64-POPCNT-NEXT:    popcntq %rdi, %rax
249; X64-POPCNT-NEXT:    retq
250;
251; X86-SSE2-LABEL: cnt64:
252; X86-SSE2:       # %bb.0:
253; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
254; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
255; X86-SSE2-NEXT:    psrlw $1, %xmm1
256; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
257; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
258; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
259; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
260; X86-SSE2-NEXT:    pand %xmm1, %xmm2
261; X86-SSE2-NEXT:    psrlw $2, %xmm0
262; X86-SSE2-NEXT:    pand %xmm1, %xmm0
263; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
264; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
265; X86-SSE2-NEXT:    psrlw $4, %xmm1
266; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
267; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
268; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
269; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
270; X86-SSE2-NEXT:    movd %xmm0, %eax
271; X86-SSE2-NEXT:    xorl %edx, %edx
272; X86-SSE2-NEXT:    retl
273;
274; X86-SSSE3-LABEL: cnt64:
275; X86-SSSE3:       # %bb.0:
276; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
277; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
278; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
279; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
280; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
281; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
282; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
283; X86-SSSE3-NEXT:    psrlw $4, %xmm1
284; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
285; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm3
286; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
287; X86-SSSE3-NEXT:    pxor %xmm0, %xmm0
288; X86-SSSE3-NEXT:    psadbw %xmm3, %xmm0
289; X86-SSSE3-NEXT:    movd %xmm0, %eax
290; X86-SSSE3-NEXT:    xorl %edx, %edx
291; X86-SSSE3-NEXT:    retl
292  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
293  ret i64 %cnt
294}
295
; cnt128 - population count of an i128 through @llvm.ctpop.i128.
; Expected lowering per run configuration:
;   * i686 (all variants): the result is written through a pointer loaded
;     from the stack into %eax; the low 32 bits receive the count, the other
;     three dwords are stored as 0, and the function returns with retl $4.
;   * i686 without SSE: four 32-bit bit-twiddling expansions, summed.
;   * x86_64 without popcnt: two 64-bit expansions sharing the movabsq
;     constants, summed; %rdx (high half of the result) is zeroed.
;   * +popcnt: four popcntl on i686, two popcntq on x86_64, then adds.
;   * SSE2 / SSSE3 on i686: each 64-bit half goes through the vector
;     sequence (byte-wise expansion or pshufb table lookup) ending in
;     psadbw; the two partial counts are added in general-purpose registers.
296define i128 @cnt128(i128 %x) nounwind readnone {
297; X86-NOSSE-LABEL: cnt128:
298; X86-NOSSE:       # %bb.0:
299; X86-NOSSE-NEXT:    pushl %ebx
300; X86-NOSSE-NEXT:    pushl %edi
301; X86-NOSSE-NEXT:    pushl %esi
302; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
303; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
304; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
305; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
306; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
307; X86-NOSSE-NEXT:    movl %edi, %ebx
308; X86-NOSSE-NEXT:    shrl %ebx
309; X86-NOSSE-NEXT:    andl $1431655765, %ebx # imm = 0x55555555
310; X86-NOSSE-NEXT:    subl %ebx, %edi
311; X86-NOSSE-NEXT:    movl %edi, %ebx
312; X86-NOSSE-NEXT:    andl $858993459, %ebx # imm = 0x33333333
313; X86-NOSSE-NEXT:    shrl $2, %edi
314; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
315; X86-NOSSE-NEXT:    addl %ebx, %edi
316; X86-NOSSE-NEXT:    movl %edi, %ebx
317; X86-NOSSE-NEXT:    shrl $4, %ebx
318; X86-NOSSE-NEXT:    addl %edi, %ebx
319; X86-NOSSE-NEXT:    andl $252645135, %ebx # imm = 0xF0F0F0F
320; X86-NOSSE-NEXT:    imull $16843009, %ebx, %edi # imm = 0x1010101
321; X86-NOSSE-NEXT:    shrl $24, %edi
322; X86-NOSSE-NEXT:    movl %esi, %ebx
323; X86-NOSSE-NEXT:    shrl %ebx
324; X86-NOSSE-NEXT:    andl $1431655765, %ebx # imm = 0x55555555
325; X86-NOSSE-NEXT:    subl %ebx, %esi
326; X86-NOSSE-NEXT:    movl %esi, %ebx
327; X86-NOSSE-NEXT:    andl $858993459, %ebx # imm = 0x33333333
328; X86-NOSSE-NEXT:    shrl $2, %esi
329; X86-NOSSE-NEXT:    andl $858993459, %esi # imm = 0x33333333
330; X86-NOSSE-NEXT:    addl %ebx, %esi
331; X86-NOSSE-NEXT:    movl %esi, %ebx
332; X86-NOSSE-NEXT:    shrl $4, %ebx
333; X86-NOSSE-NEXT:    addl %esi, %ebx
334; X86-NOSSE-NEXT:    andl $252645135, %ebx # imm = 0xF0F0F0F
335; X86-NOSSE-NEXT:    imull $16843009, %ebx, %esi # imm = 0x1010101
336; X86-NOSSE-NEXT:    shrl $24, %esi
337; X86-NOSSE-NEXT:    addl %edi, %esi
338; X86-NOSSE-NEXT:    movl %edx, %edi
339; X86-NOSSE-NEXT:    shrl %edi
340; X86-NOSSE-NEXT:    andl $1431655765, %edi # imm = 0x55555555
341; X86-NOSSE-NEXT:    subl %edi, %edx
342; X86-NOSSE-NEXT:    movl %edx, %edi
343; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
344; X86-NOSSE-NEXT:    shrl $2, %edx
345; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
346; X86-NOSSE-NEXT:    addl %edi, %edx
347; X86-NOSSE-NEXT:    movl %edx, %edi
348; X86-NOSSE-NEXT:    shrl $4, %edi
349; X86-NOSSE-NEXT:    addl %edx, %edi
350; X86-NOSSE-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
351; X86-NOSSE-NEXT:    imull $16843009, %edi, %edx # imm = 0x1010101
352; X86-NOSSE-NEXT:    shrl $24, %edx
353; X86-NOSSE-NEXT:    movl %ecx, %edi
354; X86-NOSSE-NEXT:    shrl %edi
355; X86-NOSSE-NEXT:    andl $1431655765, %edi # imm = 0x55555555
356; X86-NOSSE-NEXT:    subl %edi, %ecx
357; X86-NOSSE-NEXT:    movl %ecx, %edi
358; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
359; X86-NOSSE-NEXT:    shrl $2, %ecx
360; X86-NOSSE-NEXT:    andl $858993459, %ecx # imm = 0x33333333
361; X86-NOSSE-NEXT:    addl %edi, %ecx
362; X86-NOSSE-NEXT:    movl %ecx, %edi
363; X86-NOSSE-NEXT:    shrl $4, %edi
364; X86-NOSSE-NEXT:    addl %ecx, %edi
365; X86-NOSSE-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
366; X86-NOSSE-NEXT:    imull $16843009, %edi, %ecx # imm = 0x1010101
367; X86-NOSSE-NEXT:    shrl $24, %ecx
368; X86-NOSSE-NEXT:    addl %edx, %ecx
369; X86-NOSSE-NEXT:    addl %esi, %ecx
370; X86-NOSSE-NEXT:    movl %ecx, (%eax)
371; X86-NOSSE-NEXT:    movl $0, 12(%eax)
372; X86-NOSSE-NEXT:    movl $0, 8(%eax)
373; X86-NOSSE-NEXT:    movl $0, 4(%eax)
374; X86-NOSSE-NEXT:    popl %esi
375; X86-NOSSE-NEXT:    popl %edi
376; X86-NOSSE-NEXT:    popl %ebx
377; X86-NOSSE-NEXT:    retl $4
378;
379; X64-LABEL: cnt128:
380; X64:       # %bb.0:
381; X64-NEXT:    movq %rsi, %rax
382; X64-NEXT:    shrq %rax
383; X64-NEXT:    movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
384; X64-NEXT:    andq %r8, %rax
385; X64-NEXT:    subq %rax, %rsi
386; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
387; X64-NEXT:    movq %rsi, %rcx
388; X64-NEXT:    andq %rax, %rcx
389; X64-NEXT:    shrq $2, %rsi
390; X64-NEXT:    andq %rax, %rsi
391; X64-NEXT:    addq %rcx, %rsi
392; X64-NEXT:    movq %rsi, %rcx
393; X64-NEXT:    shrq $4, %rcx
394; X64-NEXT:    addq %rsi, %rcx
395; X64-NEXT:    movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
396; X64-NEXT:    andq %r9, %rcx
397; X64-NEXT:    movabsq $72340172838076673, %rdx # imm = 0x101010101010101
398; X64-NEXT:    imulq %rdx, %rcx
399; X64-NEXT:    shrq $56, %rcx
400; X64-NEXT:    movq %rdi, %rsi
401; X64-NEXT:    shrq %rsi
402; X64-NEXT:    andq %r8, %rsi
403; X64-NEXT:    subq %rsi, %rdi
404; X64-NEXT:    movq %rdi, %rsi
405; X64-NEXT:    andq %rax, %rsi
406; X64-NEXT:    shrq $2, %rdi
407; X64-NEXT:    andq %rax, %rdi
408; X64-NEXT:    addq %rsi, %rdi
409; X64-NEXT:    movq %rdi, %rax
410; X64-NEXT:    shrq $4, %rax
411; X64-NEXT:    addq %rdi, %rax
412; X64-NEXT:    andq %r9, %rax
413; X64-NEXT:    imulq %rdx, %rax
414; X64-NEXT:    shrq $56, %rax
415; X64-NEXT:    addq %rcx, %rax
416; X64-NEXT:    xorl %edx, %edx
417; X64-NEXT:    retq
418;
419; X86-POPCNT-LABEL: cnt128:
420; X86-POPCNT:       # %bb.0:
421; X86-POPCNT-NEXT:    pushl %esi
422; X86-POPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
423; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
424; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %edx
425; X86-POPCNT-NEXT:    addl %ecx, %edx
426; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
427; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %esi
428; X86-POPCNT-NEXT:    addl %ecx, %esi
429; X86-POPCNT-NEXT:    addl %edx, %esi
430; X86-POPCNT-NEXT:    movl %esi, (%eax)
431; X86-POPCNT-NEXT:    movl $0, 12(%eax)
432; X86-POPCNT-NEXT:    movl $0, 8(%eax)
433; X86-POPCNT-NEXT:    movl $0, 4(%eax)
434; X86-POPCNT-NEXT:    popl %esi
435; X86-POPCNT-NEXT:    retl $4
436;
437; X64-POPCNT-LABEL: cnt128:
438; X64-POPCNT:       # %bb.0:
439; X64-POPCNT-NEXT:    popcntq %rsi, %rcx
440; X64-POPCNT-NEXT:    popcntq %rdi, %rax
441; X64-POPCNT-NEXT:    addq %rcx, %rax
442; X64-POPCNT-NEXT:    xorl %edx, %edx
443; X64-POPCNT-NEXT:    retq
444;
445; X86-SSE2-LABEL: cnt128:
446; X86-SSE2:       # %bb.0:
447; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
448; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
449; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
450; X86-SSE2-NEXT:    psrlw $1, %xmm1
451; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
452; X86-SSE2-NEXT:    pand %xmm2, %xmm1
453; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
454; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
455; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
456; X86-SSE2-NEXT:    pand %xmm1, %xmm3
457; X86-SSE2-NEXT:    psrlw $2, %xmm0
458; X86-SSE2-NEXT:    pand %xmm1, %xmm0
459; X86-SSE2-NEXT:    paddb %xmm3, %xmm0
460; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
461; X86-SSE2-NEXT:    psrlw $4, %xmm3
462; X86-SSE2-NEXT:    paddb %xmm0, %xmm3
463; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
464; X86-SSE2-NEXT:    pand %xmm0, %xmm3
465; X86-SSE2-NEXT:    pxor %xmm4, %xmm4
466; X86-SSE2-NEXT:    psadbw %xmm4, %xmm3
467; X86-SSE2-NEXT:    movd %xmm3, %ecx
468; X86-SSE2-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
469; X86-SSE2-NEXT:    movdqa %xmm3, %xmm5
470; X86-SSE2-NEXT:    psrlw $1, %xmm5
471; X86-SSE2-NEXT:    pand %xmm2, %xmm5
472; X86-SSE2-NEXT:    psubb %xmm5, %xmm3
473; X86-SSE2-NEXT:    movdqa %xmm3, %xmm2
474; X86-SSE2-NEXT:    pand %xmm1, %xmm2
475; X86-SSE2-NEXT:    psrlw $2, %xmm3
476; X86-SSE2-NEXT:    pand %xmm1, %xmm3
477; X86-SSE2-NEXT:    paddb %xmm2, %xmm3
478; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
479; X86-SSE2-NEXT:    psrlw $4, %xmm1
480; X86-SSE2-NEXT:    paddb %xmm3, %xmm1
481; X86-SSE2-NEXT:    pand %xmm0, %xmm1
482; X86-SSE2-NEXT:    psadbw %xmm4, %xmm1
483; X86-SSE2-NEXT:    movd %xmm1, %edx
484; X86-SSE2-NEXT:    addl %ecx, %edx
485; X86-SSE2-NEXT:    movl %edx, (%eax)
486; X86-SSE2-NEXT:    movl $0, 12(%eax)
487; X86-SSE2-NEXT:    movl $0, 8(%eax)
488; X86-SSE2-NEXT:    movl $0, 4(%eax)
489; X86-SSE2-NEXT:    retl $4
490;
491; X86-SSSE3-LABEL: cnt128:
492; X86-SSSE3:       # %bb.0:
493; X86-SSSE3-NEXT:    movl {{[0-9]+}}(%esp), %eax
494; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
495; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
496; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
497; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
498; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
499; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
500; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
501; X86-SSSE3-NEXT:    psrlw $4, %xmm1
502; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
503; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm2
504; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm2
505; X86-SSSE3-NEXT:    paddb %xmm4, %xmm2
506; X86-SSSE3-NEXT:    pxor %xmm1, %xmm1
507; X86-SSSE3-NEXT:    psadbw %xmm1, %xmm2
508; X86-SSSE3-NEXT:    movd %xmm2, %ecx
509; X86-SSSE3-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
510; X86-SSSE3-NEXT:    movdqa %xmm2, %xmm4
511; X86-SSSE3-NEXT:    pand %xmm0, %xmm4
512; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm5
513; X86-SSSE3-NEXT:    pshufb %xmm4, %xmm5
514; X86-SSSE3-NEXT:    psrlw $4, %xmm2
515; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
516; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm3
517; X86-SSSE3-NEXT:    paddb %xmm5, %xmm3
518; X86-SSSE3-NEXT:    psadbw %xmm1, %xmm3
519; X86-SSSE3-NEXT:    movd %xmm3, %edx
520; X86-SSSE3-NEXT:    addl %ecx, %edx
521; X86-SSSE3-NEXT:    movl %edx, (%eax)
522; X86-SSSE3-NEXT:    movl $0, 12(%eax)
523; X86-SSSE3-NEXT:    movl $0, 8(%eax)
524; X86-SSSE3-NEXT:    movl $0, 4(%eax)
525; X86-SSSE3-NEXT:    retl $4
526  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
527  ret i128 %cnt
528}
529
; cnt64_noimplicitfloat - same IR body as cnt64, but the function carries
; the noimplicitfloat attribute. The i686 expectations use the generic
; prefix that the RUN lines share between the no-SSE, SSE2 and SSSE3
; configurations, and they contain only the scalar 32-bit expansion: no
; xmm register appears even when SSE2/SSSE3 is enabled. The x86_64 and
; +popcnt expectations use general-purpose registers only as well.
530define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat  {
531; X86-LABEL: cnt64_noimplicitfloat:
532; X86:       # %bb.0:
533; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
534; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
535; X86-NEXT:    movl %ecx, %edx
536; X86-NEXT:    shrl %edx
537; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
538; X86-NEXT:    subl %edx, %ecx
539; X86-NEXT:    movl %ecx, %edx
540; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
541; X86-NEXT:    shrl $2, %ecx
542; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
543; X86-NEXT:    addl %edx, %ecx
544; X86-NEXT:    movl %ecx, %edx
545; X86-NEXT:    shrl $4, %edx
546; X86-NEXT:    addl %ecx, %edx
547; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
548; X86-NEXT:    imull $16843009, %edx, %ecx # imm = 0x1010101
549; X86-NEXT:    shrl $24, %ecx
550; X86-NEXT:    movl %eax, %edx
551; X86-NEXT:    shrl %edx
552; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
553; X86-NEXT:    subl %edx, %eax
554; X86-NEXT:    movl %eax, %edx
555; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
556; X86-NEXT:    shrl $2, %eax
557; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
558; X86-NEXT:    addl %edx, %eax
559; X86-NEXT:    movl %eax, %edx
560; X86-NEXT:    shrl $4, %edx
561; X86-NEXT:    addl %eax, %edx
562; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
563; X86-NEXT:    imull $16843009, %edx, %eax # imm = 0x1010101
564; X86-NEXT:    shrl $24, %eax
565; X86-NEXT:    addl %ecx, %eax
566; X86-NEXT:    xorl %edx, %edx
567; X86-NEXT:    retl
568;
569; X64-LABEL: cnt64_noimplicitfloat:
570; X64:       # %bb.0:
571; X64-NEXT:    movq %rdi, %rax
572; X64-NEXT:    shrq %rax
573; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
574; X64-NEXT:    andq %rax, %rcx
575; X64-NEXT:    subq %rcx, %rdi
576; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
577; X64-NEXT:    movq %rdi, %rcx
578; X64-NEXT:    andq %rax, %rcx
579; X64-NEXT:    shrq $2, %rdi
580; X64-NEXT:    andq %rax, %rdi
581; X64-NEXT:    addq %rcx, %rdi
582; X64-NEXT:    movq %rdi, %rax
583; X64-NEXT:    shrq $4, %rax
584; X64-NEXT:    addq %rdi, %rax
585; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
586; X64-NEXT:    andq %rax, %rcx
587; X64-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
588; X64-NEXT:    imulq %rcx, %rax
589; X64-NEXT:    shrq $56, %rax
590; X64-NEXT:    retq
591;
592; X86-POPCNT-LABEL: cnt64_noimplicitfloat:
593; X86-POPCNT:       # %bb.0:
594; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
595; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
596; X86-POPCNT-NEXT:    addl %ecx, %eax
597; X86-POPCNT-NEXT:    xorl %edx, %edx
598; X86-POPCNT-NEXT:    retl
599;
600; X64-POPCNT-LABEL: cnt64_noimplicitfloat:
601; X64-POPCNT:       # %bb.0:
602; X64-POPCNT-NEXT:    popcntq %rdi, %rax
603; X64-POPCNT-NEXT:    retq
604  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
605  ret i64 %cnt
606}
607
; cnt32_optsize - same IR body as cnt32, but the function is marked optsize.
; In the expansion expected without +popcnt, the 0x33333333 mask is loaded
; into a register once (movl $858993459) and both and-operations use that
; register, whereas the cnt32 expectations encode the mask as an immediate
; in each andl. With +popcnt a single popcntl is expected.
608define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
609; X86-LABEL: cnt32_optsize:
610; X86:       # %bb.0:
611; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
612; X86-NEXT:    movl %eax, %ecx
613; X86-NEXT:    shrl %ecx
614; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
615; X86-NEXT:    subl %ecx, %eax
616; X86-NEXT:    movl $858993459, %ecx # imm = 0x33333333
617; X86-NEXT:    movl %eax, %edx
618; X86-NEXT:    andl %ecx, %edx
619; X86-NEXT:    shrl $2, %eax
620; X86-NEXT:    andl %ecx, %eax
621; X86-NEXT:    addl %edx, %eax
622; X86-NEXT:    movl %eax, %ecx
623; X86-NEXT:    shrl $4, %ecx
624; X86-NEXT:    addl %eax, %ecx
625; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
626; X86-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
627; X86-NEXT:    shrl $24, %eax
628; X86-NEXT:    retl
629;
630; X64-LABEL: cnt32_optsize:
631; X64:       # %bb.0:
632; X64-NEXT:    movl %edi, %eax
633; X64-NEXT:    shrl %eax
634; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
635; X64-NEXT:    subl %eax, %edi
636; X64-NEXT:    movl $858993459, %eax # imm = 0x33333333
637; X64-NEXT:    movl %edi, %ecx
638; X64-NEXT:    andl %eax, %ecx
639; X64-NEXT:    shrl $2, %edi
640; X64-NEXT:    andl %eax, %edi
641; X64-NEXT:    addl %ecx, %edi
642; X64-NEXT:    movl %edi, %eax
643; X64-NEXT:    shrl $4, %eax
644; X64-NEXT:    addl %edi, %eax
645; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
646; X64-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
647; X64-NEXT:    shrl $24, %eax
648; X64-NEXT:    retq
649;
650; X86-POPCNT-LABEL: cnt32_optsize:
651; X86-POPCNT:       # %bb.0:
652; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
653; X86-POPCNT-NEXT:    retl
654;
655; X64-POPCNT-LABEL: cnt32_optsize:
656; X64-POPCNT:       # %bb.0:
657; X64-POPCNT-NEXT:    popcntl %edi, %eax
658; X64-POPCNT-NEXT:    retq
659  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
660  ret i32 %cnt
661}
662
; cnt64_optsize - same IR body as cnt64, but the function is marked optsize.
; On i686 without SSE the expected code loads the 0x55555555, 0x33333333
; and 0xF0F0F0F masks into registers once and reuses them for both 32-bit
; halves; this uses %ebx, %edi and %esi, which are pushed on entry and
; popped before returning. The x86_64, +popcnt, SSE2 and SSSE3 expectations
; contain the same instruction sequences as the ones listed for cnt64.
663define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
664; X86-NOSSE-LABEL: cnt64_optsize:
665; X86-NOSSE:       # %bb.0:
666; X86-NOSSE-NEXT:    pushl %ebx
667; X86-NOSSE-NEXT:    pushl %edi
668; X86-NOSSE-NEXT:    pushl %esi
669; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
670; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
671; X86-NOSSE-NEXT:    movl %ecx, %edx
672; X86-NOSSE-NEXT:    shrl %edx
673; X86-NOSSE-NEXT:    movl $1431655765, %esi # imm = 0x55555555
674; X86-NOSSE-NEXT:    andl %esi, %edx
675; X86-NOSSE-NEXT:    subl %edx, %ecx
676; X86-NOSSE-NEXT:    movl $858993459, %edx # imm = 0x33333333
677; X86-NOSSE-NEXT:    movl %ecx, %edi
678; X86-NOSSE-NEXT:    andl %edx, %edi
679; X86-NOSSE-NEXT:    shrl $2, %ecx
680; X86-NOSSE-NEXT:    andl %edx, %ecx
681; X86-NOSSE-NEXT:    addl %edi, %ecx
682; X86-NOSSE-NEXT:    movl %ecx, %edi
683; X86-NOSSE-NEXT:    shrl $4, %edi
684; X86-NOSSE-NEXT:    addl %ecx, %edi
685; X86-NOSSE-NEXT:    movl $252645135, %ecx # imm = 0xF0F0F0F
686; X86-NOSSE-NEXT:    andl %ecx, %edi
687; X86-NOSSE-NEXT:    imull $16843009, %edi, %edi # imm = 0x1010101
688; X86-NOSSE-NEXT:    shrl $24, %edi
689; X86-NOSSE-NEXT:    movl %eax, %ebx
690; X86-NOSSE-NEXT:    shrl %ebx
691; X86-NOSSE-NEXT:    andl %esi, %ebx
692; X86-NOSSE-NEXT:    subl %ebx, %eax
693; X86-NOSSE-NEXT:    movl %eax, %esi
694; X86-NOSSE-NEXT:    andl %edx, %esi
695; X86-NOSSE-NEXT:    shrl $2, %eax
696; X86-NOSSE-NEXT:    andl %edx, %eax
697; X86-NOSSE-NEXT:    addl %esi, %eax
698; X86-NOSSE-NEXT:    movl %eax, %edx
699; X86-NOSSE-NEXT:    shrl $4, %edx
700; X86-NOSSE-NEXT:    addl %eax, %edx
701; X86-NOSSE-NEXT:    andl %ecx, %edx
702; X86-NOSSE-NEXT:    imull $16843009, %edx, %eax # imm = 0x1010101
703; X86-NOSSE-NEXT:    shrl $24, %eax
704; X86-NOSSE-NEXT:    addl %edi, %eax
705; X86-NOSSE-NEXT:    xorl %edx, %edx
706; X86-NOSSE-NEXT:    popl %esi
707; X86-NOSSE-NEXT:    popl %edi
708; X86-NOSSE-NEXT:    popl %ebx
709; X86-NOSSE-NEXT:    retl
710;
711; X64-LABEL: cnt64_optsize:
712; X64:       # %bb.0:
713; X64-NEXT:    movq %rdi, %rax
714; X64-NEXT:    shrq %rax
715; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
716; X64-NEXT:    andq %rax, %rcx
717; X64-NEXT:    subq %rcx, %rdi
718; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
719; X64-NEXT:    movq %rdi, %rcx
720; X64-NEXT:    andq %rax, %rcx
721; X64-NEXT:    shrq $2, %rdi
722; X64-NEXT:    andq %rax, %rdi
723; X64-NEXT:    addq %rcx, %rdi
724; X64-NEXT:    movq %rdi, %rax
725; X64-NEXT:    shrq $4, %rax
726; X64-NEXT:    addq %rdi, %rax
727; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
728; X64-NEXT:    andq %rax, %rcx
729; X64-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
730; X64-NEXT:    imulq %rcx, %rax
731; X64-NEXT:    shrq $56, %rax
732; X64-NEXT:    retq
733;
734; X86-POPCNT-LABEL: cnt64_optsize:
735; X86-POPCNT:       # %bb.0:
736; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
737; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
738; X86-POPCNT-NEXT:    addl %ecx, %eax
739; X86-POPCNT-NEXT:    xorl %edx, %edx
740; X86-POPCNT-NEXT:    retl
741;
742; X64-POPCNT-LABEL: cnt64_optsize:
743; X64-POPCNT:       # %bb.0:
744; X64-POPCNT-NEXT:    popcntq %rdi, %rax
745; X64-POPCNT-NEXT:    retq
746;
747; X86-SSE2-LABEL: cnt64_optsize:
748; X86-SSE2:       # %bb.0:
749; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
750; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
751; X86-SSE2-NEXT:    psrlw $1, %xmm1
752; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
753; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
754; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
755; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
756; X86-SSE2-NEXT:    pand %xmm1, %xmm2
757; X86-SSE2-NEXT:    psrlw $2, %xmm0
758; X86-SSE2-NEXT:    pand %xmm1, %xmm0
759; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
760; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
761; X86-SSE2-NEXT:    psrlw $4, %xmm1
762; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
763; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
764; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
765; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
766; X86-SSE2-NEXT:    movd %xmm0, %eax
767; X86-SSE2-NEXT:    xorl %edx, %edx
768; X86-SSE2-NEXT:    retl
769;
770; X86-SSSE3-LABEL: cnt64_optsize:
771; X86-SSSE3:       # %bb.0:
772; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
773; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
774; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
775; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
776; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
777; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
778; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
779; X86-SSSE3-NEXT:    psrlw $4, %xmm1
780; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
781; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm3
782; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
783; X86-SSSE3-NEXT:    pxor %xmm0, %xmm0
784; X86-SSSE3-NEXT:    psadbw %xmm3, %xmm0
785; X86-SSSE3-NEXT:    movd %xmm0, %eax
786; X86-SSSE3-NEXT:    xorl %edx, %edx
787; X86-SSSE3-NEXT:    retl
788  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
789  ret i64 %cnt
790}
791
; cnt128_optsize - population count of an i128 via @llvm.ctpop.i128 in a function
; that carries the optsize attribute. Expected lowering for each llc configuration:
;  * i686 without SSE or POPCNT - every 32-bit part goes through the 0x55555555 /
;    0x33333333 / 0xF0F0F0F mask steps, a multiply by 0x1010101 and a shift right
;    by 24; the four partial counts are then added together.
;  * x86-64 without POPCNT - the same mask/multiply scheme on each 64-bit half
;    (shift right by 56), followed by one add.
;  * POPCNT enabled - one popcntl per 32-bit part on i686, one popcntq per 64-bit
;    half on x86-64, with the partial counts added.
;  * i686 with SSE2 - byte-wise 85/51/15 mask steps on each 64-bit load, then
;    psadbw against a zeroed register.
;  * i686 with SSSE3 - pshufb lookups into the 16-entry nibble table
;    [0,1,1,2,...,4] for low and high nibbles, then psadbw against zero.
; On i686 the result is stored through a pointer loaded from the stack (count in
; the low word, zero in the upper three words) and the function returns with
; retl $4. On x86-64 the count is left in %rax and %edx is zeroed.
792define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
793; X86-NOSSE-LABEL: cnt128_optsize:
794; X86-NOSSE:       # %bb.0:
795; X86-NOSSE-NEXT:    pushl %ebp
796; X86-NOSSE-NEXT:    pushl %ebx
797; X86-NOSSE-NEXT:    pushl %edi
798; X86-NOSSE-NEXT:    pushl %esi
799; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
800; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
801; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
802; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
803; X86-NOSSE-NEXT:    movl %ebx, %ecx
804; X86-NOSSE-NEXT:    shrl %ecx
805; X86-NOSSE-NEXT:    movl $1431655765, %edi # imm = 0x55555555
806; X86-NOSSE-NEXT:    andl %edi, %ecx
807; X86-NOSSE-NEXT:    movl $1431655765, %edi # imm = 0x55555555
808; X86-NOSSE-NEXT:    subl %ecx, %ebx
809; X86-NOSSE-NEXT:    movl $858993459, %ecx # imm = 0x33333333
810; X86-NOSSE-NEXT:    movl %ebx, %ebp
811; X86-NOSSE-NEXT:    andl %ecx, %ebp
812; X86-NOSSE-NEXT:    shrl $2, %ebx
813; X86-NOSSE-NEXT:    andl %ecx, %ebx
814; X86-NOSSE-NEXT:    addl %ebp, %ebx
815; X86-NOSSE-NEXT:    movl %ebx, %ebp
816; X86-NOSSE-NEXT:    shrl $4, %ebp
817; X86-NOSSE-NEXT:    addl %ebx, %ebp
818; X86-NOSSE-NEXT:    movl %eax, %ebx
819; X86-NOSSE-NEXT:    shrl %ebx
820; X86-NOSSE-NEXT:    andl %edi, %ebx
821; X86-NOSSE-NEXT:    subl %ebx, %eax
822; X86-NOSSE-NEXT:    movl %eax, %ebx
823; X86-NOSSE-NEXT:    andl %ecx, %ebx
824; X86-NOSSE-NEXT:    shrl $2, %eax
825; X86-NOSSE-NEXT:    andl %ecx, %eax
826; X86-NOSSE-NEXT:    addl %ebx, %eax
827; X86-NOSSE-NEXT:    movl %eax, %edi
828; X86-NOSSE-NEXT:    shrl $4, %edi
829; X86-NOSSE-NEXT:    addl %eax, %edi
830; X86-NOSSE-NEXT:    movl $252645135, %ebx # imm = 0xF0F0F0F
831; X86-NOSSE-NEXT:    andl %ebx, %ebp
832; X86-NOSSE-NEXT:    imull $16843009, %ebp, %eax # imm = 0x1010101
833; X86-NOSSE-NEXT:    shrl $24, %eax
834; X86-NOSSE-NEXT:    andl %ebx, %edi
835; X86-NOSSE-NEXT:    imull $16843009, %edi, %edi # imm = 0x1010101
836; X86-NOSSE-NEXT:    shrl $24, %edi
837; X86-NOSSE-NEXT:    addl %eax, %edi
838; X86-NOSSE-NEXT:    movl %esi, %eax
839; X86-NOSSE-NEXT:    shrl %eax
840; X86-NOSSE-NEXT:    movl $1431655765, %ebp # imm = 0x55555555
841; X86-NOSSE-NEXT:    andl %ebp, %eax
842; X86-NOSSE-NEXT:    subl %eax, %esi
843; X86-NOSSE-NEXT:    movl %esi, %eax
844; X86-NOSSE-NEXT:    andl %ecx, %eax
845; X86-NOSSE-NEXT:    shrl $2, %esi
846; X86-NOSSE-NEXT:    andl %ecx, %esi
847; X86-NOSSE-NEXT:    addl %eax, %esi
848; X86-NOSSE-NEXT:    movl %esi, %eax
849; X86-NOSSE-NEXT:    shrl $4, %eax
850; X86-NOSSE-NEXT:    addl %esi, %eax
851; X86-NOSSE-NEXT:    movl %edx, %esi
852; X86-NOSSE-NEXT:    shrl %esi
853; X86-NOSSE-NEXT:    andl %ebp, %esi
854; X86-NOSSE-NEXT:    subl %esi, %edx
855; X86-NOSSE-NEXT:    movl %edx, %esi
856; X86-NOSSE-NEXT:    andl %ecx, %esi
857; X86-NOSSE-NEXT:    shrl $2, %edx
858; X86-NOSSE-NEXT:    andl %ecx, %edx
859; X86-NOSSE-NEXT:    addl %esi, %edx
860; X86-NOSSE-NEXT:    movl %edx, %ecx
861; X86-NOSSE-NEXT:    shrl $4, %ecx
862; X86-NOSSE-NEXT:    addl %edx, %ecx
863; X86-NOSSE-NEXT:    andl %ebx, %eax
864; X86-NOSSE-NEXT:    andl %ebx, %ecx
865; X86-NOSSE-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
866; X86-NOSSE-NEXT:    shrl $24, %eax
867; X86-NOSSE-NEXT:    imull $16843009, %ecx, %ecx # imm = 0x1010101
868; X86-NOSSE-NEXT:    shrl $24, %ecx
869; X86-NOSSE-NEXT:    addl %eax, %ecx
870; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
871; X86-NOSSE-NEXT:    addl %edi, %ecx
872; X86-NOSSE-NEXT:    xorl %edx, %edx
873; X86-NOSSE-NEXT:    movl %edx, 12(%eax)
874; X86-NOSSE-NEXT:    movl %edx, 8(%eax)
875; X86-NOSSE-NEXT:    movl %edx, 4(%eax)
876; X86-NOSSE-NEXT:    movl %ecx, (%eax)
877; X86-NOSSE-NEXT:    popl %esi
878; X86-NOSSE-NEXT:    popl %edi
879; X86-NOSSE-NEXT:    popl %ebx
880; X86-NOSSE-NEXT:    popl %ebp
881; X86-NOSSE-NEXT:    retl $4
882;
883; X64-LABEL: cnt128_optsize:
884; X64:       # %bb.0:
885; X64-NEXT:    movq %rsi, %rax
886; X64-NEXT:    shrq %rax
887; X64-NEXT:    movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
888; X64-NEXT:    andq %r8, %rax
889; X64-NEXT:    subq %rax, %rsi
890; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
891; X64-NEXT:    movq %rsi, %rcx
892; X64-NEXT:    andq %rax, %rcx
893; X64-NEXT:    shrq $2, %rsi
894; X64-NEXT:    andq %rax, %rsi
895; X64-NEXT:    addq %rcx, %rsi
896; X64-NEXT:    movq %rsi, %rcx
897; X64-NEXT:    shrq $4, %rcx
898; X64-NEXT:    addq %rsi, %rcx
899; X64-NEXT:    movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
900; X64-NEXT:    andq %r9, %rcx
901; X64-NEXT:    movabsq $72340172838076673, %rdx # imm = 0x101010101010101
902; X64-NEXT:    imulq %rdx, %rcx
903; X64-NEXT:    shrq $56, %rcx
904; X64-NEXT:    movq %rdi, %rsi
905; X64-NEXT:    shrq %rsi
906; X64-NEXT:    andq %r8, %rsi
907; X64-NEXT:    subq %rsi, %rdi
908; X64-NEXT:    movq %rdi, %rsi
909; X64-NEXT:    andq %rax, %rsi
910; X64-NEXT:    shrq $2, %rdi
911; X64-NEXT:    andq %rax, %rdi
912; X64-NEXT:    addq %rsi, %rdi
913; X64-NEXT:    movq %rdi, %rax
914; X64-NEXT:    shrq $4, %rax
915; X64-NEXT:    addq %rdi, %rax
916; X64-NEXT:    andq %r9, %rax
917; X64-NEXT:    imulq %rdx, %rax
918; X64-NEXT:    shrq $56, %rax
919; X64-NEXT:    addq %rcx, %rax
920; X64-NEXT:    xorl %edx, %edx
921; X64-NEXT:    retq
922;
923; X86-POPCNT-LABEL: cnt128_optsize:
924; X86-POPCNT:       # %bb.0:
925; X86-POPCNT-NEXT:    pushl %esi
926; X86-POPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
927; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
928; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %edx
929; X86-POPCNT-NEXT:    addl %ecx, %edx
930; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
931; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %esi
932; X86-POPCNT-NEXT:    addl %ecx, %esi
933; X86-POPCNT-NEXT:    addl %edx, %esi
934; X86-POPCNT-NEXT:    xorl %ecx, %ecx
935; X86-POPCNT-NEXT:    movl %ecx, 12(%eax)
936; X86-POPCNT-NEXT:    movl %ecx, 8(%eax)
937; X86-POPCNT-NEXT:    movl %ecx, 4(%eax)
938; X86-POPCNT-NEXT:    movl %esi, (%eax)
939; X86-POPCNT-NEXT:    popl %esi
940; X86-POPCNT-NEXT:    retl $4
941;
942; X64-POPCNT-LABEL: cnt128_optsize:
943; X64-POPCNT:       # %bb.0:
944; X64-POPCNT-NEXT:    popcntq %rsi, %rcx
945; X64-POPCNT-NEXT:    popcntq %rdi, %rax
946; X64-POPCNT-NEXT:    addq %rcx, %rax
947; X64-POPCNT-NEXT:    xorl %edx, %edx
948; X64-POPCNT-NEXT:    retq
949;
950; X86-SSE2-LABEL: cnt128_optsize:
951; X86-SSE2:       # %bb.0:
952; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
953; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
954; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
955; X86-SSE2-NEXT:    psrlw $1, %xmm1
956; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
957; X86-SSE2-NEXT:    pand %xmm2, %xmm1
958; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
959; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
960; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
961; X86-SSE2-NEXT:    pand %xmm1, %xmm3
962; X86-SSE2-NEXT:    psrlw $2, %xmm0
963; X86-SSE2-NEXT:    pand %xmm1, %xmm0
964; X86-SSE2-NEXT:    paddb %xmm3, %xmm0
965; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
966; X86-SSE2-NEXT:    psrlw $4, %xmm3
967; X86-SSE2-NEXT:    paddb %xmm0, %xmm3
968; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
969; X86-SSE2-NEXT:    pand %xmm0, %xmm3
970; X86-SSE2-NEXT:    pxor %xmm4, %xmm4
971; X86-SSE2-NEXT:    psadbw %xmm4, %xmm3
972; X86-SSE2-NEXT:    movd %xmm3, %ecx
973; X86-SSE2-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
974; X86-SSE2-NEXT:    movdqa %xmm3, %xmm5
975; X86-SSE2-NEXT:    psrlw $1, %xmm5
976; X86-SSE2-NEXT:    pand %xmm2, %xmm5
977; X86-SSE2-NEXT:    psubb %xmm5, %xmm3
978; X86-SSE2-NEXT:    movdqa %xmm3, %xmm2
979; X86-SSE2-NEXT:    pand %xmm1, %xmm2
980; X86-SSE2-NEXT:    psrlw $2, %xmm3
981; X86-SSE2-NEXT:    pand %xmm1, %xmm3
982; X86-SSE2-NEXT:    paddb %xmm2, %xmm3
983; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
984; X86-SSE2-NEXT:    psrlw $4, %xmm1
985; X86-SSE2-NEXT:    paddb %xmm3, %xmm1
986; X86-SSE2-NEXT:    pand %xmm0, %xmm1
987; X86-SSE2-NEXT:    psadbw %xmm4, %xmm1
988; X86-SSE2-NEXT:    movd %xmm1, %edx
989; X86-SSE2-NEXT:    addl %ecx, %edx
990; X86-SSE2-NEXT:    xorl %ecx, %ecx
991; X86-SSE2-NEXT:    movl %ecx, 12(%eax)
992; X86-SSE2-NEXT:    movl %ecx, 8(%eax)
993; X86-SSE2-NEXT:    movl %ecx, 4(%eax)
994; X86-SSE2-NEXT:    movl %edx, (%eax)
995; X86-SSE2-NEXT:    retl $4
996;
997; X86-SSSE3-LABEL: cnt128_optsize:
998; X86-SSSE3:       # %bb.0:
999; X86-SSSE3-NEXT:    movl {{[0-9]+}}(%esp), %eax
1000; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1001; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1002; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
1003; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
1004; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1005; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
1006; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
1007; X86-SSSE3-NEXT:    psrlw $4, %xmm1
1008; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
1009; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm2
1010; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm2
1011; X86-SSSE3-NEXT:    paddb %xmm4, %xmm2
1012; X86-SSSE3-NEXT:    pxor %xmm1, %xmm1
1013; X86-SSSE3-NEXT:    psadbw %xmm1, %xmm2
1014; X86-SSSE3-NEXT:    movd %xmm2, %ecx
1015; X86-SSSE3-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
1016; X86-SSSE3-NEXT:    movdqa %xmm2, %xmm4
1017; X86-SSSE3-NEXT:    pand %xmm0, %xmm4
1018; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm5
1019; X86-SSSE3-NEXT:    pshufb %xmm4, %xmm5
1020; X86-SSSE3-NEXT:    psrlw $4, %xmm2
1021; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
1022; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm3
1023; X86-SSSE3-NEXT:    paddb %xmm5, %xmm3
1024; X86-SSSE3-NEXT:    psadbw %xmm1, %xmm3
1025; X86-SSSE3-NEXT:    movd %xmm3, %edx
1026; X86-SSSE3-NEXT:    addl %ecx, %edx
1027; X86-SSSE3-NEXT:    xorl %ecx, %ecx
1028; X86-SSSE3-NEXT:    movl %ecx, 12(%eax)
1029; X86-SSSE3-NEXT:    movl %ecx, 8(%eax)
1030; X86-SSSE3-NEXT:    movl %ecx, 4(%eax)
1031; X86-SSSE3-NEXT:    movl %edx, (%eax)
1032; X86-SSSE3-NEXT:    retl $4
1033  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
1034  ret i128 %cnt
1035}
1036
; cnt32_pgso - population count of an i32 via @llvm.ctpop.i32 in a function
; annotated with !prof !14 (defined at the end of this file as a
; function_entry_count of 0). Expected lowering:
;  * i686 and x86-64 without POPCNT - scalar mask sequence with 0x55555555 and
;    0xF0F0F0F as immediates and 0x33333333 held in a register, then a multiply
;    by 0x1010101 and a shift right by 24. The three i686 runs without POPCNT
;    share one set of assertions here.
;  * POPCNT enabled - a single popcntl.
; NOTE(review): the "pgso" suffix presumably stands for profile-guided size
; optimization - confirm against the pass that consumes the profile summary.
1037define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
1038; X86-LABEL: cnt32_pgso:
1039; X86:       # %bb.0:
1040; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1041; X86-NEXT:    movl %eax, %ecx
1042; X86-NEXT:    shrl %ecx
1043; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
1044; X86-NEXT:    subl %ecx, %eax
1045; X86-NEXT:    movl $858993459, %ecx # imm = 0x33333333
1046; X86-NEXT:    movl %eax, %edx
1047; X86-NEXT:    andl %ecx, %edx
1048; X86-NEXT:    shrl $2, %eax
1049; X86-NEXT:    andl %ecx, %eax
1050; X86-NEXT:    addl %edx, %eax
1051; X86-NEXT:    movl %eax, %ecx
1052; X86-NEXT:    shrl $4, %ecx
1053; X86-NEXT:    addl %eax, %ecx
1054; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
1055; X86-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
1056; X86-NEXT:    shrl $24, %eax
1057; X86-NEXT:    retl
1058;
1059; X64-LABEL: cnt32_pgso:
1060; X64:       # %bb.0:
1061; X64-NEXT:    movl %edi, %eax
1062; X64-NEXT:    shrl %eax
1063; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
1064; X64-NEXT:    subl %eax, %edi
1065; X64-NEXT:    movl $858993459, %eax # imm = 0x33333333
1066; X64-NEXT:    movl %edi, %ecx
1067; X64-NEXT:    andl %eax, %ecx
1068; X64-NEXT:    shrl $2, %edi
1069; X64-NEXT:    andl %eax, %edi
1070; X64-NEXT:    addl %ecx, %edi
1071; X64-NEXT:    movl %edi, %eax
1072; X64-NEXT:    shrl $4, %eax
1073; X64-NEXT:    addl %edi, %eax
1074; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
1075; X64-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
1076; X64-NEXT:    shrl $24, %eax
1077; X64-NEXT:    retq
1078;
1079; X86-POPCNT-LABEL: cnt32_pgso:
1080; X86-POPCNT:       # %bb.0:
1081; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
1082; X86-POPCNT-NEXT:    retl
1083;
1084; X64-POPCNT-LABEL: cnt32_pgso:
1085; X64-POPCNT:       # %bb.0:
1086; X64-POPCNT-NEXT:    popcntl %edi, %eax
1087; X64-POPCNT-NEXT:    retq
1088  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
1089  ret i32 %cnt
1090}
1091
; cnt64_pgso - population count of an i64 via @llvm.ctpop.i64 in a function
; annotated with !prof !14 (defined at the end of this file as a
; function_entry_count of 0). Expected lowering:
;  * i686 without SSE or POPCNT - the 32-bit mask/multiply sequence on each half
;    with the masks held in registers; the two counts are added into %eax and
;    %edx is zeroed.
;  * x86-64 without POPCNT - the 64-bit mask sequence, imulq by
;    0x101010101010101 and a shift right by 56.
;  * POPCNT enabled - two popcntl plus an add on i686 (with %edx zeroed), or a
;    single popcntq on x86-64.
;  * i686 with SSE2 - byte-wise mask steps followed by psadbw against zero.
;  * i686 with SSSE3 - pshufb nibble-table lookups followed by psadbw against zero.
;    In both vector cases the count is moved to %eax with movd and %edx is zeroed.
1092define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
1093; X86-NOSSE-LABEL: cnt64_pgso:
1094; X86-NOSSE:       # %bb.0:
1095; X86-NOSSE-NEXT:    pushl %ebx
1096; X86-NOSSE-NEXT:    pushl %edi
1097; X86-NOSSE-NEXT:    pushl %esi
1098; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1099; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1100; X86-NOSSE-NEXT:    movl %ecx, %edx
1101; X86-NOSSE-NEXT:    shrl %edx
1102; X86-NOSSE-NEXT:    movl $1431655765, %esi # imm = 0x55555555
1103; X86-NOSSE-NEXT:    andl %esi, %edx
1104; X86-NOSSE-NEXT:    subl %edx, %ecx
1105; X86-NOSSE-NEXT:    movl $858993459, %edx # imm = 0x33333333
1106; X86-NOSSE-NEXT:    movl %ecx, %edi
1107; X86-NOSSE-NEXT:    andl %edx, %edi
1108; X86-NOSSE-NEXT:    shrl $2, %ecx
1109; X86-NOSSE-NEXT:    andl %edx, %ecx
1110; X86-NOSSE-NEXT:    addl %edi, %ecx
1111; X86-NOSSE-NEXT:    movl %ecx, %edi
1112; X86-NOSSE-NEXT:    shrl $4, %edi
1113; X86-NOSSE-NEXT:    addl %ecx, %edi
1114; X86-NOSSE-NEXT:    movl $252645135, %ecx # imm = 0xF0F0F0F
1115; X86-NOSSE-NEXT:    andl %ecx, %edi
1116; X86-NOSSE-NEXT:    imull $16843009, %edi, %edi # imm = 0x1010101
1117; X86-NOSSE-NEXT:    shrl $24, %edi
1118; X86-NOSSE-NEXT:    movl %eax, %ebx
1119; X86-NOSSE-NEXT:    shrl %ebx
1120; X86-NOSSE-NEXT:    andl %esi, %ebx
1121; X86-NOSSE-NEXT:    subl %ebx, %eax
1122; X86-NOSSE-NEXT:    movl %eax, %esi
1123; X86-NOSSE-NEXT:    andl %edx, %esi
1124; X86-NOSSE-NEXT:    shrl $2, %eax
1125; X86-NOSSE-NEXT:    andl %edx, %eax
1126; X86-NOSSE-NEXT:    addl %esi, %eax
1127; X86-NOSSE-NEXT:    movl %eax, %edx
1128; X86-NOSSE-NEXT:    shrl $4, %edx
1129; X86-NOSSE-NEXT:    addl %eax, %edx
1130; X86-NOSSE-NEXT:    andl %ecx, %edx
1131; X86-NOSSE-NEXT:    imull $16843009, %edx, %eax # imm = 0x1010101
1132; X86-NOSSE-NEXT:    shrl $24, %eax
1133; X86-NOSSE-NEXT:    addl %edi, %eax
1134; X86-NOSSE-NEXT:    xorl %edx, %edx
1135; X86-NOSSE-NEXT:    popl %esi
1136; X86-NOSSE-NEXT:    popl %edi
1137; X86-NOSSE-NEXT:    popl %ebx
1138; X86-NOSSE-NEXT:    retl
1139;
1140; X64-LABEL: cnt64_pgso:
1141; X64:       # %bb.0:
1142; X64-NEXT:    movq %rdi, %rax
1143; X64-NEXT:    shrq %rax
1144; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
1145; X64-NEXT:    andq %rax, %rcx
1146; X64-NEXT:    subq %rcx, %rdi
1147; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
1148; X64-NEXT:    movq %rdi, %rcx
1149; X64-NEXT:    andq %rax, %rcx
1150; X64-NEXT:    shrq $2, %rdi
1151; X64-NEXT:    andq %rax, %rdi
1152; X64-NEXT:    addq %rcx, %rdi
1153; X64-NEXT:    movq %rdi, %rax
1154; X64-NEXT:    shrq $4, %rax
1155; X64-NEXT:    addq %rdi, %rax
1156; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
1157; X64-NEXT:    andq %rax, %rcx
1158; X64-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
1159; X64-NEXT:    imulq %rcx, %rax
1160; X64-NEXT:    shrq $56, %rax
1161; X64-NEXT:    retq
1162;
1163; X86-POPCNT-LABEL: cnt64_pgso:
1164; X86-POPCNT:       # %bb.0:
1165; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
1166; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
1167; X86-POPCNT-NEXT:    addl %ecx, %eax
1168; X86-POPCNT-NEXT:    xorl %edx, %edx
1169; X86-POPCNT-NEXT:    retl
1170;
1171; X64-POPCNT-LABEL: cnt64_pgso:
1172; X64-POPCNT:       # %bb.0:
1173; X64-POPCNT-NEXT:    popcntq %rdi, %rax
1174; X64-POPCNT-NEXT:    retq
1175;
1176; X86-SSE2-LABEL: cnt64_pgso:
1177; X86-SSE2:       # %bb.0:
1178; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1179; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1180; X86-SSE2-NEXT:    psrlw $1, %xmm1
1181; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
1182; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
1183; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1184; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1185; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1186; X86-SSE2-NEXT:    psrlw $2, %xmm0
1187; X86-SSE2-NEXT:    pand %xmm1, %xmm0
1188; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
1189; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1190; X86-SSE2-NEXT:    psrlw $4, %xmm1
1191; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
1192; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
1193; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
1194; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
1195; X86-SSE2-NEXT:    movd %xmm0, %eax
1196; X86-SSE2-NEXT:    xorl %edx, %edx
1197; X86-SSE2-NEXT:    retl
1198;
1199; X86-SSSE3-LABEL: cnt64_pgso:
1200; X86-SSSE3:       # %bb.0:
1201; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1202; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1203; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
1204; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
1205; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1206; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
1207; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
1208; X86-SSSE3-NEXT:    psrlw $4, %xmm1
1209; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
1210; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm3
1211; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
1212; X86-SSSE3-NEXT:    pxor %xmm0, %xmm0
1213; X86-SSSE3-NEXT:    psadbw %xmm3, %xmm0
1214; X86-SSSE3-NEXT:    movd %xmm0, %eax
1215; X86-SSSE3-NEXT:    xorl %edx, %edx
1216; X86-SSSE3-NEXT:    retl
1217  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
1218  ret i64 %cnt
1219}
1220
; cnt128_pgso - population count of an i128 via @llvm.ctpop.i128 in a function
; annotated with !prof !14 (defined at the end of this file as a
; function_entry_count of 0). Expected lowering for each llc configuration:
;  * i686 without SSE or POPCNT - the 32-bit mask/multiply sequence on each of
;    the four parts, with the partial counts added together.
;  * x86-64 without POPCNT - the 64-bit mask/multiply sequence on each half
;    (shift right by 56), followed by one add.
;  * POPCNT enabled - one popcntl per 32-bit part on i686, one popcntq per 64-bit
;    half on x86-64, with the partial counts added.
;  * i686 with SSE2 - byte-wise 85/51/15 mask steps on each 64-bit load, then
;    psadbw against a zeroed register.
;  * i686 with SSSE3 - pshufb lookups into the 16-entry nibble table, then psadbw.
; On i686 the result is stored through a pointer loaded from the stack (count in
; the low word, zero in the upper three words) and the function returns with
; retl $4. On x86-64 the count is left in %rax and %edx is zeroed.
; NOTE(review): the expected sequences look identical to those of cnt128_optsize;
; confirm that is intended if either function's assertions are regenerated.
1221define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
1222; X86-NOSSE-LABEL: cnt128_pgso:
1223; X86-NOSSE:       # %bb.0:
1224; X86-NOSSE-NEXT:    pushl %ebp
1225; X86-NOSSE-NEXT:    pushl %ebx
1226; X86-NOSSE-NEXT:    pushl %edi
1227; X86-NOSSE-NEXT:    pushl %esi
1228; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
1229; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
1230; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1231; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1232; X86-NOSSE-NEXT:    movl %ebx, %ecx
1233; X86-NOSSE-NEXT:    shrl %ecx
1234; X86-NOSSE-NEXT:    movl $1431655765, %edi # imm = 0x55555555
1235; X86-NOSSE-NEXT:    andl %edi, %ecx
1236; X86-NOSSE-NEXT:    movl $1431655765, %edi # imm = 0x55555555
1237; X86-NOSSE-NEXT:    subl %ecx, %ebx
1238; X86-NOSSE-NEXT:    movl $858993459, %ecx # imm = 0x33333333
1239; X86-NOSSE-NEXT:    movl %ebx, %ebp
1240; X86-NOSSE-NEXT:    andl %ecx, %ebp
1241; X86-NOSSE-NEXT:    shrl $2, %ebx
1242; X86-NOSSE-NEXT:    andl %ecx, %ebx
1243; X86-NOSSE-NEXT:    addl %ebp, %ebx
1244; X86-NOSSE-NEXT:    movl %ebx, %ebp
1245; X86-NOSSE-NEXT:    shrl $4, %ebp
1246; X86-NOSSE-NEXT:    addl %ebx, %ebp
1247; X86-NOSSE-NEXT:    movl %eax, %ebx
1248; X86-NOSSE-NEXT:    shrl %ebx
1249; X86-NOSSE-NEXT:    andl %edi, %ebx
1250; X86-NOSSE-NEXT:    subl %ebx, %eax
1251; X86-NOSSE-NEXT:    movl %eax, %ebx
1252; X86-NOSSE-NEXT:    andl %ecx, %ebx
1253; X86-NOSSE-NEXT:    shrl $2, %eax
1254; X86-NOSSE-NEXT:    andl %ecx, %eax
1255; X86-NOSSE-NEXT:    addl %ebx, %eax
1256; X86-NOSSE-NEXT:    movl %eax, %edi
1257; X86-NOSSE-NEXT:    shrl $4, %edi
1258; X86-NOSSE-NEXT:    addl %eax, %edi
1259; X86-NOSSE-NEXT:    movl $252645135, %ebx # imm = 0xF0F0F0F
1260; X86-NOSSE-NEXT:    andl %ebx, %ebp
1261; X86-NOSSE-NEXT:    imull $16843009, %ebp, %eax # imm = 0x1010101
1262; X86-NOSSE-NEXT:    shrl $24, %eax
1263; X86-NOSSE-NEXT:    andl %ebx, %edi
1264; X86-NOSSE-NEXT:    imull $16843009, %edi, %edi # imm = 0x1010101
1265; X86-NOSSE-NEXT:    shrl $24, %edi
1266; X86-NOSSE-NEXT:    addl %eax, %edi
1267; X86-NOSSE-NEXT:    movl %esi, %eax
1268; X86-NOSSE-NEXT:    shrl %eax
1269; X86-NOSSE-NEXT:    movl $1431655765, %ebp # imm = 0x55555555
1270; X86-NOSSE-NEXT:    andl %ebp, %eax
1271; X86-NOSSE-NEXT:    subl %eax, %esi
1272; X86-NOSSE-NEXT:    movl %esi, %eax
1273; X86-NOSSE-NEXT:    andl %ecx, %eax
1274; X86-NOSSE-NEXT:    shrl $2, %esi
1275; X86-NOSSE-NEXT:    andl %ecx, %esi
1276; X86-NOSSE-NEXT:    addl %eax, %esi
1277; X86-NOSSE-NEXT:    movl %esi, %eax
1278; X86-NOSSE-NEXT:    shrl $4, %eax
1279; X86-NOSSE-NEXT:    addl %esi, %eax
1280; X86-NOSSE-NEXT:    movl %edx, %esi
1281; X86-NOSSE-NEXT:    shrl %esi
1282; X86-NOSSE-NEXT:    andl %ebp, %esi
1283; X86-NOSSE-NEXT:    subl %esi, %edx
1284; X86-NOSSE-NEXT:    movl %edx, %esi
1285; X86-NOSSE-NEXT:    andl %ecx, %esi
1286; X86-NOSSE-NEXT:    shrl $2, %edx
1287; X86-NOSSE-NEXT:    andl %ecx, %edx
1288; X86-NOSSE-NEXT:    addl %esi, %edx
1289; X86-NOSSE-NEXT:    movl %edx, %ecx
1290; X86-NOSSE-NEXT:    shrl $4, %ecx
1291; X86-NOSSE-NEXT:    addl %edx, %ecx
1292; X86-NOSSE-NEXT:    andl %ebx, %eax
1293; X86-NOSSE-NEXT:    andl %ebx, %ecx
1294; X86-NOSSE-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
1295; X86-NOSSE-NEXT:    shrl $24, %eax
1296; X86-NOSSE-NEXT:    imull $16843009, %ecx, %ecx # imm = 0x1010101
1297; X86-NOSSE-NEXT:    shrl $24, %ecx
1298; X86-NOSSE-NEXT:    addl %eax, %ecx
1299; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1300; X86-NOSSE-NEXT:    addl %edi, %ecx
1301; X86-NOSSE-NEXT:    xorl %edx, %edx
1302; X86-NOSSE-NEXT:    movl %edx, 12(%eax)
1303; X86-NOSSE-NEXT:    movl %edx, 8(%eax)
1304; X86-NOSSE-NEXT:    movl %edx, 4(%eax)
1305; X86-NOSSE-NEXT:    movl %ecx, (%eax)
1306; X86-NOSSE-NEXT:    popl %esi
1307; X86-NOSSE-NEXT:    popl %edi
1308; X86-NOSSE-NEXT:    popl %ebx
1309; X86-NOSSE-NEXT:    popl %ebp
1310; X86-NOSSE-NEXT:    retl $4
1311;
1312; X64-LABEL: cnt128_pgso:
1313; X64:       # %bb.0:
1314; X64-NEXT:    movq %rsi, %rax
1315; X64-NEXT:    shrq %rax
1316; X64-NEXT:    movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
1317; X64-NEXT:    andq %r8, %rax
1318; X64-NEXT:    subq %rax, %rsi
1319; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
1320; X64-NEXT:    movq %rsi, %rcx
1321; X64-NEXT:    andq %rax, %rcx
1322; X64-NEXT:    shrq $2, %rsi
1323; X64-NEXT:    andq %rax, %rsi
1324; X64-NEXT:    addq %rcx, %rsi
1325; X64-NEXT:    movq %rsi, %rcx
1326; X64-NEXT:    shrq $4, %rcx
1327; X64-NEXT:    addq %rsi, %rcx
1328; X64-NEXT:    movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
1329; X64-NEXT:    andq %r9, %rcx
1330; X64-NEXT:    movabsq $72340172838076673, %rdx # imm = 0x101010101010101
1331; X64-NEXT:    imulq %rdx, %rcx
1332; X64-NEXT:    shrq $56, %rcx
1333; X64-NEXT:    movq %rdi, %rsi
1334; X64-NEXT:    shrq %rsi
1335; X64-NEXT:    andq %r8, %rsi
1336; X64-NEXT:    subq %rsi, %rdi
1337; X64-NEXT:    movq %rdi, %rsi
1338; X64-NEXT:    andq %rax, %rsi
1339; X64-NEXT:    shrq $2, %rdi
1340; X64-NEXT:    andq %rax, %rdi
1341; X64-NEXT:    addq %rsi, %rdi
1342; X64-NEXT:    movq %rdi, %rax
1343; X64-NEXT:    shrq $4, %rax
1344; X64-NEXT:    addq %rdi, %rax
1345; X64-NEXT:    andq %r9, %rax
1346; X64-NEXT:    imulq %rdx, %rax
1347; X64-NEXT:    shrq $56, %rax
1348; X64-NEXT:    addq %rcx, %rax
1349; X64-NEXT:    xorl %edx, %edx
1350; X64-NEXT:    retq
1351;
1352; X86-POPCNT-LABEL: cnt128_pgso:
1353; X86-POPCNT:       # %bb.0:
1354; X86-POPCNT-NEXT:    pushl %esi
1355; X86-POPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
1356; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
1357; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %edx
1358; X86-POPCNT-NEXT:    addl %ecx, %edx
1359; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
1360; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %esi
1361; X86-POPCNT-NEXT:    addl %ecx, %esi
1362; X86-POPCNT-NEXT:    addl %edx, %esi
1363; X86-POPCNT-NEXT:    xorl %ecx, %ecx
1364; X86-POPCNT-NEXT:    movl %ecx, 12(%eax)
1365; X86-POPCNT-NEXT:    movl %ecx, 8(%eax)
1366; X86-POPCNT-NEXT:    movl %ecx, 4(%eax)
1367; X86-POPCNT-NEXT:    movl %esi, (%eax)
1368; X86-POPCNT-NEXT:    popl %esi
1369; X86-POPCNT-NEXT:    retl $4
1370;
1371; X64-POPCNT-LABEL: cnt128_pgso:
1372; X64-POPCNT:       # %bb.0:
1373; X64-POPCNT-NEXT:    popcntq %rsi, %rcx
1374; X64-POPCNT-NEXT:    popcntq %rdi, %rax
1375; X64-POPCNT-NEXT:    addq %rcx, %rax
1376; X64-POPCNT-NEXT:    xorl %edx, %edx
1377; X64-POPCNT-NEXT:    retq
1378;
1379; X86-SSE2-LABEL: cnt128_pgso:
1380; X86-SSE2:       # %bb.0:
1381; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1382; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1383; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1384; X86-SSE2-NEXT:    psrlw $1, %xmm1
1385; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
1386; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1387; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
1388; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1389; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
1390; X86-SSE2-NEXT:    pand %xmm1, %xmm3
1391; X86-SSE2-NEXT:    psrlw $2, %xmm0
1392; X86-SSE2-NEXT:    pand %xmm1, %xmm0
1393; X86-SSE2-NEXT:    paddb %xmm3, %xmm0
1394; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
1395; X86-SSE2-NEXT:    psrlw $4, %xmm3
1396; X86-SSE2-NEXT:    paddb %xmm0, %xmm3
1397; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1398; X86-SSE2-NEXT:    pand %xmm0, %xmm3
1399; X86-SSE2-NEXT:    pxor %xmm4, %xmm4
1400; X86-SSE2-NEXT:    psadbw %xmm4, %xmm3
1401; X86-SSE2-NEXT:    movd %xmm3, %ecx
1402; X86-SSE2-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
1403; X86-SSE2-NEXT:    movdqa %xmm3, %xmm5
1404; X86-SSE2-NEXT:    psrlw $1, %xmm5
1405; X86-SSE2-NEXT:    pand %xmm2, %xmm5
1406; X86-SSE2-NEXT:    psubb %xmm5, %xmm3
1407; X86-SSE2-NEXT:    movdqa %xmm3, %xmm2
1408; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1409; X86-SSE2-NEXT:    psrlw $2, %xmm3
1410; X86-SSE2-NEXT:    pand %xmm1, %xmm3
1411; X86-SSE2-NEXT:    paddb %xmm2, %xmm3
1412; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
1413; X86-SSE2-NEXT:    psrlw $4, %xmm1
1414; X86-SSE2-NEXT:    paddb %xmm3, %xmm1
1415; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1416; X86-SSE2-NEXT:    psadbw %xmm4, %xmm1
1417; X86-SSE2-NEXT:    movd %xmm1, %edx
1418; X86-SSE2-NEXT:    addl %ecx, %edx
1419; X86-SSE2-NEXT:    xorl %ecx, %ecx
1420; X86-SSE2-NEXT:    movl %ecx, 12(%eax)
1421; X86-SSE2-NEXT:    movl %ecx, 8(%eax)
1422; X86-SSE2-NEXT:    movl %ecx, 4(%eax)
1423; X86-SSE2-NEXT:    movl %edx, (%eax)
1424; X86-SSE2-NEXT:    retl $4
1425;
1426; X86-SSSE3-LABEL: cnt128_pgso:
1427; X86-SSSE3:       # %bb.0:
1428; X86-SSSE3-NEXT:    movl {{[0-9]+}}(%esp), %eax
1429; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1430; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1431; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
1432; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
1433; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1434; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
1435; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
1436; X86-SSSE3-NEXT:    psrlw $4, %xmm1
1437; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
1438; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm2
1439; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm2
1440; X86-SSSE3-NEXT:    paddb %xmm4, %xmm2
1441; X86-SSSE3-NEXT:    pxor %xmm1, %xmm1
1442; X86-SSSE3-NEXT:    psadbw %xmm1, %xmm2
1443; X86-SSSE3-NEXT:    movd %xmm2, %ecx
1444; X86-SSSE3-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
1445; X86-SSSE3-NEXT:    movdqa %xmm2, %xmm4
1446; X86-SSSE3-NEXT:    pand %xmm0, %xmm4
1447; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm5
1448; X86-SSSE3-NEXT:    pshufb %xmm4, %xmm5
1449; X86-SSSE3-NEXT:    psrlw $4, %xmm2
1450; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
1451; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm3
1452; X86-SSSE3-NEXT:    paddb %xmm5, %xmm3
1453; X86-SSSE3-NEXT:    psadbw %xmm1, %xmm3
1454; X86-SSSE3-NEXT:    movd %xmm3, %edx
1455; X86-SSSE3-NEXT:    addl %ecx, %edx
1456; X86-SSSE3-NEXT:    xorl %ecx, %ecx
1457; X86-SSSE3-NEXT:    movl %ecx, 12(%eax)
1458; X86-SSSE3-NEXT:    movl %ecx, 8(%eax)
1459; X86-SSSE3-NEXT:    movl %ecx, 4(%eax)
1460; X86-SSSE3-NEXT:    movl %edx, (%eax)
1461; X86-SSSE3-NEXT:    retl $4
1462  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
1463  ret i128 %cnt
1464}
1465
; Declarations of the population-count intrinsic for each integer width
; (i8, i16, i32, i64, i128). Each takes and returns a value of that width.
1466declare i8 @llvm.ctpop.i8(i8) nounwind readnone
1467declare i16 @llvm.ctpop.i16(i16) nounwind readnone
1468declare i32 @llvm.ctpop.i32(i32) nounwind readnone
1469declare i64 @llvm.ctpop.i64(i64) nounwind readnone
1470declare i128 @llvm.ctpop.i128(i128) nounwind readnone
1471
; Module-level profile data consumed by the *_pgso functions above.
; !0 registers a "ProfileSummary" module flag whose payload (!1) lists the
; profile format (InstrProf), aggregate counters (!3 - !8) and a detailed
; summary (!9 / !10) made of three entries (!11 - !13).
; NOTE(review): each detailed-summary entry is presumably a
; (cutoff, minimum count, number of counts) triple - confirm against the LLVM
; profile-summary documentation before changing the values.
1472!llvm.module.flags = !{!0}
1473!0 = !{i32 1, !"ProfileSummary", !1}
1474!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
1475!2 = !{!"ProfileFormat", !"InstrProf"}
1476!3 = !{!"TotalCount", i64 10000}
1477!4 = !{!"MaxCount", i64 10}
1478!5 = !{!"MaxInternalCount", i64 1}
1479!6 = !{!"MaxFunctionCount", i64 1000}
1480!7 = !{!"NumCounts", i64 3}
1481!8 = !{!"NumFunctions", i64 3}
1482!9 = !{!"DetailedSummary", !10}
1483!10 = !{!11, !12, !13}
1484!11 = !{i32 10000, i64 100, i32 1}
1485!12 = !{i32 999000, i64 100, i32 1}
1486!13 = !{i32 999999, i64 1, i32 2}
; !14 is the !prof attachment used by the *_pgso functions: an entry count of 0.
1487!14 = !{!"function_entry_count", i64 0}
1488