1; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s --check-prefix=HSW
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake | FileCheck %s --check-prefix=SKL
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKL
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont -mattr=+lzcnt,+bmi | FileCheck %s --check-prefix=SKL
5; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont -mattr=+lzcnt,+bmi | FileCheck %s --check-prefix=SKL
6
7; This tests a fix for bugzilla 33869 https://bugs.llvm.org/show_bug.cgi?id=33869
8
9declare i32 @llvm.ctpop.i32(i32)
10declare i64 @llvm.ctpop.i64(i64)
11declare i64 @llvm.ctlz.i64(i64, i1)
12declare i32 @llvm.cttz.i32(i32, i1)
13declare i64 @llvm.cttz.i64(i64, i1)
14declare i32 @llvm.ctlz.i32(i32, i1)
15
; loopdep_popcnt32: loop that accumulates llvm.ctpop.i32 of the induction
; variable. The running sum starts from the i32 loaded from %x, and %i runs
; from 1 until %i + 1 equals 156250000. The %y argument is not used.
; The checks after the body expect, for both check prefixes, an xorl that
; zeroes a 32-bit register immediately followed by a popcntl whose destination
; is that same register.
16define i32 @loopdep_popcnt32(i32* nocapture %x, double* nocapture %y) nounwind {
17entry:
18  %vx = load i32, i32* %x
19  br label %loop
20loop:
21  %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
22  %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
  ; Empty side-effecting inline asm: its constraint string only lists eax, ebx,
  ; ecx, edx, esi, edi, ebp, dirflag, fpsr and flags as clobbered.
  ; NOTE(review): presumably this limits which registers can stay live across
  ; this point so the expected register pattern is stable - confirm.
23  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
24  %j = tail call i32 @llvm.ctpop.i32(i32 %i)
25  %s2 = add i32 %s1, %j
26  %inc = add nsw i32 %i, 1
  ; Same clobber-only asm again, after the sum and induction variable updates.
27  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
28  %exitcond = icmp eq i32 %inc, 156250000
29  br i1 %exitcond, label %ret, label %loop
30ret:
31  ret i32 %s2
32
; Haswell run line: a self-xor of eax/ebx/ecx/edx, then popcntl into it.
33;HSW-LABEL:@loopdep_popcnt32
34;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
35;HSW-NEXT: popcntl {{.*}}, [[GPR0]]
36
; Remaining run lines (skylake, skx, silvermont, goldmont): the same xorl +
; popcntl pair is expected here as well.
37;SKL-LABEL:@loopdep_popcnt32
38;SKL: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
39;SKL-NEXT: popcntl {{.*}}, [[GPR0]]
40}
41
; loopdep_popcnt64: 64-bit variant of the ctpop accumulation loop. The running
; sum starts from the i64 loaded from %x, and %i runs from 1 until %i + 1
; equals 156250000. The %y argument is not used.
; The checks after the body expect, for both check prefixes, an xorl on the
; 32-bit name of a register (%eax..%edx) immediately followed by a popcntq
; whose destination is the 64-bit register with the same letter.
42define i64 @loopdep_popcnt64(i64* nocapture %x, double* nocapture %y) nounwind {
43entry:
44  %vx = load i64, i64* %x
45  br label %loop
46loop:
47  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
48  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
  ; Empty side-effecting inline asm: its constraint string only lists eax, ebx,
  ; ecx, edx, esi, edi, ebp, dirflag, fpsr and flags as clobbered.
  ; NOTE(review): presumably this limits which registers can stay live across
  ; this point so the expected register pattern is stable - confirm.
49  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
50  %j = tail call i64 @llvm.ctpop.i64(i64 %i)
51  %s2 = add i64 %s1, %j
52  %inc = add nsw i64 %i, 1
  ; Same clobber-only asm again, after the sum and induction variable updates.
53  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
54  %exitcond = icmp eq i64 %inc, 156250000
55  br i1 %exitcond, label %ret, label %loop
56ret:
57  ret i64 %s2
58
; Haswell run line: GPR0 captures only the register letter part ("ax".."dx"),
; so the xorl uses the %e form and the popcntq the %r form of one register.
59;HSW-LABEL:@loopdep_popcnt64
60;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
61;HSW-NEXT: popcntq {{.*}}, %r[[GPR0]]
62
; Remaining run lines (skylake, skx, silvermont, goldmont): the same xorl +
; popcntq pair is expected here as well.
63;SKL-LABEL:@loopdep_popcnt64
64;SKL: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
65;SKL-NEXT: popcntq {{.*}}, %r[[GPR0]]
66}
67
; loopdep_tzct32: loop that accumulates llvm.cttz.i32 of the induction
; variable. The running sum starts from the i32 loaded from %x, and %i runs
; from 1 until %i + 1 equals 156250000. The %y argument is not used.
; The Haswell checks expect a self-xorl immediately before the tzcntl that
; writes the same register; the other check prefix expects a tzcntl with no
; xor before it.
68define i32 @loopdep_tzct32(i32* nocapture %x, double* nocapture %y) nounwind {
69entry:
70  %vx = load i32, i32* %x
71  br label %loop
72loop:
73  %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
74  %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
  ; Empty side-effecting inline asm: its constraint string only lists eax, ebx,
  ; ecx, edx, esi, edi, ebp, dirflag, fpsr and flags as clobbered.
  ; NOTE(review): presumably this limits which registers can stay live across
  ; this point so the expected register pattern is stable - confirm.
75  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
76  %j = call i32 @llvm.cttz.i32(i32 %i, i1 true)
77  %s2 = add i32 %s1, %j
78  %inc = add nsw i32 %i, 1
  ; Same clobber-only asm again, after the sum and induction variable updates.
79  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
80  %exitcond = icmp eq i32 %inc, 156250000
81  br i1 %exitcond, label %ret, label %loop
82ret:
83  ret i32 %s2
84
; Haswell run line: a self-xor of eax/ebx/ecx/edx, then tzcntl into it.
85;HSW-LABEL:@loopdep_tzct32
86;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
87;HSW-NEXT: tzcntl {{.*}}, [[GPR0]]
88
89; This false dependency issue was fixed in Skylake
; The same check prefix is also used by the skx, silvermont and goldmont run
; lines of this file: no xor may appear between the label and the tzcntl.
90;SKL-LABEL:@loopdep_tzct32
91;SKL-NOT: xor
92;SKL: tzcntl
93}
94
; loopdep_tzct64: 64-bit variant of the cttz accumulation loop. The running
; sum starts from the i64 loaded from %x, and %i runs from 1 until %i + 1
; equals 156250000. The %y argument is not used.
; The Haswell checks expect an xorl on the 32-bit name of a register
; immediately before a tzcntq into the 64-bit register with the same letter;
; the other check prefix expects a tzcntq with no xor before it.
95define i64 @loopdep_tzct64(i64* nocapture %x, double* nocapture %y) nounwind {
96entry:
97  %vx = load i64, i64* %x
98  br label %loop
99loop:
100  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
101  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
  ; Empty side-effecting inline asm: its constraint string only lists eax, ebx,
  ; ecx, edx, esi, edi, ebp, dirflag, fpsr and flags as clobbered.
  ; NOTE(review): presumably this limits which registers can stay live across
  ; this point so the expected register pattern is stable - confirm.
102  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
103  %j = tail call i64 @llvm.cttz.i64(i64 %i, i1 true)
104  %s2 = add i64 %s1, %j
105  %inc = add nsw i64 %i, 1
  ; Same clobber-only asm again, after the sum and induction variable updates.
106  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
107  %exitcond = icmp eq i64 %inc, 156250000
108  br i1 %exitcond, label %ret, label %loop
109ret:
110  ret i64 %s2
111
; Haswell run line: GPR0 captures only the register letter part ("ax".."dx"),
; so the xorl uses the %e form and the tzcntq the %r form of one register.
112;HSW-LABEL:@loopdep_tzct64
113;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
114;HSW-NEXT: tzcntq {{.*}}, %r[[GPR0]]
115
116; This false dependency issue was fixed in Skylake
; The same check prefix is also used by the skx, silvermont and goldmont run
; lines of this file: no xor may appear between the label and the tzcntq.
117;SKL-LABEL:@loopdep_tzct64
118;SKL-NOT: xor
119;SKL: tzcntq
120}
121
; loopdep_lzct32: loop that accumulates llvm.ctlz.i32 of the induction
; variable. The running sum starts from the i32 loaded from %x, and %i runs
; from 1 until %i + 1 equals 156250000. The %y argument is not used.
; The Haswell checks expect a self-xorl immediately before the lzcntl that
; writes the same register; the other check prefix expects an lzcntl with no
; xor before it.
122define i32 @loopdep_lzct32(i32* nocapture %x, double* nocapture %y) nounwind {
123entry:
124  %vx = load i32, i32* %x
125  br label %loop
126loop:
127  %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
128  %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
  ; Empty side-effecting inline asm: its constraint string only lists eax, ebx,
  ; ecx, edx, esi, edi, ebp, dirflag, fpsr and flags as clobbered.
  ; NOTE(review): presumably this limits which registers can stay live across
  ; this point so the expected register pattern is stable - confirm.
129  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
130  %j = call i32 @llvm.ctlz.i32(i32 %i, i1 true)
131  %s2 = add i32 %s1, %j
132  %inc = add nsw i32 %i, 1
  ; Same clobber-only asm again, after the sum and induction variable updates.
133  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
134  %exitcond = icmp eq i32 %inc, 156250000
135  br i1 %exitcond, label %ret, label %loop
136ret:
137  ret i32 %s2
138
; Haswell run line: a self-xor of eax/ebx/ecx/edx, then lzcntl into it.
139;HSW-LABEL:@loopdep_lzct32
140;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
141;HSW-NEXT: lzcntl {{.*}}, [[GPR0]]
142
143; This false dependency issue was fixed in Skylake
; The same check prefix is also used by the skx, silvermont and goldmont run
; lines of this file: no xor may appear between the label and the lzcntl.
144;SKL-LABEL:@loopdep_lzct32
145;SKL-NOT: xor
146;SKL: lzcntl
147}
148
; loopdep_lzct64: 64-bit variant of the ctlz accumulation loop. The running
; sum starts from the i64 loaded from %x, and %i runs from 1 until %i + 1
; equals 156250000. The %y argument is not used.
; The Haswell checks expect an xorl on the 32-bit name of a register
; immediately before an lzcntq into the 64-bit register with the same letter;
; the other check prefix expects an lzcntq with no xor before it.
149define i64 @loopdep_lzct64(i64* nocapture %x, double* nocapture %y) nounwind {
150entry:
151  %vx = load i64, i64* %x
152  br label %loop
153loop:
154  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
155  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
  ; Empty side-effecting inline asm: its constraint string only lists eax, ebx,
  ; ecx, edx, esi, edi, ebp, dirflag, fpsr and flags as clobbered.
  ; NOTE(review): presumably this limits which registers can stay live across
  ; this point so the expected register pattern is stable - confirm.
156  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
157  %j = tail call i64 @llvm.ctlz.i64(i64 %i, i1 true)
158  %s2 = add i64 %s1, %j
159  %inc = add nsw i64 %i, 1
  ; Same clobber-only asm again, after the sum and induction variable updates.
160  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
161  %exitcond = icmp eq i64 %inc, 156250000
162  br i1 %exitcond, label %ret, label %loop
163ret:
164  ret i64 %s2
165
; Haswell run line: GPR0 captures only the register letter part ("ax".."dx"),
; so the xorl uses the %e form and the lzcntq the %r form of one register.
166;HSW-LABEL:@loopdep_lzct64
167;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
168;HSW-NEXT: lzcntq {{.*}}, %r[[GPR0]]
169
170; This false dependency issue was fixed in Skylake
; The same check prefix is also used by the skx, silvermont and goldmont run
; lines of this file: no xor may appear between the label and the lzcntq.
171;SKL-LABEL:@loopdep_lzct64
172;SKL-NOT: xor
173;SKL: lzcntq
174}
175