1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s
3
4; After tail duplication, two copies in an early exit BB can be cancelled out.
5; rdar://10640363
; t1: returns %a when %b is zero; otherwise repeatedly takes the signed
; remainder of a (dividend, divisor) pair and returns the divisor for which the
; remainder is zero.
6define i32 @t1(i32 %a, i32 %b) nounwind  {
7; CHECK-LABEL: t1:
8; CHECK:       ## %bb.0: ## %entry
9; CHECK-NEXT:    movl %edi, %eax
10; CHECK-NEXT:    testl %esi, %esi
11; CHECK-NEXT:    je LBB0_4
12; CHECK-NEXT:  ## %bb.1: ## %while.body.preheader
13; CHECK-NEXT:    movl %esi, %edx
14; CHECK-NEXT:    .p2align 4, 0x90
15; CHECK-NEXT:  LBB0_2: ## %while.body
16; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
17; CHECK-NEXT:    movl %edx, %ecx
18; CHECK-NEXT:    cltd
19; CHECK-NEXT:    idivl %ecx
20; CHECK-NEXT:    testl %edx, %edx
21; CHECK-NEXT:    movl %ecx, %eax
22; CHECK-NEXT:    jne LBB0_2
23; CHECK-NEXT:  ## %bb.3: ## %while.end
24; CHECK-NEXT:    movl %ecx, %eax
25; CHECK-NEXT:  LBB0_4:
26; CHECK-NEXT:    retq
; Early exit: a zero %b skips the loop, and while.end then yields %a.
27entry:
28  %cmp1 = icmp eq i32 %b, 0
29  br i1 %cmp1, label %while.end, label %while.body
30
; Each iteration replaces (dividend, divisor) with (divisor, remainder) and
; leaves the loop once the remainder is zero.
31while.body:                                       ; preds = %entry, %while.body
32  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
33  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
34  %rem = srem i32 %a.addr.03, %b.addr.02
35  %cmp = icmp eq i32 %rem, 0
36  br i1 %cmp, label %while.end, label %while.body
37
; The result is %a from the early exit, or the last divisor from the loop.
38while.end:                                        ; preds = %while.body, %entry
39  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
40  ret i32 %a.addr.0.lcssa
41}
42
43; Two movdqa (from phi-elimination) in the entry BB cancel out.
44; rdar://10428165
; t2: a single shuffle of %T0 and %T1. The expected output is three shuffle
; instructions followed by the return, with no register-to-register copies.
45define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
46; CHECK-LABEL: t2:
47; CHECK:       ## %bb.0: ## %entry
48; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
49; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
50; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
51; CHECK-NEXT:    retq
52entry:
; Result lanes 2, 3 and 4 are T0[7], T0[2] and T1[0] (mask index 8 selects the
; first lane of the second operand); every other lane is undef.
53  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
54  ret <8 x i16> %tmp8
55}
56
; t3: 64-bit remainder loop whose result is truncated to i32. Returns the low
; 32 bits of %a when %b is zero; otherwise the low 32 bits of the divisor for
; which the signed remainder is zero.
57define i32 @t3(i64 %a, i64 %b) nounwind  {
58; CHECK-LABEL: t3:
59; CHECK:       ## %bb.0: ## %entry
60; CHECK-NEXT:    movq %rdi, %rax
61; CHECK-NEXT:    testq %rsi, %rsi
62; CHECK-NEXT:    je LBB2_4
63; CHECK-NEXT:  ## %bb.1: ## %while.body.preheader
64; CHECK-NEXT:    movq %rsi, %rdx
65; CHECK-NEXT:    .p2align 4, 0x90
66; CHECK-NEXT:  LBB2_2: ## %while.body
67; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
68; CHECK-NEXT:    movq %rdx, %rcx
69; CHECK-NEXT:    cqto
70; CHECK-NEXT:    idivq %rcx
71; CHECK-NEXT:    testq %rdx, %rdx
72; CHECK-NEXT:    movq %rcx, %rax
73; CHECK-NEXT:    jne LBB2_2
74; CHECK-NEXT:  ## %bb.3: ## %while.end
75; CHECK-NEXT:    movl %ecx, %eax
76; CHECK-NEXT:  LBB2_4:
77; CHECK-NEXT:    retq
; Early exit: a zero %b skips the loop entirely.
78entry:
79  %cmp1 = icmp eq i64 %b, 0
80  br i1 %cmp1, label %while.end, label %while.body
81
; Each iteration replaces (dividend, divisor) with (divisor, remainder) and
; leaves the loop once the remainder is zero.
82while.body:                                       ; preds = %entry, %while.body
83  %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
84  %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
85  %rem = srem i64 %a.addr.03, %b.addr.02
86  %cmp = icmp eq i64 %rem, 0
87  br i1 %cmp, label %while.end, label %while.body
88
; The merged 64-bit value is narrowed to i32 before being returned.
89while.end:                                        ; preds = %while.body, %entry
90  %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
91  %t = trunc i64 %a.addr.0.lcssa to i32
92  ret i32 %t
93}
94
95; Check that copy propagation does not kill things like:
96; dst = copy src <-- do not kill that.
97; ... = op1 undef dst
98; ... = op2 dst <-- this is used here.
; foo: lane-wise compare/convert/bit-mask computation over a <16 x float>
; argument. The expected output keeps the register copies (movaps) at the top
; of the function whose destinations (xmm7, xmm8, xmm9) are read again later.
99define <16 x float> @foo(<16 x float> %x) {
100; CHECK-LABEL: foo:
101; CHECK:       ## %bb.0: ## %bb
102; CHECK-NEXT:    movaps %xmm3, %xmm9
103; CHECK-NEXT:    movaps %xmm2, %xmm8
104; CHECK-NEXT:    movaps %xmm0, %xmm7
105; CHECK-NEXT:    xorps %xmm0, %xmm0
106; CHECK-NEXT:    movaps %xmm3, %xmm2
107; CHECK-NEXT:    cmpltps %xmm0, %xmm2
108; CHECK-NEXT:    movaps %xmm2, %xmm4
109; CHECK-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
110; CHECK-NEXT:    movaps %xmm4, %xmm10
111; CHECK-NEXT:    andnps %xmm2, %xmm10
112; CHECK-NEXT:    movaps %xmm8, %xmm5
113; CHECK-NEXT:    cmpltps %xmm0, %xmm5
114; CHECK-NEXT:    movaps {{.*#+}} xmm11 = [9,10,11,12]
115; CHECK-NEXT:    movaps %xmm5, %xmm2
116; CHECK-NEXT:    orps %xmm11, %xmm2
117; CHECK-NEXT:    movaps %xmm2, %xmm14
118; CHECK-NEXT:    andnps %xmm5, %xmm14
119; CHECK-NEXT:    cvttps2dq %xmm1, %xmm12
120; CHECK-NEXT:    cmpltps %xmm0, %xmm1
121; CHECK-NEXT:    movaps {{.*#+}} xmm13 = [5,6,7,8]
122; CHECK-NEXT:    movaps %xmm1, %xmm6
123; CHECK-NEXT:    orps %xmm13, %xmm6
124; CHECK-NEXT:    movaps %xmm6, %xmm5
125; CHECK-NEXT:    andnps %xmm1, %xmm5
126; CHECK-NEXT:    cvttps2dq %xmm7, %xmm3
127; CHECK-NEXT:    cmpltps %xmm0, %xmm7
128; CHECK-NEXT:    movaps {{.*#+}} xmm15 = [1,2,3,4]
129; CHECK-NEXT:    movaps %xmm7, %xmm0
130; CHECK-NEXT:    orps %xmm15, %xmm0
131; CHECK-NEXT:    movaps %xmm0, %xmm1
132; CHECK-NEXT:    andnps %xmm7, %xmm1
133; CHECK-NEXT:    andps %xmm15, %xmm0
134; CHECK-NEXT:    cvtdq2ps %xmm3, %xmm3
135; CHECK-NEXT:    andps %xmm3, %xmm0
136; CHECK-NEXT:    movaps {{.*#+}} xmm3 = [1,1,1,1]
137; CHECK-NEXT:    andps %xmm3, %xmm1
138; CHECK-NEXT:    orps %xmm1, %xmm0
139; CHECK-NEXT:    andps %xmm13, %xmm6
140; CHECK-NEXT:    cvtdq2ps %xmm12, %xmm1
141; CHECK-NEXT:    andps %xmm1, %xmm6
142; CHECK-NEXT:    andps %xmm3, %xmm5
143; CHECK-NEXT:    orps %xmm5, %xmm6
144; CHECK-NEXT:    andps %xmm11, %xmm2
145; CHECK-NEXT:    cvttps2dq %xmm8, %xmm1
146; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
147; CHECK-NEXT:    andps %xmm1, %xmm2
148; CHECK-NEXT:    andps %xmm3, %xmm14
149; CHECK-NEXT:    orps %xmm14, %xmm2
150; CHECK-NEXT:    andps %xmm3, %xmm10
151; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
152; CHECK-NEXT:    cvttps2dq %xmm9, %xmm1
153; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
154; CHECK-NEXT:    andps %xmm1, %xmm4
155; CHECK-NEXT:    orps %xmm10, %xmm4
156; CHECK-NEXT:    movaps %xmm6, %xmm1
157; CHECK-NEXT:    movaps %xmm4, %xmm3
158; CHECK-NEXT:    retq
159bb:
; None of the constants 0..15 is less than zero, so every lane of %v3 is false
; and %v14 is all zeros.
160  %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
161  %v14 = zext <16 x i1> %v3 to <16 x i32>
; Per-lane "x < 0.0" test, widened both as an all-ones mask (%v17, sext) and as
; a 0/1 value (%v18, zext).
162  %v16 = fcmp olt <16 x float> %x, zeroinitializer
163  %v17 = sext <16 x i1> %v16 to <16 x i32>
164  %v18 = zext <16 x i1> %v16 to <16 x i32>
165  %v19 = xor <16 x i32> %v14, %v18
166  %v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
; Round-trip %x through a signed integer conversion and back to float.
167  %v21 = fptosi <16 x float> %x to <16 x i32>
168  %v22 = sitofp <16 x i32> %v21 to <16 x float>
169  %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
170  %v75 = and <16 x i1> %v69, %v3
171  %v77 = bitcast <16 x float> %v22 to <16 x i32>
172  %v79 = sext <16 x i1> %v75 to <16 x i32>
173  %v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
174  %v81 = xor <16 x i32> %v77, %v80
175  %v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
; Bit-select written with xor/and/xor: %v85 takes the bits of %v82 where %v20
; is set and the bits of %v19 elsewhere.
176  %v83 = xor <16 x i32> %v19, %v82
177  %v84 = and <16 x i32> %v83, %v20
178  %v85 = xor <16 x i32> %v19, %v84
179  %v86 = bitcast <16 x i32> %v85 to <16 x float>
180  ret <16 x float> %v86
181}
182