1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s
3
4; This file tests the following optimization:
5;
6;        leal    (%rdx,%rax), %esi
7;        subl    %esi, %ecx
8;
9; can be transformed to
10;
11;        subl    %edx, %ecx
12;        subl    %eax, %ecx
13
14; C - (A + B)   -->    C - A - B
; The add result is used only as the subtrahend of the sub, and the checked
; output below expects two subl instructions and no leal.
15define i32 @test1(i32* %p, i32 %a, i32 %b, i32 %c) {
16; CHECK-LABEL: test1:
17; CHECK:       # %bb.0: # %entry
18; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
19; CHECK-NEXT:    movl %esi, %eax
20; CHECK-NEXT:    subl %edx, %ecx
21; CHECK-NEXT:    subl %eax, %ecx
22; CHECK-NEXT:    movl %ecx, (%rdi)
23; CHECK-NEXT:    subl %edx, %eax
24; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
25; CHECK-NEXT:    retq
26entry:
; %0 = B + A, then %sub = C - (B + A) is stored through %p.
27  %0 = add i32 %b, %a
28  %sub = sub i32 %c, %0
29  store i32 %sub, i32* %p, align 4
; %a and %b are read again after the add.
; NOTE(review): presumably this second use is what keeps both addends live so
; that a leal would need an extra register - confirm against the pass.
30  %sub1 = sub i32 %a, %b
31  ret i32 %sub1
32}
33
34; C + (A + B)   -->    C + A + B
; The add result is the right-hand operand of the outer add (add %c, %0); the
; checked output below expects two addl instructions and no leal.
35define i32 @test2(i32* %p, i32 %a, i32 %b, i32 %c) {
36; CHECK-LABEL: test2:
37; CHECK:       # %bb.0: # %entry
38; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
39; CHECK-NEXT:    movl %esi, %eax
40; CHECK-NEXT:    addl %eax, %ecx
41; CHECK-NEXT:    addl %edx, %ecx
42; CHECK-NEXT:    movl %ecx, (%rdi)
43; CHECK-NEXT:    subl %edx, %eax
44; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
45; CHECK-NEXT:    retq
46entry:
; %0 = A + B, then %1 = C + (A + B) is stored through %p.
47  %0 = add i32 %a, %b
48  %1 = add i32 %c, %0
49  store i32 %1, i32* %p, align 4
; %a and %b are read again after the add.
50  %sub1 = sub i32 %a, %b
51  ret i32 %sub1
52}
53
54; (A + B) + C   -->    C + A + B
; The add result is the left-hand operand of the outer add (add %0, %c); the
; checked output below expects two addl instructions and no leal.
55define i32 @test3(i32* %p, i32 %a, i32 %b, i32 %c) {
56; CHECK-LABEL: test3:
57; CHECK:       # %bb.0: # %entry
58; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
59; CHECK-NEXT:    movl %esi, %eax
60; CHECK-NEXT:    addl %eax, %ecx
61; CHECK-NEXT:    addl %edx, %ecx
62; CHECK-NEXT:    movl %ecx, (%rdi)
63; CHECK-NEXT:    subl %edx, %eax
64; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
65; CHECK-NEXT:    retq
66entry:
; %0 = A + B, then %1 = (A + B) + C is stored through %p.
67  %0 = add i32 %a, %b
68  %1 = add i32 %0, %c
69  store i32 %1, i32* %p, align 4
; %a and %b are read again after the add.
70  %sub1 = sub i32 %a, %b
71  ret i32 %sub1
72}
73
74; (A + B) - C
75; Can't be converted to A - C + B without introducing a MOV,
; so the checked output below keeps the leal followed by a single subl.
76define i32 @test4(i32* %p, i32 %a, i32 %b, i32 %c) {
77; CHECK-LABEL: test4:
78; CHECK:       # %bb.0: # %entry
79; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
80; CHECK-NEXT:    movl %esi, %eax
81; CHECK-NEXT:    leal (%rdx,%rax), %esi
82; CHECK-NEXT:    subl %ecx, %esi
83; CHECK-NEXT:    movl %esi, (%rdi)
84; CHECK-NEXT:    subl %edx, %eax
85; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
86; CHECK-NEXT:    retq
87entry:
; Here the add result is the minuend: %sub = (B + A) - C.
88  %0 = add i32 %b, %a
89  %sub = sub i32 %0, %c
90  store i32 %sub, i32* %p, align 4
; %a and %b are read again after the add.
91  %sub1 = sub i32 %a, %b
92  ret i32 %sub1
93}
94
; 64-bit form of C - (A + B) in which one addend is loaded from %p.
; The checked output below expects two subq instructions and no leaq.
; The %a parameter is not used by the body.
95define i64 @test5(i64* %p, i64 %a, i64 %b, i64 %c) {
96; CHECK-LABEL: test5:
97; CHECK:       # %bb.0: # %entry
98; CHECK-NEXT:    movq (%rdi), %rax
99; CHECK-NEXT:    subq %rdx, %rcx
100; CHECK-NEXT:    subq %rax, %rcx
101; CHECK-NEXT:    movq %rcx, (%rdi)
102; CHECK-NEXT:    subq %rdx, %rax
103; CHECK-NEXT:    retq
104entry:
105  %ld = load i64, i64* %p, align 8
; %sub = C - (B + ld), written back to the location %ld was loaded from.
106  %0 = add i64 %b, %ld
107  %sub = sub i64 %c, %0
108  store i64 %sub, i64* %p, align 8
; %ld and %b are read again after the add.
109  %sub1 = sub i64 %ld, %b
110  ret i64 %sub1
111}
112
; 64-bit form of (A + B) + C in which one addend is loaded from %p.
; The checked output below expects two addq instructions and no leaq.
; The %a parameter is not used by the body.
113define i64 @test6(i64* %p, i64 %a, i64 %b, i64 %c) {
114; CHECK-LABEL: test6:
115; CHECK:       # %bb.0: # %entry
116; CHECK-NEXT:    movq (%rdi), %rax
117; CHECK-NEXT:    addq %rdx, %rcx
118; CHECK-NEXT:    addq %rax, %rcx
119; CHECK-NEXT:    movq %rcx, (%rdi)
120; CHECK-NEXT:    subq %rdx, %rax
121; CHECK-NEXT:    retq
122entry:
123  %ld = load i64, i64* %p, align 8
; %1 = (B + ld) + C, written back to the location %ld was loaded from.
124  %0 = add i64 %b, %ld
125  %1 = add i64 %0, %c
126  store i64 %1, i64* %p, align 8
; %ld and %b are read again after the add.
127  %sub1 = sub i64 %ld, %b
128  ret i64 %sub1
129}
130
; 64-bit form of C + (A + B) in which one addend is loaded from %p.
; The checked output below expects two addq instructions and no leaq.
; The %a parameter is not used by the body.
131define i64 @test7(i64* %p, i64 %a, i64 %b, i64 %c) {
132; CHECK-LABEL: test7:
133; CHECK:       # %bb.0: # %entry
134; CHECK-NEXT:    movq (%rdi), %rax
135; CHECK-NEXT:    addq %rdx, %rcx
136; CHECK-NEXT:    addq %rax, %rcx
137; CHECK-NEXT:    movq %rcx, (%rdi)
138; CHECK-NEXT:    subq %rdx, %rax
139; CHECK-NEXT:    retq
140entry:
141  %ld = load i64, i64* %p, align 8
; %1 = C + (B + ld), written back to the location %ld was loaded from.
142  %0 = add i64 %b, %ld
143  %1 = add i64 %c, %0
144  store i64 %1, i64* %p, align 8
; %ld and %b are read again after the add.
145  %sub1 = sub i64 %ld, %b
146  ret i64 %sub1
147}
148
149; The flags produced by the sub instruction are used by the following branch,
150; so it should not be transformed.
; The checked output below keeps the leaq, a single subq, and a ja that
; immediately follows the subq.
; The %a parameter is not used by the body.
151define i64 @test8(i64* %p, i64 %a, i64 %b, i64 %c) {
152; CHECK-LABEL: test8:
153; CHECK:       # %bb.0: # %entry
154; CHECK-NEXT:    movq (%rdi), %rax
155; CHECK-NEXT:    leaq (%rdx,%rax), %rsi
156; CHECK-NEXT:    subq %rsi, %rcx
157; CHECK-NEXT:    ja .LBB7_2
158; CHECK-NEXT:  # %bb.1: # %then
159; CHECK-NEXT:    movq %rcx, (%rdi)
160; CHECK-NEXT:    subq %rdx, %rax
161; CHECK-NEXT:    retq
162; CHECK-NEXT:  .LBB7_2: # %else
163; CHECK-NEXT:    movq $0, (%rdi)
164; CHECK-NEXT:    subq %rdx, %rax
165; CHECK-NEXT:    retq
166entry:
167  %ld = load i64, i64* %p, align 8
168  %0 = add i64 %b, %ld
; The same two operands (%c and %0) feed both the sub and the unsigned compare.
169  %sub = sub i64 %c, %0
170  %cond = icmp ule i64 %c, %0
171  br i1 %cond, label %then, label %else
172
; Taken when C <= B + ld (unsigned): store the difference.
173then:
174  store i64 %sub, i64* %p, align 8
175  br label %endif
176
; Taken when C > B + ld (unsigned): store zero instead.
177else:
178  store i64 0, i64* %p, align 8
179  br label %endif
180
181endif:
; %ld and %b are read again after the add, on both paths.
182  %sub1 = sub i64 %ld, %b
183  ret i64 %sub1
184}
185
186; PR50615
187; A sub-register use of the lea destination should block the transformation.
; In the checked output below the leaq result lives in %rax and the testl reads
; %eax, its low 32 bits; the leaq is expected to stay.
188define void @test9(i64 %p, i64 %s) {
189; CHECK-LABEL: test9:
190; CHECK:       # %bb.0: # %entry
191; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
192; CHECK-NEXT:    xorl %ecx, %ecx
193; CHECK-NEXT:    testl $4095, %eax # imm = 0xFFF
194; CHECK-NEXT:    setne %cl
195; CHECK-NEXT:    shlq $12, %rcx
196; CHECK-NEXT:    addq %rax, %rcx
197; CHECK-NEXT:    andq $-4096, %rcx # imm = 0xF000
198; CHECK-NEXT:    addq %rcx, %rdi
199; CHECK-NEXT:    jmp bar@PLT # TAILCALL
200entry:
201  %add = add i64 %s, %p
; Round %add up to a multiple of 4096: add 4096 only when the low 12 bits are
; non-zero, then clear the low 12 bits.
202  %rem = and i64 %add, 4095
203  %cmp.not = icmp eq i64 %rem, 0
204  %add18 = select i1 %cmp.not, i64 0, i64 4096
205  %div9 = add i64 %add18, %add
206  %mul = and i64 %div9, -4096
; %p is used again here, after the first add.
207  %add2 = add i64 %mul, %p
208  tail call void @bar(i64 %add2, i64 %s)
209  ret void
210}
211
; NOTE(review): this case carries no description in the file. Nearly every
; operand is undef, which suggests a reduced regression input - confirm the
; intent against the commit history.
; The checked output below contains a leal (%rcx,%rcx,2) that is kept and
; later negated, plus a pair of subl instructions on %edi.
212define void @test10() {
213; CHECK-LABEL: test10:
214; CHECK:       # %bb.0: # %entry
215; CHECK-NEXT:    movl (%rax), %eax
216; CHECK-NEXT:    movzwl (%rax), %ecx
217; CHECK-NEXT:    leal (%rcx,%rcx,2), %esi
218; CHECK-NEXT:    movl %ecx, %edi
219; CHECK-NEXT:    subl %ecx, %edi
220; CHECK-NEXT:    subl %ecx, %edi
221; CHECK-NEXT:    negl %esi
222; CHECK-NEXT:    xorl %ecx, %ecx
223; CHECK-NEXT:    cmpl $4, %eax
224; CHECK-NEXT:    movl %edi, (%rax)
225; CHECK-NEXT:    movl %esi, (%rax)
226; CHECK-NEXT:    cmovnel %eax, %ecx
227; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
228; CHECK-NEXT:    sarl %cl, %esi
229; CHECK-NEXT:    movl %esi, (%rax)
230; CHECK-NEXT:    retq
231entry:
232  %tmp = load i32, i32* undef, align 4
233  %tmp3 = sdiv i32 undef, 6
234  %tmp4 = load i32, i32* undef, align 4
235  %tmp5 = icmp eq i32 %tmp4, 4
; %tmp6 is the shift amount for the ashr near the end of the function.
236  %tmp6 = select i1 %tmp5, i32 %tmp3, i32 %tmp
237  %tmp10 = load i16, i16* undef, align 2
238  %tmp11 = zext i16 %tmp10 to i32
239  %tmp13 = zext i16 undef to i32
240  %tmp15 = load i16, i16* undef, align 2
241  %tmp16 = zext i16 %tmp15 to i32
242  %tmp19 = shl nsw i32 undef, 1
243  %tmp25 = shl nsw i32 undef, 1
244  %tmp26 = add nsw i32 %tmp25, %tmp13
245  %tmp28 = shl nsw i32 undef, 1
246  %tmp29 = add nsw i32 %tmp28, %tmp16
; Both add results are used only as subtrahends of the two subs below.
247  %tmp30 = sub nsw i32 %tmp19, %tmp29
248  %tmp31 = sub nsw i32 %tmp11, %tmp26
; First stored value: 2 * %tmp30 + %tmp31.
249  %tmp32 = shl nsw i32 %tmp30, 1
250  %tmp33 = add nsw i32 %tmp32, %tmp31
251  store i32 %tmp33, i32* undef, align 4
; Second stored value: -2 * %tmp31 + %tmp30.
252  %tmp34 = mul nsw i32 %tmp31, -2
253  %tmp35 = add nsw i32 %tmp34, %tmp30
254  store i32 %tmp35, i32* undef, align 4
; %tmp36 is never used.
255  %tmp36 = select i1 %tmp5, i32 undef, i32 undef
256  %tmp38 = load i32, i32* undef, align 4
257  %tmp39 = ashr i32 %tmp38, %tmp6
258  store i32 %tmp39, i32* undef, align 4
259  ret void
260}
261
; External function; within this file it is only tail-called by test9.
262declare void @bar(i64, i64)
263
264