; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s

; This file tests the following optimization:
;
;        leal    (%rdx,%rax), %esi
;        subl    %esi, %ecx
;
; can be transformed to
;
;        subl    %edx, %ecx
;        subl    %eax, %ecx
;
; Every function also computes and returns a second value from the same two
; addends, so both addends stay live after the sum is consumed.

; C - (A + B) --> C - A - B
define i32 @test1(i32* %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    subl %edx, %ecx
; CHECK-NEXT:    subl %eax, %ecx
; CHECK-NEXT:    movl %ecx, (%rdi)
; CHECK-NEXT:    subl %edx, %eax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
entry:
  %0 = add i32 %b, %a
  %sub = sub i32 %c, %0
  store i32 %sub, i32* %p, align 4
  %sub1 = sub i32 %a, %b
  ret i32 %sub1
}

; C + (A + B) --> C + A + B
; (the sum is the right-hand operand of the outer add)
define i32 @test2(i32* %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    addl %edx, %ecx
; CHECK-NEXT:    movl %ecx, (%rdi)
; CHECK-NEXT:    subl %edx, %eax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
entry:
  %0 = add i32 %a, %b
  %1 = add i32 %c, %0
  store i32 %1, i32* %p, align 4
  %sub1 = sub i32 %a, %b
  ret i32 %sub1
}

; (A + B) + C --> C + A + B
; (the sum is the left-hand operand of the outer add)
define i32 @test3(i32* %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    addl %edx, %ecx
; CHECK-NEXT:    movl %ecx, (%rdi)
; CHECK-NEXT:    subl %edx, %eax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
entry:
  %0 = add i32 %a, %b
  %1 = add i32 %0, %c
  store i32 %1, i32* %p, align 4
  %sub1 = sub i32 %a, %b
  ret i32 %sub1
}

; (A + B) - C
; Can't be converted to A - C + B without introducing a MOV, so the LEA stays.
define i32 @test4(i32* %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    leal (%rdx,%rax), %esi
; CHECK-NEXT:    subl %ecx, %esi
; CHECK-NEXT:    movl %esi, (%rdi)
; CHECK-NEXT:    subl %edx, %eax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
entry:
  %0 = add i32 %b, %a
  %sub = sub i32 %0, %c
  store i32 %sub, i32* %p, align 4
  %sub1 = sub i32 %a, %b
  ret i32 %sub1
}

; 64-bit variant where one addend is loaded from memory:
; C - (B + load) --> C - B - load
define i64 @test5(i64* %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    subq %rdx, %rcx
; CHECK-NEXT:    subq %rax, %rcx
; CHECK-NEXT:    movq %rcx, (%rdi)
; CHECK-NEXT:    subq %rdx, %rax
; CHECK-NEXT:    retq
entry:
  %ld = load i64, i64* %p, align 8
  %0 = add i64 %b, %ld
  %sub = sub i64 %c, %0
  store i64 %sub, i64* %p, align 8
  %sub1 = sub i64 %ld, %b
  ret i64 %sub1
}

; 64-bit variant where one addend is loaded from memory:
; (B + load) + C --> C + B + load
define i64 @test6(i64* %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    addq %rdx, %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    movq %rcx, (%rdi)
; CHECK-NEXT:    subq %rdx, %rax
; CHECK-NEXT:    retq
entry:
  %ld = load i64, i64* %p, align 8
  %0 = add i64 %b, %ld
  %1 = add i64 %0, %c
  store i64 %1, i64* %p, align 8
  %sub1 = sub i64 %ld, %b
  ret i64 %sub1
}

; 64-bit variant where one addend is loaded from memory:
; C + (B + load) --> C + B + load
define i64 @test7(i64* %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    addq %rdx, %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    movq %rcx, (%rdi)
; CHECK-NEXT:    subq %rdx, %rax
; CHECK-NEXT:    retq
entry:
  %ld = load i64, i64* %p, align 8
  %0 = add i64 %b, %ld
  %1 = add i64 %c, %0
  store i64 %1, i64* %p, align 8
  %sub1 = sub i64 %ld, %b
  ret i64 %sub1
}

; The flags generated by the sub instruction are used by the following branch,
; so it should not be transformed.
define i64 @test8(i64* %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    leaq (%rdx,%rax), %rsi
; CHECK-NEXT:    subq %rsi, %rcx
; CHECK-NEXT:    ja .LBB7_2
; CHECK-NEXT:  # %bb.1: # %then
; CHECK-NEXT:    movq %rcx, (%rdi)
; CHECK-NEXT:    subq %rdx, %rax
; CHECK-NEXT:    retq
; CHECK-NEXT:  .LBB7_2: # %else
; CHECK-NEXT:    movq $0, (%rdi)
; CHECK-NEXT:    subq %rdx, %rax
; CHECK-NEXT:    retq
entry:
  %ld = load i64, i64* %p, align 8
  %0 = add i64 %b, %ld
  %sub = sub i64 %c, %0
  %cond = icmp ule i64 %c, %0
  br i1 %cond, label %then, label %else

then:
  store i64 %sub, i64* %p, align 8
  br label %endif

else:
  store i64 0, i64* %p, align 8
  br label %endif

endif:
  %sub1 = sub i64 %ld, %b
  ret i64 %sub1
}

; PR50615
; A use of a sub-register of the LEA destination should block the
; transformation.
define void @test9(i64 %p, i64 %s) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    testl $4095, %eax # imm = 0xFFF
; CHECK-NEXT:    setne %cl
; CHECK-NEXT:    shlq $12, %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    andq $-4096, %rcx # imm = 0xF000
; CHECK-NEXT:    addq %rcx, %rdi
; CHECK-NEXT:    jmp bar@PLT # TAILCALL
entry:
  %add = add i64 %s, %p
  %rem = and i64 %add, 4095
  %cmp.not = icmp eq i64 %rem, 0
  %add18 = select i1 %cmp.not, i64 0, i64 4096
  %div9 = add i64 %add18, %add
  %mul = and i64 %div9, -4096
  %add2 = add i64 %mul, %p
  tail call void @bar(i64 %add2, i64 %s)
  ret void
}

; Input built largely from undef operands. In the checked output a leal feeds
; a negl, next to a separate sub/sub chain.
; NOTE(review): the motivating bug report is not recorded in this file.
define void @test10() {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl (%rax), %eax
; CHECK-NEXT:    movzwl (%rax), %ecx
; CHECK-NEXT:    leal (%rcx,%rcx,2), %esi
; CHECK-NEXT:    movl %ecx, %edi
; CHECK-NEXT:    subl %ecx, %edi
; CHECK-NEXT:    subl %ecx, %edi
; CHECK-NEXT:    negl %esi
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    cmpl $4, %eax
; CHECK-NEXT:    movl %edi, (%rax)
; CHECK-NEXT:    movl %esi, (%rax)
; CHECK-NEXT:    cmovnel %eax, %ecx
; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT:    sarl %cl, %esi
; CHECK-NEXT:    movl %esi, (%rax)
; CHECK-NEXT:    retq
entry:
  %tmp = load i32, i32* undef, align 4
  %tmp3 = sdiv i32 undef, 6
  %tmp4 = load i32, i32* undef, align 4
  %tmp5 = icmp eq i32 %tmp4, 4
  %tmp6 = select i1 %tmp5, i32 %tmp3, i32 %tmp
  %tmp10 = load i16, i16* undef, align 2
  %tmp11 = zext i16 %tmp10 to i32
  %tmp13 = zext i16 undef to i32
  %tmp15 = load i16, i16* undef, align 2
  %tmp16 = zext i16 %tmp15 to i32
  %tmp19 = shl nsw i32 undef, 1
  %tmp25 = shl nsw i32 undef, 1
  %tmp26 = add nsw i32 %tmp25, %tmp13
  %tmp28 = shl nsw i32 undef, 1
  %tmp29 = add nsw i32 %tmp28, %tmp16
  %tmp30 = sub nsw i32 %tmp19, %tmp29
  %tmp31 = sub nsw i32 %tmp11, %tmp26
  %tmp32 = shl nsw i32 %tmp30, 1
  %tmp33 = add nsw i32 %tmp32, %tmp31
  store i32 %tmp33, i32* undef, align 4
  %tmp34 = mul nsw i32 %tmp31, -2
  %tmp35 = add nsw i32 %tmp34, %tmp30
  store i32 %tmp35, i32* undef, align 4
  %tmp36 = select i1 %tmp5, i32 undef, i32 undef
  %tmp38 = load i32, i32* undef, align 4
  %tmp39 = ashr i32 %tmp38, %tmp6
  store i32 %tmp39, i32* undef, align 4
  ret void
}

; External callee used as the tail-call target in test9.
declare void @bar(i64, i64)