; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
; Verify that for the architectures that are known to have poor latency
; double precision shift instructions we generate alternative sequence
; of instructions with lower latencies instead of shld instruction.

;uint64_t lshift1(uint64_t a, uint64_t b)
;{
;    return (a << 1) | (b >> 63);
;}

; CHECK-LABEL: lshift1:
; CHECK: addq {{.*}},{{.*}}
; CHECK-NEXT: shrq $63, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 1
  %shr = lshr i64 %b, 63
  %or = or i64 %shr, %shl
  ret i64 %or
}

;uint64_t lshift2(uint64_t a, uint64_t b)
;{
;    return (a << 2) | (b >> 62);
;}

; CHECK-LABEL: lshift2:
; CHECK: shlq $2, {{.*}}
; CHECK-NEXT: shrq $62, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 2
  %shr = lshr i64 %b, 62
  %or = or i64 %shr, %shl
  ret i64 %or
}

;uint64_t lshift7(uint64_t a, uint64_t b)
;{
;    return (a << 7) | (b >> 57);
;}

; CHECK-LABEL: lshift7:
; CHECK: shlq $7, {{.*}}
; CHECK-NEXT: shrq $57, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift7(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 7
  %shr = lshr i64 %b, 57
  %or = or i64 %shr, %shl
  ret i64 %or
}

;uint64_t lshift63(uint64_t a, uint64_t b)
;{
;    return (a << 63) | (b >> 1);
;}

; CHECK-LABEL: lshift63:
; CHECK: shlq $63, {{.*}}
; CHECK-NEXT: shrq {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift63(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 63
  %shr = lshr i64 %b, 1
  %or = or i64 %shr, %shl
  ret i64 %or
}