; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -S | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These examples correspond to input code like:
;
; void t(long * __restrict a, long * __restrict b) {
;   a[0] *= b[0];
;   a[1] *= b[1];
; }
;
; If we SLP vectorise this then we end up with something like this because we
; don't have a mul.2d:
;
;   ldr   q0, [x1]
;   ldr   q1, [x0]
;   fmov  x8, d0
;   mov   x10, v0.d[1]
;   fmov  x9, d1
;   mov   x11, v1.d[1]
;   mul   x8, x9, x8
;   mul   x9, x11, x10
;   fmov  d0, x8
;   mov   v0.d[1], x9
;   str   q0, [x0]
;   ret
;
; If we don't SLP vectorise but scalarize this we get this instead:
;
;   ldp   x8, x9, [x1]
;   ldp   x10, x11, [x0]
;   mul   x9, x11, x9
;   mul   x8, x10, x8
;   stp   x8, x9, [x0]
;   ret
;
define void @mul(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store i64 [[MUL]], i64* [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    store i64 [[MUL4]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %b, align 8
  %1 = load i64, i64* %a, align 8
  %mul = mul nsw i64 %1, %0
  store i64 %mul, i64* %a, align 8
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
  %2 = load i64, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
  %3 = load i64, i64* %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  store i64 %mul4, i64* %arrayidx3, align 8
  ret void
}

; Similar example, but now a multiply-accumulate:
;
; void x (long * __restrict a, long * __restrict b) {
;   a[0] *= b[0];
;   a[1] *= b[1];
;   a[0] += b[0];
;   a[1] += b[1];
; }
;
define void @mac(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
; CHECK-LABEL: @mac(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], [[TMP0]]
; CHECK-NEXT:    store i64 [[ADD]], i64* [[A]], align 8
; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i64 [[MUL4]], [[TMP2]]
; CHECK-NEXT:    store i64 [[ADD9]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %b, align 8
  %1 = load i64, i64* %a, align 8
  %mul = mul nsw i64 %1, %0
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
  %2 = load i64, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
  %3 = load i64, i64* %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  %add = add nsw i64 %mul, %0
  store i64 %add, i64* %a, align 8
  %add9 = add nsw i64 %mul4, %2
  store i64 %add9, i64* %arrayidx3, align 8
  ret void
}