; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -S | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These examples correspond to input code like:
;
;   void t(long * __restrict a, long * __restrict b) {
;     a[0] *= b[0];
;     a[1] *= b[1];
;   }
;
; If we SLP vectorise this then we end up with something like this because we
; don't have a mul.2d:
;
;        ldr     q0, [x1]
;        ldr     q1, [x0]
;        fmov    x8, d0
;        mov     x10, v0.d[1]
;        fmov    x9, d1
;        mov     x11, v1.d[1]
;        mul     x8, x9, x8
;        mul     x9, x11, x10
;        fmov    d0, x8
;        mov     v0.d[1], x9
;        str     q0, [x0]
;        ret
;
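; That codegen falls out of the SLP vectoriser forming IR along these lines
; (an illustrative sketch with placeholder value names, after the usual
; pointer bitcasts; this is not checked by the test):
;
;   %vb = load <2 x i64>, <2 x i64>* %pb, align 8
;   %va = load <2 x i64>, <2 x i64>* %pa, align 8
;   %vmul = mul nsw <2 x i64> %va, %vb
;   store <2 x i64> %vmul, <2 x i64>* %pa, align 8
;
; With no 64-bit lane integer multiply in NEON, the backend has to move both
; lanes to GPRs, multiply there, and move the results back.
;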
; If we don't SLP vectorise and leave the code scalar, we get this instead:
;
;        ldp     x8, x9, [x1]
;        ldp     x10, x11, [x0]
;        mul     x9, x11, x9
;        mul     x8, x10, x8
;        stp     x8, x9, [x0]
;        ret
;
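; (Both snippets above are illustrative rather than checked output; something
; similar can be reproduced by running the corresponding scalar or vectorised
; IR through e.g. "llc -mtriple=aarch64-linux-gnu -O2", though exact register
; allocation and scheduling vary between LLVM versions.)
;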
define void @mul(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store i64 [[MUL]], i64* [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    store i64 [[MUL4]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %b, align 8
  %1 = load i64, i64* %a, align 8
  %mul = mul nsw i64 %1, %0
  store i64 %mul, i64* %a, align 8
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
  %2 = load i64, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
  %3 = load i64, i64* %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  store i64 %mul4, i64* %arrayidx3, align 8
  ret void
}

; Similar example, but now a multiply-accumulate:
;
;   void x(long * __restrict a, long * __restrict b) {
;     a[0] *= b[0];
;     a[1] *= b[1];
;     a[0] += b[0];
;     a[1] += b[1];
;   }
;
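; Kept scalar, the backend can fold each mul/add pair into an AArch64 madd
; (multiply-add). An illustrative sketch of possible scalar codegen (not
; checked by this test; exact output varies between LLVM versions):
;
;        ldp     x8, x9, [x1]
;        ldp     x10, x11, [x0]
;        madd    x8, x10, x8, x8
;        madd    x9, x11, x9, x9
;        stp     x8, x9, [x0]
;        ret
;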
define void @mac(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
; CHECK-LABEL: @mac(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], [[TMP0]]
; CHECK-NEXT:    store i64 [[ADD]], i64* [[A]], align 8
; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i64 [[MUL4]], [[TMP2]]
; CHECK-NEXT:    store i64 [[ADD9]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %b, align 8
  %1 = load i64, i64* %a, align 8
  %mul = mul nsw i64 %1, %0
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
  %2 = load i64, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
  %3 = load i64, i64* %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  %add = add nsw i64 %mul, %0
  store i64 %add, i64* %a, align 8
  %add9 = add nsw i64 %mul4, %2
  store i64 %add9, i64* %arrayidx3, align 8
  ret void
}
