; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
;
; Test PBQP is able to fulfill the accumulator chaining constraint.
;
; @fir computes two independent 8-term sums of products over the same
; coefficient array:
;     *rx = c[0]*x[0] + c[1]*x[1] + ... + c[7]*x[7]
;     *ry = c[0]*y[0] + c[1]*y[1] + ... + c[7]*y[7]
; Term 0 of each sum is a plain fmul; terms 1..7 are fmul+fadd pairs, so each
; sum offers seven multiply-accumulate candidates. That is why each prefix
; below lists seven fmadd patterns.
;
; Both invocations use the same llc command line and differ only in the
; FileCheck prefix. The CHECK-EVEN patterns require seven fmadd instructions,
; in order, whose destination and accumulator operands are both even-numbered
; D registers; the CHECK-ODD patterns require the same for odd-numbered
; D registers.
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; CHECK-LABEL: fir:
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
entry:
  ; Term 0: plain products that seed the x-sum (%mul) and the y-sum (%mul7).
  %0 = load double, double* %c, align 8
  %1 = load double, double* %x, align 8
  %mul = fmul fast double %1, %0
  %2 = load double, double* %y, align 8
  %mul7 = fmul fast double %2, %0

  ; Term 1: c[1] is loaded once and shared by both sums.
  %arrayidx.1 = getelementptr inbounds double, double* %c, i64 1
  %3 = load double, double* %arrayidx.1, align 8
  %arrayidx2.1 = getelementptr inbounds double, double* %x, i64 1
  %4 = load double, double* %arrayidx2.1, align 8
  %mul.1 = fmul fast double %4, %3
  %add.1 = fadd fast double %mul.1, %mul
  %arrayidx6.1 = getelementptr inbounds double, double* %y, i64 1
  %5 = load double, double* %arrayidx6.1, align 8
  %mul7.1 = fmul fast double %5, %3
  %add8.1 = fadd fast double %mul7.1, %mul7

  ; Term 2: each fadd consumes the previous partial sum of its own chain.
  %arrayidx.2 = getelementptr inbounds double, double* %c, i64 2
  %6 = load double, double* %arrayidx.2, align 8
  %arrayidx2.2 = getelementptr inbounds double, double* %x, i64 2
  %7 = load double, double* %arrayidx2.2, align 8
  %mul.2 = fmul fast double %7, %6
  %add.2 = fadd fast double %mul.2, %add.1
  %arrayidx6.2 = getelementptr inbounds double, double* %y, i64 2
  %8 = load double, double* %arrayidx6.2, align 8
  %mul7.2 = fmul fast double %8, %6
  %add8.2 = fadd fast double %mul7.2, %add8.1

  ; Term 3.
  %arrayidx.3 = getelementptr inbounds double, double* %c, i64 3
  %9 = load double, double* %arrayidx.3, align 8
  %arrayidx2.3 = getelementptr inbounds double, double* %x, i64 3
  %10 = load double, double* %arrayidx2.3, align 8
  %mul.3 = fmul fast double %10, %9
  %add.3 = fadd fast double %mul.3, %add.2
  %arrayidx6.3 = getelementptr inbounds double, double* %y, i64 3
  %11 = load double, double* %arrayidx6.3, align 8
  %mul7.3 = fmul fast double %11, %9
  %add8.3 = fadd fast double %mul7.3, %add8.2

  ; Term 4.
  %arrayidx.4 = getelementptr inbounds double, double* %c, i64 4
  %12 = load double, double* %arrayidx.4, align 8
  %arrayidx2.4 = getelementptr inbounds double, double* %x, i64 4
  %13 = load double, double* %arrayidx2.4, align 8
  %mul.4 = fmul fast double %13, %12
  %add.4 = fadd fast double %mul.4, %add.3
  %arrayidx6.4 = getelementptr inbounds double, double* %y, i64 4
  %14 = load double, double* %arrayidx6.4, align 8
  %mul7.4 = fmul fast double %14, %12
  %add8.4 = fadd fast double %mul7.4, %add8.3

  ; Term 5.
  %arrayidx.5 = getelementptr inbounds double, double* %c, i64 5
  %15 = load double, double* %arrayidx.5, align 8
  %arrayidx2.5 = getelementptr inbounds double, double* %x, i64 5
  %16 = load double, double* %arrayidx2.5, align 8
  %mul.5 = fmul fast double %16, %15
  %add.5 = fadd fast double %mul.5, %add.4
  %arrayidx6.5 = getelementptr inbounds double, double* %y, i64 5
  %17 = load double, double* %arrayidx6.5, align 8
  %mul7.5 = fmul fast double %17, %15
  %add8.5 = fadd fast double %mul7.5, %add8.4

  ; Term 6.
  %arrayidx.6 = getelementptr inbounds double, double* %c, i64 6
  %18 = load double, double* %arrayidx.6, align 8
  %arrayidx2.6 = getelementptr inbounds double, double* %x, i64 6
  %19 = load double, double* %arrayidx2.6, align 8
  %mul.6 = fmul fast double %19, %18
  %add.6 = fadd fast double %mul.6, %add.5
  %arrayidx6.6 = getelementptr inbounds double, double* %y, i64 6
  %20 = load double, double* %arrayidx6.6, align 8
  %mul7.6 = fmul fast double %20, %18
  %add8.6 = fadd fast double %mul7.6, %add8.5

  ; Term 7: last link of both chains.
  %arrayidx.7 = getelementptr inbounds double, double* %c, i64 7
  %21 = load double, double* %arrayidx.7, align 8
  %arrayidx2.7 = getelementptr inbounds double, double* %x, i64 7
  %22 = load double, double* %arrayidx2.7, align 8
  %mul.7 = fmul fast double %22, %21
  %add.7 = fadd fast double %mul.7, %add.6
  %arrayidx6.7 = getelementptr inbounds double, double* %y, i64 7
  %23 = load double, double* %arrayidx6.7, align 8
  %mul7.7 = fmul fast double %23, %21
  %add8.7 = fadd fast double %mul7.7, %add8.6

  ; Write the x-sum to *rx and the y-sum to *ry.
  store double %add.7, double* %rx, align 8
  store double %add8.7, double* %ry, align 8
  ret void
}