1; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=no -analyze < %s | FileCheck %s
2; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=yes -analyze < %s | FileCheck %s --check-prefix=OUTER
3
4; By skewing, the diagonal can be made parallel. ISL does this when the
5; 'outer_coincidence' option is enabled.
6;
7; void func(int m, int n, float A[static const restrict m][n]) {
8;  for (int i = 1; i < m; i+=1)
9;    for (int j = 1; j < n; j+=1)
10;      A[i][j] = A[i-1][j] + A[i][j-1];
11;}
12
; @func: two nested counted loops over a flat float array %A.
;   outer: i starts at 1 and runs while i < %m (signed compare)
;   inner: j starts at 1 and runs while j < %n (signed compare)
;   body:  A[i*n + j] = A[(i-1)*n + j] + A[i*n + (j-1)]
; Each iteration reads the element one row up and the element one column to
; the left of the element it writes.
; NOTE(review): attribute group #0 is referenced here but its definition is not
; visible in this part of the file -- confirm it exists or is not required.
13define void @func(i64 %m, i64 %n, float* noalias nonnull %A) #0 {
14entry:
15  br label %for.cond
16
17for.cond:                                         ; preds = %for.inc11, %entry
18  %i.0 = phi i64 [ 1, %entry ], [ %add12, %for.inc11 ] ; i: 1 on entry, i + 1 from the latch
19  %cmp = icmp slt i64 %i.0, %m ; outer loop condition: i < m
20  br i1 %cmp, label %for.cond1.preheader, label %for.end13
21
22for.cond1.preheader:                              ; preds = %for.cond
23  br label %for.cond1
24
25for.cond1:                                        ; preds = %for.cond1.preheader, %for.body3
26  %j.0 = phi i64 [ %add10, %for.body3 ], [ 1, %for.cond1.preheader ] ; j: 1 on loop entry, j + 1 from the body
27  %cmp2 = icmp slt i64 %j.0, %n ; inner loop condition: j < n
28  br i1 %cmp2, label %for.body3, label %for.inc11
29
30for.body3:                                        ; preds = %for.cond1
31  %sub = add nsw i64 %i.0, -1 ; i - 1
32  %tmp = mul nsw i64 %sub, %n ; (i - 1) * n
33  %arrayidx = getelementptr inbounds float, float* %A, i64 %tmp
34  %arrayidx4 = getelementptr inbounds float, float* %arrayidx, i64 %j.0 ; &A[(i-1)*n + j]
35  %tmp13 = load float, float* %arrayidx4, align 4
36  %sub5 = add nsw i64 %j.0, -1 ; j - 1
37  %tmp14 = mul nsw i64 %i.0, %n ; i * n
38  %arrayidx6 = getelementptr inbounds float, float* %A, i64 %tmp14
39  %arrayidx7 = getelementptr inbounds float, float* %arrayidx6, i64 %sub5 ; &A[i*n + (j-1)]
40  %tmp15 = load float, float* %arrayidx7, align 4
41  %add = fadd float %tmp13, %tmp15 ; sum of the two loaded neighbours
42  %tmp16 = mul nsw i64 %i.0, %n ; i * n, recomputed (same operands as %tmp14)
43  %arrayidx8 = getelementptr inbounds float, float* %A, i64 %tmp16
44  %arrayidx9 = getelementptr inbounds float, float* %arrayidx8, i64 %j.0 ; &A[i*n + j]
45  store float %add, float* %arrayidx9, align 4
46  %add10 = add nuw nsw i64 %j.0, 1 ; j + 1
47  br label %for.cond1
48
49for.inc11:                                        ; preds = %for.cond1
50  %add12 = add nuw nsw i64 %i.0, 1 ; i + 1
51  br label %for.cond
52
53for.end13:                                        ; preds = %for.cond
54  ret void
55}
56
57
58; CHECK:      #pragma minimal dependence distance: 1
59; CHECK-NEXT: for (int c0 = 0; c0 < m - 1; c0 += 1)
60; CHECK-NEXT:   #pragma minimal dependence distance: 1
61; CHECK-NEXT:   for (int c1 = 0; c1 < n - 1; c1 += 1)
62; CHECK-NEXT:     Stmt_for_body3(c0, c1);
63
64; OUTER:      #pragma minimal dependence distance: 1
65; OUTER-NEXT: for (int c0 = 0; c0 < m + n - 3; c0 += 1)
66; OUTER-NEXT:   #pragma simd
67; OUTER-NEXT:   #pragma known-parallel
68; OUTER-NEXT:   for (int c1 = max(0, -n + c0 + 2); c1 <= min(m - 2, c0); c1 += 1)
69; OUTER-NEXT:     Stmt_for_body3(c1, c0 - c1);
70