1; RUN: opt %loadPolly -analyze -polly-process-unprofitable  -polly-remarks-minimal \
2; RUN:     -polly-opt-isl  -polly-pattern-matching-based-opts=true \
3; RUN:     -polly-target-throughput-vector-fma=1 \
4; RUN:     -polly-target-latency-vector-fma=1 \
5; RUN:     -polly-ast -polly-target-vector-register-bitwidth=4096 \
6; RUN:     -polly-target-1st-cache-level-associativity=3 < %s | FileCheck %s
7;
8;     /* Test that Polly does not crash due to configurations that can lead to
9;    incorrect tile size computations.
10;    The parameters are set up such that Car in `getMacroKernelParams`
11;    evaluates to 0. */
12;
13;    static const int N = 3000;
14;
15;    void f(int A[N][N], int B[N][N], int C[N][N]) {
16;      for (int i = 0; i < N; i++) {
17;        for (int j = 0; j < N; j++) {
18;          A[i][j] = 0;
19;          for (int k = 0; k < N; k++) {
20;            A[i][j] += B[i][k] * C[k][j];
21;          }
22;        }
23;      }
24;    }
25;
26; CHECK:           // 1st level tiling - Tiles
27; CHECK-NEXT:      for (int c0 = 0; c0 <= 93; c0 += 1)
28; CHECK-NEXT:        for (int c1 = 0; c1 <= 93; c1 += 1) {
29; CHECK-NEXT:          // 1st level tiling - Points
30; CHECK-NEXT:          for (int c2 = 0; c2 <= min(31, -32 * c0 + 2999); c2 += 1)
31; CHECK-NEXT:            for (int c3 = 0; c3 <= min(31, -32 * c1 + 2999); c3 += 1)
32; CHECK-NEXT:              Stmt_for_body3(32 * c0 + c2, 32 * c1 + c3);
33; CHECK-NEXT:        }
34; CHECK-NEXT:      // Inter iteration alias-free
35; CHECK-NEXT:      // Register tiling - Tiles
36; CHECK-NEXT:      for (int c0 = 0; c0 <= 23; c0 += 1)
37; CHECK-NEXT:        for (int c1 = 0; c1 <= 2999; c1 += 1)
38; CHECK-NEXT:          for (int c2 = 0; c2 <= 2999; c2 += 1) {
39; CHECK-NEXT:            // Register tiling - Points
40; CHECK-NEXT:            {
41; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0, c2);
42; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 1, c2);
43; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 2, c2);
44; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 3, c2);
45; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 4, c2);
46; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 5, c2);
47; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 6, c2);
48; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 7, c2);
49; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 8, c2);
50; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 9, c2);
51; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 10, c2);
52; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 11, c2);
53; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 12, c2);
54; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 13, c2);
55; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 14, c2);
56; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 15, c2);
57; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 16, c2);
58; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 17, c2);
59; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 18, c2);
60; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 19, c2);
61; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 20, c2);
62; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 21, c2);
63; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 22, c2);
64; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 23, c2);
65; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 24, c2);
66; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 25, c2);
67; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 26, c2);
68; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 27, c2);
69; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 28, c2);
70; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 29, c2);
71; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 30, c2);
72; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 31, c2);
73; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 32, c2);
74; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 33, c2);
75; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 34, c2);
76; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 35, c2);
77; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 36, c2);
78; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 37, c2);
79; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 38, c2);
80; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 39, c2);
81; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 40, c2);
82; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 41, c2);
83; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 42, c2);
84; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 43, c2);
85; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 44, c2);
86; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 45, c2);
87; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 46, c2);
88; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 47, c2);
89; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 48, c2);
90; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 49, c2);
91; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 50, c2);
92; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 51, c2);
93; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 52, c2);
94; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 53, c2);
95; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 54, c2);
96; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 55, c2);
97; CHECK-NEXT:              if (c0 <= 22) {
98; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 56, c2);
99; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 57, c2);
100; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 58, c2);
101; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 59, c2);
102; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 60, c2);
103; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 61, c2);
104; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 62, c2);
105; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 63, c2);
106; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 64, c2);
107; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 65, c2);
108; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 66, c2);
109; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 67, c2);
110; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 68, c2);
111; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 69, c2);
112; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 70, c2);
113; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 71, c2);
114; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 72, c2);
115; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 73, c2);
116; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 74, c2);
117; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 75, c2);
118; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 76, c2);
119; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 77, c2);
120; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 78, c2);
121; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 79, c2);
122; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 80, c2);
123; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 81, c2);
124; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 82, c2);
125; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 83, c2);
126; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 84, c2);
127; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 85, c2);
128; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 86, c2);
129; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 87, c2);
130; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 88, c2);
131; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 89, c2);
132; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 90, c2);
133; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 91, c2);
134; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 92, c2);
135; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 93, c2);
136; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 94, c2);
137; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 95, c2);
138; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 96, c2);
139; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 97, c2);
140; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 98, c2);
141; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 99, c2);
142; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 100, c2);
143; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 101, c2);
144; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 102, c2);
145; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 103, c2);
146; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 104, c2);
147; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 105, c2);
148; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 106, c2);
149; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 107, c2);
150; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 108, c2);
151; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 109, c2);
152; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 110, c2);
153; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 111, c2);
154; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 112, c2);
155; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 113, c2);
156; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 114, c2);
157; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 115, c2);
158; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 116, c2);
159; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 117, c2);
160; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 118, c2);
161; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 119, c2);
162; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 120, c2);
163; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 121, c2);
164; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 122, c2);
165; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 123, c2);
166; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 124, c2);
167; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 125, c2);
168; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 126, c2);
169; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 127, c2);
170; CHECK-NEXT:              }
171; CHECK-NEXT:            }
172; CHECK-NEXT:          }
; Module data layout string: little-endian (e), Mach-O style name mangling
; (m:o), i64 aligned to 64 bits, f80 aligned to 128 bits, native integer
; widths of 8/16/32/64 bits (n8:16:32:64), and a 128-bit natural stack
; alignment (S128).
173target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
174
; @f - integer matrix multiplication kernel over 3000 x 3000 i32 arrays.
;
; IR form of the C function shown in the comment at the top of this file:
; for every (i, j), A[i][j] is first set to 0 and then accumulates
; B[i][k] * C[k][j] over all k.
;
; Parameters:
;   %A - pointer to rows of [3000 x i32]; written (and re-read) by the kernel
;   %B - pointer to rows of [3000 x i32]; only loaded from
;   %C - pointer to rows of [3000 x i32]; only loaded from
;
; Loop structure (each counter is an i64 that starts at 0, is incremented by
; 1, and the loop is left once the counter equals 3000):
;   %indvars.iv4 - outer loop  (i): row index into A and B
;   %indvars.iv1 - middle loop (j): column index into A and C
;   %indvars.iv  - inner loop  (k): column index into B, row index into C
;
; NOTE(review): the expected output earlier in this file refers to statements
; named Stmt_for_body3 and Stmt_for_body8, which match the block labels
; for.body3 and for.body8 below - presumably the names are derived from these
; labels, so renaming the blocks would require updating the expectations.
175define void @f([3000 x i32]* %A, [3000 x i32]* %B, [3000 x i32]* %C) {
176entry:
177  br label %for.cond
178
179for.cond:                                         ; preds = %for.inc24, %entry
  ; Outer loop header: i is 0 on entry and %indvars.iv.next5 on the back edge.
180  %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc24 ], [ 0, %entry ]
  ; Continue while i != 3000; otherwise leave the whole loop nest.
181  %exitcond6 = icmp ne i64 %indvars.iv4, 3000
182  br i1 %exitcond6, label %for.body, label %for.end26
183
184for.body:                                         ; preds = %for.cond
  ; Outer loop body only forwards into the middle loop.
185  br label %for.cond1
186
187for.cond1:                                        ; preds = %for.inc21, %for.body
  ; Middle loop header: j is 0 on entry and %indvars.iv.next2 on the back edge.
188  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc21 ], [ 0, %for.body ]
  ; Continue while j != 3000.
189  %exitcond3 = icmp ne i64 %indvars.iv1, 3000
190  br i1 %exitcond3, label %for.body3, label %for.end23
191
192for.body3:                                        ; preds = %for.cond1
  ; Initialization statement: A[i][j] = 0, executed once per (i, j) before
  ; the inner accumulation loop runs.
193  %arrayidx5 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1
194  store i32 0, i32* %arrayidx5, align 4
195  br label %for.cond6
196
197for.cond6:                                        ; preds = %for.inc, %for.body3
  ; Inner loop header: k is 0 on entry and %indvars.iv.next on the back edge.
198  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body3 ]
  ; Continue while k != 3000.
199  %exitcond = icmp ne i64 %indvars.iv, 3000
200  br i1 %exitcond, label %for.body8, label %for.end
201
202for.body8:                                        ; preds = %for.cond6
  ; Accumulation statement: A[i][j] += B[i][k] * C[k][j].
  ; Load B[i][k].
203  %arrayidx12 = getelementptr inbounds [3000 x i32], [3000 x i32]* %B, i64 %indvars.iv4, i64 %indvars.iv
204  %tmp = load i32, i32* %arrayidx12, align 4
  ; Load C[k][j].
205  %arrayidx16 = getelementptr inbounds [3000 x i32], [3000 x i32]* %C, i64 %indvars.iv, i64 %indvars.iv1
206  %tmp7 = load i32, i32* %arrayidx16, align 4
207  %mul = mul nsw i32 %tmp, %tmp7
  ; Read-modify-write of A[i][j]; this is the same address that %arrayidx5
  ; computes in for.body3 (identical base and indices).
208  %arrayidx20 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1
209  %tmp8 = load i32, i32* %arrayidx20, align 4
210  %add = add nsw i32 %tmp8, %mul
211  store i32 %add, i32* %arrayidx20, align 4
212  br label %for.inc
213
214for.inc:                                          ; preds = %for.body8
  ; Inner loop latch: k + 1.
215  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
216  br label %for.cond6
217
218for.end:                                          ; preds = %for.cond6
  ; Inner loop finished for this (i, j); fall through to the middle latch.
219  br label %for.inc21
220
221for.inc21:                                        ; preds = %for.end
  ; Middle loop latch: j + 1.
222  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
223  br label %for.cond1
224
225for.end23:                                        ; preds = %for.cond1
  ; Middle loop finished for this i; fall through to the outer latch.
226  br label %for.inc24
227
228for.inc24:                                        ; preds = %for.end23
  ; Outer loop latch: i + 1.
229  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
230  br label %for.cond
231
232for.end26:                                        ; preds = %for.cond
  ; All three loops have completed; nothing is returned.
233  ret void
234}
235