1 /* Interchange heuristics and transform for loop interchange on
2    polyhedral representation.
3 
4    Copyright (C) 2009-2014 Free Software Foundation, Inc.
5    Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6    Harsha Jagasia <harsha.jagasia@amd.com>.
7 
8 This file is part of GCC.
9 
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14 
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3.  If not see
22 <http://www.gnu.org/licenses/>.  */
23 
24 #include "config.h"
25 
26 #ifdef HAVE_cloog
27 #include <isl/aff.h>
28 #include <isl/set.h>
29 #include <isl/map.h>
30 #include <isl/union_map.h>
31 #include <isl/ilp.h>
32 #include <cloog/cloog.h>
33 #include <cloog/isl/domain.h>
34 #endif
35 
36 #include "system.h"
37 #include "coretypes.h"
38 #include "tree.h"
39 #include "basic-block.h"
40 #include "tree-ssa-alias.h"
41 #include "internal-fn.h"
42 #include "gimple-expr.h"
43 #include "is-a.h"
44 #include "gimple.h"
45 #include "gimple-iterator.h"
46 #include "tree-ssa-loop.h"
47 #include "dumpfile.h"
48 #include "cfgloop.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "sese.h"
53 
54 #ifdef HAVE_cloog
55 #include "graphite-poly.h"
56 
57 /* XXX isl rewrite following comment */
58 /* Builds a linear expression, of dimension DIM, representing PDR's
59    memory access:
60 
61    L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
62 
63    For an array A[10][20] with two subscript locations s0 and s1, the
64    linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
65    corresponds to a memory stride of 20.
66 
67    OFFSET is a number of dimensions to prepend before the
68    subscript dimensions: s_0, s_1, ..., s_n.
69 
70    Thus, the final linear expression has the following format:
71    0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
72    where the expression itself is:
73    c_0 * s_0 + c_1 * s_1 + ... c_n * s_n.  */
74 
75 static isl_constraint *
build_linearized_memory_access(isl_map * map,poly_dr_p pdr)76 build_linearized_memory_access (isl_map *map, poly_dr_p pdr)
77 {
78   isl_constraint *res;
79   isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map));
80   unsigned offset, nsubs;
81   int i;
82   isl_int size, subsize;
83 
84   res = isl_equality_alloc (ls);
85   isl_int_init (size);
86   isl_int_set_ui (size, 1);
87   isl_int_init (subsize);
88   isl_int_set_ui (subsize, 1);
89 
90   nsubs = isl_set_dim (pdr->extent, isl_dim_set);
91   /* -1 for the already included L dimension.  */
92   offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs;
93   res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1);
94   /* Go through all subscripts from last to first.  First dimension
95      is the alias set, ignore it.  */
96   for (i = nsubs - 1; i >= 1; i--)
97     {
98       isl_space *dc;
99       isl_aff *aff;
100 
101       res = isl_constraint_set_coefficient (res, isl_dim_out, offset + i, size);
102 
103       dc = isl_set_get_space (pdr->extent);
104       aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
105       aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1);
106       isl_set_max (pdr->extent, aff, &subsize);
107       isl_aff_free (aff);
108       isl_int_mul (size, size, subsize);
109     }
110 
111   isl_int_clear (subsize);
112   isl_int_clear (size);
113 
114   return res;
115 }
116 
117 /* Set STRIDE to the stride of PDR in memory by advancing by one in
118    the loop at DEPTH.  */
119 
120 static void
pdr_stride_in_loop(mpz_t stride,graphite_dim_t depth,poly_dr_p pdr)121 pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
122 {
123   poly_bb_p pbb = PDR_PBB (pdr);
124   isl_map *map;
125   isl_set *set;
126   isl_aff *aff;
127   isl_space *dc;
128   isl_constraint *lma, *c;
129   isl_int islstride;
130   graphite_dim_t time_depth;
131   unsigned offset, nt;
132   unsigned i;
133   /* XXX isl rewrite following comments.  */
134   /* Builds a partial difference equations and inserts them
135      into pointset powerset polyhedron P.  Polyhedron is assumed
136      to have the format: T|I|T'|I'|G|S|S'|l1|l2.
137 
138      TIME_DEPTH is the time dimension w.r.t. which we are
139      differentiating.
140      OFFSET represents the number of dimensions between
141      columns t_{time_depth} and t'_{time_depth}.
142      DIM_SCTR is the number of scattering dimensions.  It is
143      essentially the dimensionality of the T vector.
144 
145      The following equations are inserted into the polyhedron P:
146      | t_1 = t_1'
147      | ...
148      | t_{time_depth-1} = t'_{time_depth-1}
149      | t_{time_depth} = t'_{time_depth} + 1
150      | t_{time_depth+1} = t'_{time_depth + 1}
151      | ...
152      | t_{dim_sctr} = t'_{dim_sctr}.  */
153 
154   /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
155      This is the core part of this alogrithm, since this
156      constraint asks for the memory access stride (difference)
157      between two consecutive points in time dimensions.  */
158 
159   /* Add equalities:
160      | t1 = t1'
161      | ...
162      | t_{time_depth-1} = t'_{time_depth-1}
163      | t_{time_depth+1} = t'_{time_depth+1}
164      | ...
165      | t_{dim_sctr} = t'_{dim_sctr}
166 
167      This means that all the time dimensions are equal except for
168      time_depth, where the constraint is t_{depth} = t'_{depth} + 1
169      step.  More to this: we should be careful not to add equalities
170      to the 'coupled' dimensions, which happens when the one dimension
171      is stripmined dimension, and the other dimension corresponds
172      to the point loop inside stripmined dimension.  */
173 
174   /* pdr->accesses:    [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript]
175           ??? [P] not used for PDRs?
176      pdr->extent:      [a,S1..nb_subscript]
177      pbb->domain:      [P1..nb_param,I1..nb_domain]
178      pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr]
179           [T] includes local vars (currently unused)
180 
181      First we create [P,I] -> [T,a,S].  */
182 
183   map = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
184 				    isl_map_copy (pdr->accesses));
185   /* Add a dimension for L: [P,I] -> [T,a,S,L].*/
186   map = isl_map_add_dims (map, isl_dim_out, 1);
187   /* Build a constraint for "lma[S] - L == 0", effectively calculating
188      L in terms of subscripts.  */
189   lma = build_linearized_memory_access (map, pdr);
190   /* And add it to the map, so we now have:
191      [P,I] -> [T,a,S,L] : lma([S]) == L.  */
192   map = isl_map_add_constraint (map, lma);
193 
194   /* Then we create  [P,I,P',I'] -> [T,a,S,L,T',a',S',L'].  */
195   map = isl_map_flat_product (map, isl_map_copy (map));
196 
197   /* Now add the equality T[time_depth] == T'[time_depth]+1.  This will
198      force L' to be the linear address at T[time_depth] + 1. */
199   time_depth = psct_dynamic_dim (pbb, depth);
200   /* Length of [a,S] plus [L] ...  */
201   offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out);
202   /* ... plus [T].  */
203   offset += isl_map_dim (pbb->transformed, isl_dim_out);
204 
205   c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map)));
206   c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1);
207   c = isl_constraint_set_coefficient_si (c, isl_dim_out,
208 					 offset + time_depth, -1);
209   c = isl_constraint_set_constant_si (c, 1);
210   map = isl_map_add_constraint (map, c);
211 
212   /* Now we equate most of the T/T' elements (making PITaSL nearly
213      the same is (PITaSL)', except for one dimension, namely for 'depth'
214      (an index into [I]), after translating to index into [T].  Take care
215      to not produce an empty map, which indicates we wanted to equate
216      two dimensions that are already coupled via the above time_depth
217      dimension.  Happens with strip mining where several scatter dimension
218      are interdependend.  */
219   /* Length of [T].  */
220   nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
221   for (i = 0; i < nt; i++)
222     if (i != time_depth)
223       {
224 	isl_map *temp = isl_map_equate (isl_map_copy (map),
225 					isl_dim_out, i,
226 					isl_dim_out, offset + i);
227 	if (isl_map_is_empty (temp))
228 	  isl_map_free (temp);
229 	else
230 	  {
231 	    isl_map_free (map);
232 	    map = temp;
233 	  }
234       }
235 
236   /* Now maximize the expression L' - L.  */
237   set = isl_map_range (map);
238   dc = isl_set_get_space (set);
239   aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
240   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1);
241   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1);
242   isl_int_init (islstride);
243   isl_set_max (set, aff, &islstride);
244   isl_int_get_gmp (islstride, stride);
245   isl_int_clear (islstride);
246   isl_aff_free (aff);
247   isl_set_free (set);
248 
249   if (dump_file && (dump_flags & TDF_DETAILS))
250     {
251       gmp_fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:  %Zd ",
252 		   pbb_index (pbb), PDR_ID (pdr), (int) depth, stride);
253     }
254 }
255 
256 /* Sets STRIDES to the sum of all the strides of the data references
257    accessed in LOOP at DEPTH.  */
258 
259 static void
memory_strides_in_loop_1(lst_p loop,graphite_dim_t depth,mpz_t strides)260 memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
261 {
262   int i, j;
263   lst_p l;
264   poly_dr_p pdr;
265   mpz_t s, n;
266 
267   mpz_init (s);
268   mpz_init (n);
269 
270   FOR_EACH_VEC_ELT (LST_SEQ (loop), j, l)
271     if (LST_LOOP_P (l))
272       memory_strides_in_loop_1 (l, depth, strides);
273     else
274       FOR_EACH_VEC_ELT (PBB_DRS (LST_PBB (l)), i, pdr)
275 	{
276 	  pdr_stride_in_loop (s, depth, pdr);
277 	  mpz_set_si (n, PDR_NB_REFS (pdr));
278 	  mpz_mul (s, s, n);
279 	  mpz_add (strides, strides, s);
280 	}
281 
282   mpz_clear (s);
283   mpz_clear (n);
284 }
285 
286 /* Sets STRIDES to the sum of all the strides of the data references
287    accessed in LOOP at DEPTH.  */
288 
289 static void
memory_strides_in_loop(lst_p loop,graphite_dim_t depth,mpz_t strides)290 memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
291 {
292   if (mpz_cmp_si (loop->memory_strides, -1) == 0)
293     {
294       mpz_set_si (strides, 0);
295       memory_strides_in_loop_1 (loop, depth, strides);
296     }
297   else
298     mpz_set (strides, loop->memory_strides);
299 }
300 
301 /* Return true when the interchange of loops LOOP1 and LOOP2 is
302    profitable.
303 
304    Example:
305 
306    | int a[100][100];
307    |
308    | int
309    | foo (int N)
310    | {
311    |   int j;
312    |   int i;
313    |
314    |   for (i = 0; i < N; i++)
315    |     for (j = 0; j < N; j++)
316    |       a[j][2 * i] += 1;
317    |
318    |   return a[N][12];
319    | }
320 
321    The data access A[j][i] is described like this:
322 
323    | i   j   N   a  s0  s1   1
324    | 0   0   0   1   0   0  -5    = 0
325    | 0  -1   0   0   1   0   0    = 0
326    |-2   0   0   0   0   1   0    = 0
327    | 0   0   0   0   1   0   0   >= 0
328    | 0   0   0   0   0   1   0   >= 0
329    | 0   0   0   0  -1   0 100   >= 0
330    | 0   0   0   0   0  -1 100   >= 0
331 
332    The linearized memory access L to A[100][100] is:
333 
334    | i   j   N   a  s0  s1   1
335    | 0   0   0   0 100   1   0
336 
337    TODO: the shown format is not valid as it does not show the fact
338    that the iteration domain "i j" is transformed using the scattering.
339 
340    Next, to measure the impact of iterating once in loop "i", we build
341    a maximization problem: first, we add to DR accesses the dimensions
342    k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
343    L1 and L2 are the linearized memory access functions.
344 
345    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
346    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
347    | 0  -1   0   0   1   0   0   0   0   0   0   0   0    = 0  s0 = j
348    |-2   0   0   0   0   1   0   0   0   0   0   0   0    = 0  s1 = 2 * i
349    | 0   0   0   0   1   0   0   0   0   0   0   0   0   >= 0
350    | 0   0   0   0   0   1   0   0   0   0   0   0   0   >= 0
351    | 0   0   0   0  -1   0   0   0   0   0   0   0 100   >= 0
352    | 0   0   0   0   0  -1   0   0   0   0   0   0 100   >= 0
353    | 0   0   0   0 100   1   0   0   0  -1   0   0   0    = 0  L1 = 100 * s0 + s1
354 
355    Then, we generate the polyhedron P2 by interchanging the dimensions
356    (s0, s2), (s1, s3), (L1, L2), (k, i)
357 
358    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
359    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
360    | 0  -1   0   0   0   0   0   1   0   0   0   0   0    = 0  s2 = j
361    | 0   0   0   0   0   0  -2   0   1   0   0   0   0    = 0  s3 = 2 * k
362    | 0   0   0   0   0   0   0   1   0   0   0   0   0   >= 0
363    | 0   0   0   0   0   0   0   0   1   0   0   0   0   >= 0
364    | 0   0   0   0   0   0   0  -1   0   0   0   0 100   >= 0
365    | 0   0   0   0   0   0   0   0  -1   0   0   0 100   >= 0
366    | 0   0   0   0   0   0   0 100   1   0  -1   0   0    = 0  L2 = 100 * s2 + s3
367 
368    then we add to P2 the equality k = i + 1:
369 
370    |-1   0   0   0   0   0   1   0   0   0   0   0  -1    = 0  k = i + 1
371 
372    and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
373 
374    Similarly, to determine the impact of one iteration on loop "j", we
375    interchange (k, j), we add "k = j + 1", and we compute D2 the
376    maximal value of the difference.
377 
378    Finally, the profitability test is D1 < D2: if in the outer loop
379    the strides are smaller than in the inner loop, then it is
380    profitable to interchange the loops at DEPTH1 and DEPTH2.  */
381 
382 static bool
lst_interchange_profitable_p(lst_p nest,int depth1,int depth2)383 lst_interchange_profitable_p (lst_p nest, int depth1, int depth2)
384 {
385   mpz_t d1, d2;
386   bool res;
387 
388   gcc_assert (depth1 < depth2);
389 
390   mpz_init (d1);
391   mpz_init (d2);
392 
393   memory_strides_in_loop (nest, depth1, d1);
394   memory_strides_in_loop (nest, depth2, d2);
395 
396   res = mpz_cmp (d1, d2) < 0;
397 
398   mpz_clear (d1);
399   mpz_clear (d2);
400 
401   return res;
402 }
403 
404 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
405    scattering and assigns the resulting polyhedron to the transformed
406    scattering.  */
407 
408 static void
pbb_interchange_loop_depths(graphite_dim_t depth1,graphite_dim_t depth2,poly_bb_p pbb)409 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
410 			     poly_bb_p pbb)
411 {
412   unsigned i;
413   unsigned dim1 = psct_dynamic_dim (pbb, depth1);
414   unsigned dim2 = psct_dynamic_dim (pbb, depth2);
415   isl_space *d = isl_map_get_space (pbb->transformed);
416   isl_space *d1 = isl_space_range (d);
417   unsigned n = isl_space_dim (d1, isl_dim_out);
418   isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n);
419   isl_map *x = isl_map_universe (d2);
420 
421   x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2);
422   x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1);
423 
424   for (i = 0; i < n; i++)
425     if (i != dim1 && i != dim2)
426       x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i);
427 
428   pbb->transformed = isl_map_apply_range (pbb->transformed, x);
429 }
430 
431 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
432    the statements below LST.  */
433 
434 static void
lst_apply_interchange(lst_p lst,int depth1,int depth2)435 lst_apply_interchange (lst_p lst, int depth1, int depth2)
436 {
437   if (!lst)
438     return;
439 
440   if (LST_LOOP_P (lst))
441     {
442       int i;
443       lst_p l;
444 
445       FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
446 	lst_apply_interchange (l, depth1, depth2);
447     }
448   else
449     pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
450 }
451 
452 /* Return true when the nest starting at LOOP1 and ending on LOOP2 is
453    perfect: i.e. there are no sequence of statements.  */
454 
455 static bool
lst_perfectly_nested_p(lst_p loop1,lst_p loop2)456 lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
457 {
458   if (loop1 == loop2)
459     return true;
460 
461   if (!LST_LOOP_P (loop1))
462     return false;
463 
464   return LST_SEQ (loop1).length () == 1
465          && lst_perfectly_nested_p (LST_SEQ (loop1)[0], loop2);
466 }
467 
468 /* Transform the loop nest between LOOP1 and LOOP2 into a perfect
469    nest.  To continue the naming tradition, this function is called
470    after perfect_nestify.  NEST is set to the perfectly nested loop
471    that is created.  BEFORE/AFTER are set to the loops distributed
472    before/after the loop NEST.  */
473 
474 static void
lst_perfect_nestify(lst_p loop1,lst_p loop2,lst_p * before,lst_p * nest,lst_p * after)475 lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
476 		     lst_p *nest, lst_p *after)
477 {
478   poly_bb_p first, last;
479 
480   gcc_assert (loop1 && loop2
481 	      && loop1 != loop2
482 	      && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
483 
484   first = LST_PBB (lst_find_first_pbb (loop2));
485   last = LST_PBB (lst_find_last_pbb (loop2));
486 
487   *before = copy_lst (loop1);
488   *nest = copy_lst (loop1);
489   *after = copy_lst (loop1);
490 
491   lst_remove_all_before_including_pbb (*before, first, false);
492   lst_remove_all_before_including_pbb (*after, last, true);
493 
494   lst_remove_all_before_excluding_pbb (*nest, first, true);
495   lst_remove_all_before_excluding_pbb (*nest, last, false);
496 
497   if (lst_empty_p (*before))
498     {
499       free_lst (*before);
500       *before = NULL;
501     }
502   if (lst_empty_p (*after))
503     {
504       free_lst (*after);
505       *after = NULL;
506     }
507   if (lst_empty_p (*nest))
508     {
509       free_lst (*nest);
510       *nest = NULL;
511     }
512 }
513 
514 /* Try to interchange LOOP1 with LOOP2 for all the statements of the
515    body of LOOP2.  LOOP1 contains LOOP2.  Return true if it did the
516    interchange.  */
517 
518 static bool
lst_try_interchange_loops(scop_p scop,lst_p loop1,lst_p loop2)519 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
520 {
521   int depth1 = lst_depth (loop1);
522   int depth2 = lst_depth (loop2);
523   lst_p transformed;
524 
525   lst_p before = NULL, nest = NULL, after = NULL;
526 
527   if (!lst_perfectly_nested_p (loop1, loop2))
528     lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
529 
530   if (!lst_interchange_profitable_p (loop2, depth1, depth2))
531     return false;
532 
533   lst_apply_interchange (loop2, depth1, depth2);
534 
535   /* Sync the transformed LST information and the PBB scatterings
536      before using the scatterings in the data dependence analysis.  */
537   if (before || nest || after)
538     {
539       transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
540 				      before, nest, after);
541       lst_update_scattering (transformed);
542       free_lst (transformed);
543     }
544 
545   if (graphite_legal_transform (scop))
546     {
547       if (dump_file && (dump_flags & TDF_DETAILS))
548 	fprintf (dump_file,
549 		 "Loops at depths %d and %d will be interchanged.\n",
550 		 depth1, depth2);
551 
552       /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP.  */
553       lst_insert_in_sequence (before, loop1, true);
554       lst_insert_in_sequence (after, loop1, false);
555 
556       if (nest)
557 	{
558 	  lst_replace (loop1, nest);
559 	  free_lst (loop1);
560 	}
561 
562       return true;
563     }
564 
565   /* Undo the transform.  */
566   free_lst (before);
567   free_lst (nest);
568   free_lst (after);
569   lst_apply_interchange (loop2, depth2, depth1);
570   return false;
571 }
572 
573 /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
574    with the loop OUTER in LST_SEQ (OUTER_FATHER).  */
575 
576 static bool
lst_interchange_select_inner(scop_p scop,lst_p outer_father,int outer,lst_p inner_father)577 lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
578 			      lst_p inner_father)
579 {
580   int inner;
581   lst_p loop1, loop2;
582 
583   gcc_assert (outer_father
584 	      && LST_LOOP_P (outer_father)
585 	      && LST_LOOP_P (LST_SEQ (outer_father)[outer])
586 	      && inner_father
587 	      && LST_LOOP_P (inner_father));
588 
589   loop1 = LST_SEQ (outer_father)[outer];
590 
591   FOR_EACH_VEC_ELT (LST_SEQ (inner_father), inner, loop2)
592     if (LST_LOOP_P (loop2)
593 	&& (lst_try_interchange_loops (scop, loop1, loop2)
594 	    || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
595       return true;
596 
597   return false;
598 }
599 
600 /* Interchanges all the loops of LOOP and the loops of its body that
601    are considered profitable to interchange.  Return the number of
602    interchanged loops.  OUTER is the index in LST_SEQ (LOOP) that
603    points to the next outer loop to be considered for interchange.  */
604 
605 static int
lst_interchange_select_outer(scop_p scop,lst_p loop,int outer)606 lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
607 {
608   lst_p l;
609   int res = 0;
610   int i = 0;
611   lst_p father;
612 
613   if (!loop || !LST_LOOP_P (loop))
614     return 0;
615 
616   father = LST_LOOP_FATHER (loop);
617   if (father)
618     {
619       while (lst_interchange_select_inner (scop, father, outer, loop))
620 	{
621 	  res++;
622 	  loop = LST_SEQ (father)[outer];
623 	}
624     }
625 
626   if (LST_LOOP_P (loop))
627     FOR_EACH_VEC_ELT (LST_SEQ (loop), i, l)
628       if (LST_LOOP_P (l))
629 	res += lst_interchange_select_outer (scop, l, i);
630 
631   return res;
632 }
633 
634 /* Interchanges all the loop depths that are considered profitable for
635    SCOP.  Return the number of interchanged loops.  */
636 
637 int
scop_do_interchange(scop_p scop)638 scop_do_interchange (scop_p scop)
639 {
640   int res = lst_interchange_select_outer
641     (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
642 
643   lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
644 
645   return res;
646 }
647 
648 
649 #endif
650 
651