xref: /openbsd/gnu/gcc/gcc/lambda-code.c (revision 404b540a)
1 /*  Loop transformation code generation
2     Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3     Contributed by Daniel Berlin <dberlin@dberlin.org>
4 
5     This file is part of GCC.
6 
7     GCC is free software; you can redistribute it and/or modify it under
8     the terms of the GNU General Public License as published by the Free
9     Software Foundation; either version 2, or (at your option) any later
10     version.
11 
12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15     for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with GCC; see the file COPYING.  If not, write to the Free
19     Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20     02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-pass.h"
41 #include "tree-scalar-evolution.h"
42 #include "vec.h"
43 #include "lambda.h"
44 #include "vecprim.h"
45 
46 /* This loop nest code generation is based on non-singular matrix
47    math.
48 
49  A little terminology and a general sketch of the algorithm.  See "A singular
50  loop transformation framework based on non-singular matrices" by Wei Li and
51  Keshav Pingali for formal proofs that the various statements below are
52  correct.
53 
54  A loop iteration space represents the points traversed by the loop.  A point in the
55  iteration space can be represented by a vector of size <loop depth>.  You can
56  therefore represent the iteration space as an integral combinations of a set
57  of basis vectors.
58 
59  A loop iteration space is dense if every integer point between the loop
60  bounds is a point in the iteration space.  Every loop with a step of 1
61  therefore has a dense iteration space.
62 
63  for i = 1 to 3, step 1 is a dense iteration space.
64 
65  A loop iteration space is sparse if it is not dense.  That is, the iteration
66  space skips integer points that are within the loop bounds.
67 
68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69  2 is skipped.
70 
71  Dense source spaces are easy to transform, because they don't skip any
72  points to begin with.  Thus we can compute the exact bounds of the target
73  space using min/max and floor/ceil.
74 
75  For a dense source space, we take the transformation matrix, decompose it
76  into a lower triangular part (H) and a unimodular part (U).
77  We then compute the auxiliary space from the unimodular part (source loop
78  nest . U = auxiliary space) , which has two important properties:
79   1. It traverses the iterations in the same lexicographic order as the source
80   space.
81   2. It is a dense space when the source is a dense space (even if the target
82   space is going to be sparse).
83 
84  Given the auxiliary space, we use the lower triangular part to compute the
85  bounds in the target space by simple matrix multiplication.
86  The gaps in the target space (IE the new loop step sizes) will be the
87  diagonals of the H matrix.
88 
89  Sparse source spaces require another step, because you can't directly compute
90  the exact bounds of the auxiliary and target space from the sparse space.
91  Rather than try to come up with a separate algorithm to handle sparse source
92  spaces directly, we just find a legal transformation matrix that gives you
93  the sparse source space, from a dense space, and then transform the dense
94  space.
95 
96  For a regular sparse space, you can represent the source space as an integer
97  lattice, and the base space of that lattice will always be dense.  Thus, we
98  effectively use the lattice to figure out the transformation from the lattice
99  base space, to the sparse iteration space (IE what transform was applied to
100  the dense space to make it sparse).  We then compose this transform with the
101  transformation matrix specified by the user (since our matrix transformations
102  are closed under composition, this is okay).  We can then use the base space
103  (which is dense) plus the composed transformation matrix, to compute the rest
104  of the transform using the dense space algorithm above.
105 
106  In other words, our sparse source space (B) is decomposed into a dense base
107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108  We then compute the composition of L and the user transformation matrix (T),
109  so that T is now a transform from A to the result, instead of from B to the
110  result.
111  IE A.(LT) = result instead of B.T = result
112  Since A is now a dense source space, we can use the dense source space
113  algorithm above to compute the result of applying transform (LT) to A.
114 
115  Fourier-Motzkin elimination is used to compute the bounds of the base space
116  of the lattice.  */
117 
118 static bool perfect_nestify (struct loops *,
119 			     struct loop *, VEC(tree,heap) *,
120 			     VEC(tree,heap) *, VEC(int,heap) *,
121 			     VEC(tree,heap) *);
/* Lattice stuff that is internal to the code generation algorithm.

   A lattice describes a sparse iteration space as a dense base space
   plus the transform that maps the base space onto the sparse one:
   x = BASE * y + ORIGIN (+ ORIGIN_INVARIANTS * invariants), where y
   ranges over the dense base space.  */

typedef struct
{
  /* Lattice base matrix (the transform applied to the dense base
     space; the identity when the source space is already dense).  */
  lambda_matrix base;
  /* Lattice dimension (equal to the depth of the loop nest it was
     computed from).  */
  int dimension;
  /* Origin vector for the coefficients (constant offsets taken from
     the loop lower bounds).  */
  lambda_vector origin;
  /* Origin matrix for the invariants (invariant-coefficient offsets
     taken from the loop lower bounds).  */
  lambda_matrix origin_invariants;
  /* Number of invariants.  */
  int invariants;
} *lambda_lattice;

/* Accessors for the fields of a lambda_lattice.  */
#define LATTICE_BASE(T) ((T)->base)
#define LATTICE_DIMENSION(T) ((T)->dimension)
#define LATTICE_ORIGIN(T) ((T)->origin)
#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
#define LATTICE_INVARIANTS(T) ((T)->invariants)
143 
144 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
145 		       int, int);
146 static lambda_lattice lambda_lattice_new (int, int);
147 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
148 
149 static tree find_induction_var_from_exit_cond (struct loop *);
150 static bool can_convert_to_perfect_nest (struct loop *);
151 
152 /* Create a new lambda body vector.  */
153 
154 lambda_body_vector
lambda_body_vector_new(int size)155 lambda_body_vector_new (int size)
156 {
157   lambda_body_vector ret;
158 
159   ret = ggc_alloc (sizeof (*ret));
160   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
161   LBV_SIZE (ret) = size;
162   LBV_DENOMINATOR (ret) = 1;
163   return ret;
164 }
165 
166 /* Compute the new coefficients for the vector based on the
167   *inverse* of the transformation matrix.  */
168 
169 lambda_body_vector
lambda_body_vector_compute_new(lambda_trans_matrix transform,lambda_body_vector vect)170 lambda_body_vector_compute_new (lambda_trans_matrix transform,
171 				lambda_body_vector vect)
172 {
173   lambda_body_vector temp;
174   int depth;
175 
176   /* Make sure the matrix is square.  */
177   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
178 
179   depth = LTM_ROWSIZE (transform);
180 
181   temp = lambda_body_vector_new (depth);
182   LBV_DENOMINATOR (temp) =
183     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
184   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
185 			     LTM_MATRIX (transform), depth,
186 			     LBV_COEFFICIENTS (temp));
187   LBV_SIZE (temp) = LBV_SIZE (vect);
188   return temp;
189 }
190 
/* Print out the lambda body vector BODY to OUTFILE, as a linear
   expression over the loop variables.  */

void
print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
{
  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
}
198 
199 /* Return TRUE if two linear expressions are equal.  */
200 
201 static bool
lle_equal(lambda_linear_expression lle1,lambda_linear_expression lle2,int depth,int invariants)202 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
203 	   int depth, int invariants)
204 {
205   int i;
206 
207   if (lle1 == NULL || lle2 == NULL)
208     return false;
209   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
210     return false;
211   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
212     return false;
213   for (i = 0; i < depth; i++)
214     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
215       return false;
216   for (i = 0; i < invariants; i++)
217     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
218 	LLE_INVARIANT_COEFFICIENTS (lle2)[i])
219       return false;
220   return true;
221 }
222 
223 /* Create a new linear expression with dimension DIM, and total number
224    of invariants INVARIANTS.  */
225 
226 lambda_linear_expression
lambda_linear_expression_new(int dim,int invariants)227 lambda_linear_expression_new (int dim, int invariants)
228 {
229   lambda_linear_expression ret;
230 
231   ret = ggc_alloc_cleared (sizeof (*ret));
232 
233   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
234   LLE_CONSTANT (ret) = 0;
235   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
236   LLE_DENOMINATOR (ret) = 1;
237   LLE_NEXT (ret) = NULL;
238 
239   return ret;
240 }
241 
242 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
243    The starting letter used for variable names is START.  */
244 
245 static void
print_linear_expression(FILE * outfile,lambda_vector expr,int size,char start)246 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
247 			 char start)
248 {
249   int i;
250   bool first = true;
251   for (i = 0; i < size; i++)
252     {
253       if (expr[i] != 0)
254 	{
255 	  if (first)
256 	    {
257 	      if (expr[i] < 0)
258 		fprintf (outfile, "-");
259 	      first = false;
260 	    }
261 	  else if (expr[i] > 0)
262 	    fprintf (outfile, " + ");
263 	  else
264 	    fprintf (outfile, " - ");
265 	  if (abs (expr[i]) == 1)
266 	    fprintf (outfile, "%c", start + i);
267 	  else
268 	    fprintf (outfile, "%d%c", abs (expr[i]), start + i);
269 	}
270     }
271 }
272 
273 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
274    depth/number of coefficients is given by DEPTH, the number of invariants is
275    given by INVARIANTS, and the character to start variable names with is given
276    by START.  */
277 
278 void
print_lambda_linear_expression(FILE * outfile,lambda_linear_expression expr,int depth,int invariants,char start)279 print_lambda_linear_expression (FILE * outfile,
280 				lambda_linear_expression expr,
281 				int depth, int invariants, char start)
282 {
283   fprintf (outfile, "\tLinear expression: ");
284   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
285   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
286   fprintf (outfile, "  invariants: ");
287   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
288 			   invariants, 'A');
289   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
290 }
291 
292 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
293    coefficients is given by DEPTH, the number of invariants is
294    given by INVARIANTS, and the character to start variable names with is given
295    by START.  */
296 
297 void
print_lambda_loop(FILE * outfile,lambda_loop loop,int depth,int invariants,char start)298 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
299 		   int invariants, char start)
300 {
301   int step;
302   lambda_linear_expression expr;
303 
304   gcc_assert (loop);
305 
306   expr = LL_LINEAR_OFFSET (loop);
307   step = LL_STEP (loop);
308   fprintf (outfile, "  step size = %d \n", step);
309 
310   if (expr)
311     {
312       fprintf (outfile, "  linear offset: \n");
313       print_lambda_linear_expression (outfile, expr, depth, invariants,
314 				      start);
315     }
316 
317   fprintf (outfile, "  lower bound: \n");
318   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
319     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
320   fprintf (outfile, "  upper bound: \n");
321   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
322     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
323 }
324 
325 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
326    number of invariants.  */
327 
328 lambda_loopnest
lambda_loopnest_new(int depth,int invariants)329 lambda_loopnest_new (int depth, int invariants)
330 {
331   lambda_loopnest ret;
332   ret = ggc_alloc (sizeof (*ret));
333 
334   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
335   LN_DEPTH (ret) = depth;
336   LN_INVARIANTS (ret) = invariants;
337 
338   return ret;
339 }
340 
341 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
342    character to use for loop names is given by START.  */
343 
344 void
print_lambda_loopnest(FILE * outfile,lambda_loopnest nest,char start)345 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
346 {
347   int i;
348   for (i = 0; i < LN_DEPTH (nest); i++)
349     {
350       fprintf (outfile, "Loop %c\n", start + i);
351       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
352 			 LN_INVARIANTS (nest), 'i');
353       fprintf (outfile, "\n");
354     }
355 }
356 
357 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
358    of invariants.  */
359 
360 static lambda_lattice
lambda_lattice_new(int depth,int invariants)361 lambda_lattice_new (int depth, int invariants)
362 {
363   lambda_lattice ret;
364   ret = ggc_alloc (sizeof (*ret));
365   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
366   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
367   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
368   LATTICE_DIMENSION (ret) = depth;
369   LATTICE_INVARIANTS (ret) = invariants;
370   return ret;
371 }
372 
373 /* Compute the lattice base for NEST.  The lattice base is essentially a
374    non-singular transform from a dense base space to a sparse iteration space.
375    We use it so that we don't have to specially handle the case of a sparse
376    iteration space in other parts of the algorithm.  As a result, this routine
377    only does something interesting (IE produce a matrix that isn't the
378    identity matrix) if NEST is a sparse space.  */
379 
380 static lambda_lattice
lambda_lattice_compute_base(lambda_loopnest nest)381 lambda_lattice_compute_base (lambda_loopnest nest)
382 {
383   lambda_lattice ret;
384   int depth, invariants;
385   lambda_matrix base;
386 
387   int i, j, step;
388   lambda_loop loop;
389   lambda_linear_expression expression;
390 
391   depth = LN_DEPTH (nest);
392   invariants = LN_INVARIANTS (nest);
393 
394   ret = lambda_lattice_new (depth, invariants);
395   base = LATTICE_BASE (ret);
396   for (i = 0; i < depth; i++)
397     {
398       loop = LN_LOOPS (nest)[i];
399       gcc_assert (loop);
400       step = LL_STEP (loop);
401       /* If we have a step of 1, then the base is one, and the
402          origin and invariant coefficients are 0.  */
403       if (step == 1)
404 	{
405 	  for (j = 0; j < depth; j++)
406 	    base[i][j] = 0;
407 	  base[i][i] = 1;
408 	  LATTICE_ORIGIN (ret)[i] = 0;
409 	  for (j = 0; j < invariants; j++)
410 	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
411 	}
412       else
413 	{
414 	  /* Otherwise, we need the lower bound expression (which must
415 	     be an affine function)  to determine the base.  */
416 	  expression = LL_LOWER_BOUND (loop);
417 	  gcc_assert (expression && !LLE_NEXT (expression)
418 		      && LLE_DENOMINATOR (expression) == 1);
419 
420 	  /* The lower triangular portion of the base is going to be the
421 	     coefficient times the step */
422 	  for (j = 0; j < i; j++)
423 	    base[i][j] = LLE_COEFFICIENTS (expression)[j]
424 	      * LL_STEP (LN_LOOPS (nest)[j]);
425 	  base[i][i] = step;
426 	  for (j = i + 1; j < depth; j++)
427 	    base[i][j] = 0;
428 
429 	  /* Origin for this loop is the constant of the lower bound
430 	     expression.  */
431 	  LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
432 
433 	  /* Coefficient for the invariants are equal to the invariant
434 	     coefficients in the expression.  */
435 	  for (j = 0; j < invariants; j++)
436 	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
437 	      LLE_INVARIANT_COEFFICIENTS (expression)[j];
438 	}
439     }
440   return ret;
441 }
442 
/* Compute the least common multiple of two numbers A and B.  The
   result is always non-negative.  Returns 0 when either argument is 0,
   which also avoids the division by zero that gcd (0, 0) == 0 would
   otherwise trigger.  */

static int
lcm (int a, int b)
{
  if (a == 0 || b == 0)
    return 0;
  /* Divide before multiplying: gcd (a, b) evenly divides abs (a), and
     doing the division first reduces the risk of signed overflow in
     the intermediate product abs (a) * abs (b).  */
  return abs (a) / gcd (a, b) * abs (b);
}
450 
/* Perform Fourier-Motzkin elimination to calculate the bounds of the
   auxiliary nest.
   Fourier-Motzkin is a way of reducing systems of linear inequalities so that
   it is easy to calculate the answer and bounds.
   A sketch of how it works:
   Given a system of linear inequalities, ai * xj >= bk, you can always
   rewrite the constraints so they are all of the form
   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
   in b1 ... bk, and some a in a1...ai)
   You can then eliminate this x from the non-constant inequalities by
   rewriting these as a <= b, x >= constant, and delete the x variable.
   You can then repeat this for any remaining x variables, and then we have
   an easy to use variable <= constant (or no variables at all) form that we
   can construct our bounds from.

   In our case, each time we eliminate, we construct part of the bound from
   the ith variable, then delete the ith variable.

   Remember the constants are in our vector a, our coefficient matrix is A,
   and our invariant coefficient matrix is B.

   SIZE is the size of the matrices being passed.
   DEPTH is the loop nest depth.
   INVARIANTS is the number of loop invariants.
   A, B, and a are the coefficient matrix, invariant coefficient, and a
   vector of constants, respectively.  */

static lambda_loopnest
compute_nest_using_fourier_motzkin (int size,
				    int depth,
				    int invariants,
				    lambda_matrix A,
				    lambda_matrix B,
				    lambda_vector a)
{

  int multiple, f1, f2;
  int i, j, k;
  lambda_linear_expression expression;
  lambda_loop loop;
  lambda_loopnest auxillary_nest;
  lambda_matrix swapmatrix, A1, B1;
  lambda_vector swapvector, a1;
  int newsize;

  /* Scratch copies of the system used while eliminating a variable.
     128 rows matches the constraint limit the caller asserts.  */
  A1 = lambda_matrix_new (128, depth);
  B1 = lambda_matrix_new (128, invariants);
  a1 = lambda_vector_new (128);

  auxillary_nest = lambda_loopnest_new (depth, invariants);

  /* Eliminate variables from the innermost loop outwards: each
     iteration derives loop i's bounds from the current system, then
     removes variable i from it.  */
  for (i = depth - 1; i >= 0; i--)
    {
      loop = lambda_loop_new ();
      LN_LOOPS (auxillary_nest)[i] = loop;
      /* The auxiliary space is dense, so every loop has step 1.  */
      LL_STEP (loop) = 1;

      for (j = 0; j < size; j++)
	{
	  if (A[j][i] < 0)
	    {
	      /* Any linear expression in the matrix with a coefficient less
		 than 0 becomes part of the new lower bound.  */
	      expression = lambda_linear_expression_new (depth, invariants);

	      for (k = 0; k < i; k++)
		LLE_COEFFICIENTS (expression)[k] = A[j][k];

	      for (k = 0; k < invariants; k++)
		LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];

	      /* Negate throughout so the stored denominator is
		 positive.  */
	      LLE_DENOMINATOR (expression) = -1 * A[j][i];
	      LLE_CONSTANT (expression) = -1 * a[j];

	      /* Ignore if identical to the existing lower bound.  */
	      if (!lle_equal (LL_LOWER_BOUND (loop),
			      expression, depth, invariants))
		{
		  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
		  LL_LOWER_BOUND (loop) = expression;
		}

	    }
	  else if (A[j][i] > 0)
	    {
	      /* Any linear expression with a coefficient greater than 0
		 becomes part of the new upper bound.  */
	      expression = lambda_linear_expression_new (depth, invariants);
	      for (k = 0; k < i; k++)
		LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];

	      for (k = 0; k < invariants; k++)
		LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];

	      LLE_DENOMINATOR (expression) = A[j][i];
	      LLE_CONSTANT (expression) = a[j];

	      /* Ignore if identical to the existing upper bound.  */
	      if (!lle_equal (LL_UPPER_BOUND (loop),
			      expression, depth, invariants))
		{
		  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
		  LL_UPPER_BOUND (loop) = expression;
		}

	    }
	}

      /* This portion creates a new system of linear inequalities by deleting
	 the i'th variable, reducing the system by one variable.  */
      newsize = 0;
      for (j = 0; j < size; j++)
	{
	  /* If the coefficient for the i'th variable is 0, then we can just
	     eliminate the variable straightaway.  Otherwise, we have to
	     multiply through by the coefficients we are eliminating.  */
	  if (A[j][i] == 0)
	    {
	      lambda_vector_copy (A[j], A1[newsize], depth);
	      lambda_vector_copy (B[j], B1[newsize], invariants);
	      a1[newsize] = a[j];
	      newsize++;
	    }
	  else if (A[j][i] > 0)
	    {
	      /* Pair each positive-coefficient row with every
		 negative-coefficient row; scaling both to the lcm of
		 their i'th coefficients makes variable i cancel.  */
	      for (k = 0; k < size; k++)
		{
		  if (A[k][i] < 0)
		    {
		      multiple = lcm (A[j][i], A[k][i]);
		      f1 = multiple / A[j][i];
		      f2 = -1 * multiple / A[k][i];

		      lambda_vector_add_mc (A[j], f1, A[k], f2,
					    A1[newsize], depth);
		      lambda_vector_add_mc (B[j], f1, B[k], f2,
					    B1[newsize], invariants);
		      a1[newsize] = f1 * a[j] + f2 * a[k];
		      newsize++;
		    }
		}
	    }
	}

      /* Make the reduced system current by swapping the buffers; the
	 old storage becomes the scratch space for the next round.  */
      swapmatrix = A;
      A = A1;
      A1 = swapmatrix;

      swapmatrix = B;
      B = B1;
      B1 = swapmatrix;

      swapvector = a;
      a = a1;
      a1 = swapvector;

      size = newsize;
    }

  return auxillary_nest;
}
612 
/* Compute the loop bounds for the auxiliary space NEST.
   Input system used is Ax <= b.  TRANS is the unimodular transformation.
   Given the original nest, this function will
   1. Convert the nest into matrix form, which consists of a matrix for the
   coefficients, a matrix for the
   invariant coefficients, and a vector for the constants.
   2. Use the matrix form to calculate the lattice base for the nest (which is
   a dense space)
   3. Compose the dense space transform with the user specified transform, to
   get a transform we can easily calculate transformed bounds for.
   4. Multiply the composed transformation matrix times the matrix form of the
   loop.
   5. Transform the newly created matrix (from step 4) back into a loop nest
   using Fourier-Motzkin elimination to figure out the bounds.  */

static lambda_loopnest
lambda_compute_auxillary_space (lambda_loopnest nest,
				lambda_trans_matrix trans)
{
  lambda_matrix A, B, A1, B1;
  lambda_vector a, a1;
  lambda_matrix invertedtrans;
  int depth, invariants, size;
  int i, j;
  lambda_loop loop;
  lambda_linear_expression expression;
  lambda_lattice lattice;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  /* Unfortunately, we can't know the number of constraints we'll have
     ahead of time, but this should be enough even in ridiculous loop nest
     cases. We must not go over this limit.  */
  A = lambda_matrix_new (128, depth);
  B = lambda_matrix_new (128, invariants);
  a = lambda_vector_new (128);

  A1 = lambda_matrix_new (128, depth);
  B1 = lambda_matrix_new (128, invariants);
  a1 = lambda_vector_new (128);

  /* Store the bounds in the equation matrix A, constant vector a, and
     invariant matrix B, so that we have Ax <= a + B.
     This requires a little equation rearranging so that everything is on the
     correct side of the inequality.  */
  size = 0;
  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (nest)[i];

      /* First we do the lower bound.  For a negative step the roles of
	 the bounds are swapped, so the upper bound expression supplies
	 the lower-bound constraints.  */
      if (LL_STEP (loop) > 0)
	expression = LL_LOWER_BOUND (loop);
      else
	expression = LL_UPPER_BOUND (loop);

      /* Each bound may be a chain of expressions (e.g. from MAX
	 bounds); emit one inequality row per expression.  */
      for (; expression != NULL; expression = LLE_NEXT (expression))
	{
	  /* Fill in the coefficient.  */
	  for (j = 0; j < i; j++)
	    A[size][j] = LLE_COEFFICIENTS (expression)[j];

	  /* And the invariant coefficient.  */
	  for (j = 0; j < invariants; j++)
	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

	  /* And the constant.  */
	  a[size] = LLE_CONSTANT (expression);

	  /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
	     the constants and invariants on the right-hand side by
	     negating them, and move the bounded variable (scaled by
	     the denominator) to the left.  */
	  A[size][i] = -1 * LLE_DENOMINATOR (expression);
	  a[size] *= -1;
	  for (j = 0; j < invariants; j++)
	    B[size][j] *= -1;

	  size++;
	  /* Need to increase matrix sizes above.  */
	  gcc_assert (size <= 127);

	}

      /* Then do the exact same thing for the upper bounds.  */
      if (LL_STEP (loop) > 0)
	expression = LL_UPPER_BOUND (loop);
      else
	expression = LL_LOWER_BOUND (loop);

      for (; expression != NULL; expression = LLE_NEXT (expression))
	{
	  /* Fill in the coefficient.  */
	  for (j = 0; j < i; j++)
	    A[size][j] = LLE_COEFFICIENTS (expression)[j];

	  /* And the invariant coefficient.  */
	  for (j = 0; j < invariants; j++)
	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

	  /* And the constant.  */
	  a[size] = LLE_CONSTANT (expression);

	  /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
	  for (j = 0; j < i; j++)
	    A[size][j] *= -1;
	  A[size][i] = LLE_DENOMINATOR (expression);
	  size++;
	  /* Need to increase matrix sizes above.  */
	  gcc_assert (size <= 127);

	}
    }

  /* Compute the lattice base x = base * y + origin, where y is the
     base space.  */
  lattice = lambda_lattice_compute_base (nest);

  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */

  /* A1 = A * L */
  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);

  /* a1 = a - A * origin constant.  */
  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
  lambda_vector_add_mc (a, 1, a1, -1, a1, size);

  /* B1 = B - A * origin invariant.  */
  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
		      invariants);
  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);

  /* Now compute the auxiliary space bounds by first inverting U, multiplying
     it by A1, then performing Fourier-Motzkin.  */

  invertedtrans = lambda_matrix_new (depth, depth);

  /* Compute the inverse of U.  */
  lambda_matrix_inverse (LTM_MATRIX (trans),
			 invertedtrans, depth);

  /* A = A1 inv(U).  */
  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);

  return compute_nest_using_fourier_motzkin (size, depth, invariants,
					     A, B1, a1);
}
759 
760 /* Compute the loop bounds for the target space, using the bounds of
761    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
762    The target space loop bounds are computed by multiplying the triangular
763    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
764    the loop steps (positive or negative) is then used to swap the bounds if
765    the loop counts downwards.
766    Return the target loopnest.  */
767 
768 static lambda_loopnest
lambda_compute_target_space(lambda_loopnest auxillary_nest,lambda_trans_matrix H,lambda_vector stepsigns)769 lambda_compute_target_space (lambda_loopnest auxillary_nest,
770 			     lambda_trans_matrix H, lambda_vector stepsigns)
771 {
772   lambda_matrix inverse, H1;
773   int determinant, i, j;
774   int gcd1, gcd2;
775   int factor;
776 
777   lambda_loopnest target_nest;
778   int depth, invariants;
779   lambda_matrix target;
780 
781   lambda_loop auxillary_loop, target_loop;
782   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
783 
784   depth = LN_DEPTH (auxillary_nest);
785   invariants = LN_INVARIANTS (auxillary_nest);
786 
787   inverse = lambda_matrix_new (depth, depth);
788   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
789 
790   /* H1 is H excluding its diagonal.  */
791   H1 = lambda_matrix_new (depth, depth);
792   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
793 
794   for (i = 0; i < depth; i++)
795     H1[i][i] = 0;
796 
797   /* Computes the linear offsets of the loop bounds.  */
798   target = lambda_matrix_new (depth, depth);
799   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
800 
801   target_nest = lambda_loopnest_new (depth, invariants);
802 
803   for (i = 0; i < depth; i++)
804     {
805 
806       /* Get a new loop structure.  */
807       target_loop = lambda_loop_new ();
808       LN_LOOPS (target_nest)[i] = target_loop;
809 
810       /* Computes the gcd of the coefficients of the linear part.  */
811       gcd1 = lambda_vector_gcd (target[i], i);
812 
813       /* Include the denominator in the GCD.  */
814       gcd1 = gcd (gcd1, determinant);
815 
816       /* Now divide through by the gcd.  */
817       for (j = 0; j < i; j++)
818 	target[i][j] = target[i][j] / gcd1;
819 
820       expression = lambda_linear_expression_new (depth, invariants);
821       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
822       LLE_DENOMINATOR (expression) = determinant / gcd1;
823       LLE_CONSTANT (expression) = 0;
824       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
825 			   invariants);
826       LL_LINEAR_OFFSET (target_loop) = expression;
827     }
828 
829   /* For each loop, compute the new bounds from H.  */
830   for (i = 0; i < depth; i++)
831     {
832       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
833       target_loop = LN_LOOPS (target_nest)[i];
834       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
835       factor = LTM_MATRIX (H)[i][i];
836 
837       /* First we do the lower bound.  */
838       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
839 
840       for (; auxillary_expr != NULL;
841 	   auxillary_expr = LLE_NEXT (auxillary_expr))
842 	{
843 	  target_expr = lambda_linear_expression_new (depth, invariants);
844 	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
845 				     depth, inverse, depth,
846 				     LLE_COEFFICIENTS (target_expr));
847 	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
848 				    LLE_COEFFICIENTS (target_expr), depth,
849 				    factor);
850 
851 	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
852 	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
853 			      LLE_INVARIANT_COEFFICIENTS (target_expr),
854 			      invariants);
855 	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
856 				    LLE_INVARIANT_COEFFICIENTS (target_expr),
857 				    invariants, factor);
858 	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
859 
860 	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
861 	    {
862 	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
863 		* determinant;
864 	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
865 					(target_expr),
866 					LLE_INVARIANT_COEFFICIENTS
867 					(target_expr), invariants,
868 					determinant);
869 	      LLE_DENOMINATOR (target_expr) =
870 		LLE_DENOMINATOR (target_expr) * determinant;
871 	    }
872 	  /* Find the gcd and divide by it here, rather than doing it
873 	     at the tree level.  */
874 	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
875 	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
876 				    invariants);
877 	  gcd1 = gcd (gcd1, gcd2);
878 	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
879 	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
880 	  for (j = 0; j < depth; j++)
881 	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
882 	  for (j = 0; j < invariants; j++)
883 	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
884 	  LLE_CONSTANT (target_expr) /= gcd1;
885 	  LLE_DENOMINATOR (target_expr) /= gcd1;
886 	  /* Ignore if identical to existing bound.  */
887 	  if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
888 			  invariants))
889 	    {
890 	      LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
891 	      LL_LOWER_BOUND (target_loop) = target_expr;
892 	    }
893 	}
894       /* Now do the upper bound.  */
895       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
896 
897       for (; auxillary_expr != NULL;
898 	   auxillary_expr = LLE_NEXT (auxillary_expr))
899 	{
900 	  target_expr = lambda_linear_expression_new (depth, invariants);
901 	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
902 				     depth, inverse, depth,
903 				     LLE_COEFFICIENTS (target_expr));
904 	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
905 				    LLE_COEFFICIENTS (target_expr), depth,
906 				    factor);
907 	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
908 	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
909 			      LLE_INVARIANT_COEFFICIENTS (target_expr),
910 			      invariants);
911 	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
912 				    LLE_INVARIANT_COEFFICIENTS (target_expr),
913 				    invariants, factor);
914 	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
915 
916 	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
917 	    {
918 	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
919 		* determinant;
920 	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
921 					(target_expr),
922 					LLE_INVARIANT_COEFFICIENTS
923 					(target_expr), invariants,
924 					determinant);
925 	      LLE_DENOMINATOR (target_expr) =
926 		LLE_DENOMINATOR (target_expr) * determinant;
927 	    }
928 	  /* Find the gcd and divide by it here, instead of at the
929 	     tree level.  */
930 	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
931 	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
932 				    invariants);
933 	  gcd1 = gcd (gcd1, gcd2);
934 	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
935 	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
936 	  for (j = 0; j < depth; j++)
937 	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
938 	  for (j = 0; j < invariants; j++)
939 	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
940 	  LLE_CONSTANT (target_expr) /= gcd1;
941 	  LLE_DENOMINATOR (target_expr) /= gcd1;
942 	  /* Ignore if equal to existing bound.  */
943 	  if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
944 			  invariants))
945 	    {
946 	      LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
947 	      LL_UPPER_BOUND (target_loop) = target_expr;
948 	    }
949 	}
950     }
951   for (i = 0; i < depth; i++)
952     {
953       target_loop = LN_LOOPS (target_nest)[i];
954       /* If necessary, exchange the upper and lower bounds and negate
955          the step size.  */
956       if (stepsigns[i] < 0)
957 	{
958 	  LL_STEP (target_loop) *= -1;
959 	  tmp_expr = LL_LOWER_BOUND (target_loop);
960 	  LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
961 	  LL_UPPER_BOUND (target_loop) = tmp_expr;
962 	}
963     }
964   return target_nest;
965 }
966 
/* Compute the signs of the loop steps that result from applying the
   transformation TRANS to the previous step signs STEPSIGNS.  Return the
   new sign vector.  */
969 
static lambda_vector
lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
{
  lambda_matrix matrix, H;
  int size;
  lambda_vector newsteps;
  int i, j, factor, minimum_column;
  int temp;

  matrix = LTM_MATRIX (trans);
  size = LTM_ROWSIZE (trans);
  /* Work on a copy (H) of the transformation matrix so TRANS itself is
     left untouched.  */
  H = lambda_matrix_new (size, size);

  /* Start from a copy of the incoming step signs; the column operations
     below permute it in lock-step with the matrix columns.  */
  newsteps = lambda_vector_new (size);
  lambda_vector_copy (stepsigns, newsteps, size);

  lambda_matrix_copy (matrix, H, size, size);

  for (j = 0; j < size; j++)
    {
      lambda_vector row;
      row = H[j];
      /* Make every entry of row J at or right of the diagonal
	 non-negative by negating the offending columns.  */
      for (i = j; i < size; i++)
	if (row[i] < 0)
	  lambda_matrix_col_negate (H, size, i);
      /* Eliminate the entries right of the diagonal in row J.  Each pass
	 moves the minimum non-zero entry onto the diagonal (swapping the
	 corresponding NEWSTEPS entries so the sign information follows
	 the column permutation), then reduces the remaining columns.  */
      while (lambda_vector_first_nz (row, size, j + 1) < size)
	{
	  minimum_column = lambda_vector_min_nz (row, size, j);
	  lambda_matrix_col_exchange (H, size, j, minimum_column);

	  temp = newsteps[j];
	  newsteps[j] = newsteps[minimum_column];
	  newsteps[minimum_column] = temp;

	  for (i = j + 1; i < size; i++)
	    {
	      factor = row[i] / row[j];
	      lambda_matrix_col_add (H, size, j, i, -1 * factor);
	    }
	}
    }
  return newsteps;
}
1013 
1014 /* Transform NEST according to TRANS, and return the new loopnest.
1015    This involves
1016    1. Computing a lattice base for the transformation
1017    2. Composing the dense base with the specified transformation (TRANS)
1018    3. Decomposing the combined transformation into a lower triangular portion,
1019    and a unimodular portion.
1020    4. Computing the auxiliary nest using the unimodular portion.
1021    5. Computing the target nest using the auxiliary nest and the lower
1022    triangular portion.  */
1023 
lambda_loopnest
lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
{
  lambda_loopnest auxillary_nest, target_nest;

  int depth, invariants;
  int i, j;
  lambda_lattice lattice;
  lambda_trans_matrix trans1, H, U;
  lambda_loop loop;
  lambda_linear_expression expression;
  lambda_vector origin;
  lambda_matrix origin_invariants;
  lambda_vector stepsigns;
  int f;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  /* Keep track of the signs of the loop steps (+1 or -1 per loop).  */
  stepsigns = lambda_vector_new (depth);
  for (i = 0; i < depth; i++)
    {
      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
	stepsigns[i] = 1;
      else
	stepsigns[i] = -1;
    }

  /* Compute the lattice base.  */
  lattice = lambda_lattice_compute_base (nest);
  trans1 = lambda_trans_matrix_new (depth, depth);

  /* Multiply the transformation matrix by the lattice base.  */

  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
		      LTM_MATRIX (trans1), depth, depth, depth);

  /* Compute the Hermite normal form for the new transformation matrix:
     H is the lower triangular portion, U the unimodular portion.  */
  H = lambda_trans_matrix_new (depth, depth);
  U = lambda_trans_matrix_new (depth, depth);
  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
			 LTM_MATRIX (U));

  /* Compute the auxiliary loop nest's space from the unimodular
     portion.  */
  auxillary_nest = lambda_compute_auxillary_space (nest, U);

  /* Compute the loop step signs from the old step signs and the
     transformation matrix.  */
  stepsigns = lambda_compute_step_signs (trans1, stepsigns);

  /* Compute the target loop nest space from the auxiliary nest and
     the lower triangular matrix H.  */
  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
  origin = lambda_vector_new (depth);
  origin_invariants = lambda_matrix_new (depth, invariants);
  /* Map the lattice origin (and its invariant coefficients) through the
     transformation so it can be folded into each loop's linear offset.  */
  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
			     LATTICE_ORIGIN (lattice), origin);
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
		      origin_invariants, depth, depth, invariants);

  /* Add the transformed origin into each loop's linear offset, scaled by
     the offset's denominator F (F is 1 when the offset has no linear
     part).  */
  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (target_nest)[i];
      expression = LL_LINEAR_OFFSET (loop);
      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
	f = 1;
      else
	f = LLE_DENOMINATOR (expression);

      LLE_CONSTANT (expression) += f * origin[i];

      for (j = 0; j < invariants; j++)
	LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
	  f * origin_invariants[i][j];
    }

  return target_nest;

}
1105 
1106 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1107    return the new expression.  DEPTH is the depth of the loopnest.
1108    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1109    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1110    is the amount we have to add/subtract from the expression because of the
1111    type of comparison it is used in.  */
1112 
1113 static lambda_linear_expression
gcc_tree_to_linear_expression(int depth,tree expr,VEC (tree,heap)* outerinductionvars,VEC (tree,heap)* invariants,int extra)1114 gcc_tree_to_linear_expression (int depth, tree expr,
1115 			       VEC(tree,heap) *outerinductionvars,
1116 			       VEC(tree,heap) *invariants, int extra)
1117 {
1118   lambda_linear_expression lle = NULL;
1119   switch (TREE_CODE (expr))
1120     {
1121     case INTEGER_CST:
1122       {
1123 	lle = lambda_linear_expression_new (depth, 2 * depth);
1124 	LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1125 	if (extra != 0)
1126 	  LLE_CONSTANT (lle) += extra;
1127 
1128 	LLE_DENOMINATOR (lle) = 1;
1129       }
1130       break;
1131     case SSA_NAME:
1132       {
1133 	tree iv, invar;
1134 	size_t i;
1135 	for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1136 	  if (iv != NULL)
1137 	    {
1138 	      if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1139 		{
1140 		  lle = lambda_linear_expression_new (depth, 2 * depth);
1141 		  LLE_COEFFICIENTS (lle)[i] = 1;
1142 		  if (extra != 0)
1143 		    LLE_CONSTANT (lle) = extra;
1144 
1145 		  LLE_DENOMINATOR (lle) = 1;
1146 		}
1147 	    }
1148 	for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1149 	  if (invar != NULL)
1150 	    {
1151 	      if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1152 		{
1153 		  lle = lambda_linear_expression_new (depth, 2 * depth);
1154 		  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1155 		  if (extra != 0)
1156 		    LLE_CONSTANT (lle) = extra;
1157 		  LLE_DENOMINATOR (lle) = 1;
1158 		}
1159 	    }
1160       }
1161       break;
1162     default:
1163       return NULL;
1164     }
1165 
1166   return lle;
1167 }
1168 
/* Return the depth (number of nested loops) of the loopnest NEST.  */
1170 
1171 static int
depth_of_nest(struct loop * nest)1172 depth_of_nest (struct loop *nest)
1173 {
1174   size_t depth = 0;
1175   while (nest)
1176     {
1177       depth++;
1178       nest = nest->inner;
1179     }
1180   return depth;
1181 }
1182 
1183 
1184 /* Return true if OP is invariant in LOOP and all outer loops.  */
1185 
1186 static bool
invariant_in_loop_and_outer_loops(struct loop * loop,tree op)1187 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1188 {
1189   if (is_gimple_min_invariant (op))
1190     return true;
1191   if (loop->depth == 0)
1192     return true;
1193   if (!expr_invariant_in_loop_p (loop, op))
1194     return false;
1195   if (loop->outer
1196       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1197     return false;
1198   return true;
1199 }
1200 
1201 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1202    or NULL if it could not be converted.
1203    DEPTH is the depth of the loop.
1204    INVARIANTS is a pointer to the array of loop invariants.
1205    The induction variable for this loop should be stored in the parameter
1206    OURINDUCTIONVAR.
1207    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1208 
static lambda_loop
gcc_loop_to_lambda_loop (struct loop *loop, int depth,
			 VEC(tree,heap) ** invariants,
			 tree * ourinductionvar,
			 VEC(tree,heap) * outerinductionvars,
			 VEC(tree,heap) ** lboundvars,
			 VEC(tree,heap) ** uboundvars,
			 VEC(int,heap) ** steps)
{
  tree phi;
  tree exit_cond;
  tree access_fn, inductionvar;
  tree step;
  lambda_loop lloop = NULL;
  lambda_linear_expression lbound, ubound;
  tree test;
  int stepint;
  int extra = 0;
  tree lboundvar, uboundvar, uboundresult;

  /* Find out induction var and exit condition.  */
  inductionvar = find_induction_var_from_exit_cond (loop);
  exit_cond = get_loop_exit_condition (loop);

  if (inductionvar == NULL || exit_cond == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
      return NULL;
    }

  test = TREE_OPERAND (exit_cond, 0);

  if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot find PHI node for induction variable\n");

      return NULL;
    }

  /* The defining statement should be the induction variable's PHI node.
     If it is not, chase one level through the single SSA use operand of
     that statement and try again.  */
  phi = SSA_NAME_DEF_STMT (inductionvar);
  if (TREE_CODE (phi) != PHI_NODE)
    {
      phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
      if (!phi)
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Unable to convert loop: Cannot find PHI node for induction variable\n");

	  return NULL;
	}

      phi = SSA_NAME_DEF_STMT (phi);
      if (TREE_CODE (phi) != PHI_NODE)
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
	  return NULL;
	}

    }

  /* The induction variable name/version we want to put in the array is the
     result of the induction variable phi node.  */
  *ourinductionvar = PHI_RESULT (phi);
  access_fn = instantiate_parameters
    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
  if (access_fn == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Access function for induction variable phi is unknown\n");

      return NULL;
    }

  /* Extract this loop's evolution step from the access function; it must
     be a known integer constant.  */
  step = evolution_part_in_loop_num (access_fn, loop->num);
  if (!step || step == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot determine step of loop.\n");

      return NULL;
    }
  if (TREE_CODE (step) != INTEGER_CST)
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Step of loop is not integer.\n");
      return NULL;
    }

  stepint = TREE_INT_CST_LOW (step);

  /* Only want phis for induction vars, which will have two
     arguments.  */
  if (PHI_NUM_ARGS (phi) != 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
      return NULL;
    }

  /* Another induction variable check. One argument's source should be
     in the loop, one outside the loop.  */
  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
      && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");

      return NULL;
    }

  /* The PHI argument coming from outside the loop is the initial (lower
     bound) value of the induction variable.  */
  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
    {
      lboundvar = PHI_ARG_DEF (phi, 1);
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
					      outerinductionvars, *invariants,
					      0);
    }
  else
    {
      lboundvar = PHI_ARG_DEF (phi, 0);
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
					      outerinductionvars, *invariants,
					      0);
    }

  if (!lbound)
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot convert lower bound to linear expression\n");

      return NULL;
    }
  /* One part of the test may be a loop invariant tree.  */
  VEC_reserve (tree, heap, *invariants, 1);
  if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
      && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
  else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
	   && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));

  /* The non-induction variable part of the test is the upper bound variable.
   */
  if (TREE_OPERAND (test, 0) == inductionvar)
    uboundvar = TREE_OPERAND (test, 1);
  else
    uboundvar = TREE_OPERAND (test, 0);


  /* We only size the vectors assuming we have, at max, 2 times as many
     invariants as we do loops (one for each bound).
     This is just an arbitrary number, but it has to be matched against the
     code below.  */
  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));


  /* We might have some leftover.  EXTRA adjusts the upper bound to account
     for the kind of comparison the exit test uses.  */
  if (TREE_CODE (test) == LT_EXPR)
    extra = -1 * stepint;
  else if (TREE_CODE (test) == NE_EXPR)
    extra = -1 * stepint;
  else if (TREE_CODE (test) == GT_EXPR)
    extra = -1 * stepint;
  else if (TREE_CODE (test) == EQ_EXPR)
    extra = 1 * stepint;

  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
					  outerinductionvars,
					  *invariants, extra);
  /* Record the bound variables and step for the caller; the adjusted
     upper bound tree (UBOUNDVAR + EXTRA) is what later code generation
     compares against.  */
  uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
			 build_int_cst (TREE_TYPE (uboundvar), extra));
  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
  VEC_safe_push (int, heap, *steps, stepint);
  if (!ubound)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
      return NULL;
    }

  /* Everything converted; build the lambda loop.  */
  lloop = lambda_loop_new ();
  LL_STEP (lloop) = stepint;
  LL_LOWER_BOUND (lloop) = lbound;
  LL_UPPER_BOUND (lloop) = ubound;
  return lloop;
}
1416 
1417 /* Given a LOOP, find the induction variable it is testing against in the exit
1418    condition.  Return the induction variable if found, NULL otherwise.  */
1419 
1420 static tree
find_induction_var_from_exit_cond(struct loop * loop)1421 find_induction_var_from_exit_cond (struct loop *loop)
1422 {
1423   tree expr = get_loop_exit_condition (loop);
1424   tree ivarop;
1425   tree test;
1426   if (expr == NULL_TREE)
1427     return NULL_TREE;
1428   if (TREE_CODE (expr) != COND_EXPR)
1429     return NULL_TREE;
1430   test = TREE_OPERAND (expr, 0);
1431   if (!COMPARISON_CLASS_P (test))
1432     return NULL_TREE;
1433 
1434   /* Find the side that is invariant in this loop. The ivar must be the other
1435      side.  */
1436 
1437   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1438       ivarop = TREE_OPERAND (test, 1);
1439   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1440       ivarop = TREE_OPERAND (test, 0);
1441   else
1442     return NULL_TREE;
1443 
1444   if (TREE_CODE (ivarop) != SSA_NAME)
1445     return NULL_TREE;
1446   return ivarop;
1447 }
1448 
1449 DEF_VEC_P(lambda_loop);
1450 DEF_VEC_ALLOC_P(lambda_loop,heap);
1451 
1452 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1453    Return the new loop nest.
1454    INDUCTIONVARS is a pointer to an array of induction variables for the
1455    loopnest that will be filled in during this process.
1456    INVARIANTS is a pointer to an array of invariants that will be filled in
1457    during this process.  */
1458 
lambda_loopnest
gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
				 struct loop *loop_nest,
				 VEC(tree,heap) **inductionvars,
				 VEC(tree,heap) **invariants)
{
  lambda_loopnest ret = NULL;
  struct loop *temp = loop_nest;
  int depth = depth_of_nest (loop_nest);
  size_t i;
  VEC(lambda_loop,heap) *loops = NULL;
  VEC(tree,heap) *uboundvars = NULL;
  VEC(tree,heap) *lboundvars  = NULL;
  VEC(int,heap) *steps = NULL;
  lambda_loop newloop;
  tree inductionvar = NULL;
  bool perfect_nest = perfect_nest_p (loop_nest);

  /* If the nest is not perfect and cannot be made so, give up early.  */
  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
    goto fail;

  /* Convert each loop of the nest, outermost first; any failure aborts
     the whole conversion (RET stays NULL on the fail path).  */
  while (temp)
    {
      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
					 &inductionvar, *inductionvars,
					 &lboundvars, &uboundvars,
					 &steps);
      if (!newloop)
	goto fail;

      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
      VEC_safe_push (lambda_loop, heap, loops, newloop);
      temp = temp->inner;
    }

  /* Make the nest perfect now that the per-loop bound/step data has
     been collected.  */
  if (!perfect_nest)
    {
      if (!perfect_nestify (currloops, loop_nest,
			    lboundvars, uboundvars, steps, *inductionvars))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Not a perfect loop nest and couldn't convert to one.\n");
	  goto fail;
	}
      else if (dump_file)
	fprintf (dump_file,
		 "Successfully converted loop nest to perfect loop nest.\n");
    }

  /* Assemble the lambda loopnest; 2 * DEPTH matches the invariant-vector
     sizing assumed in gcc_loop_to_lambda_loop.  */
  ret = lambda_loopnest_new (depth, 2 * depth);

  for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
    LN_LOOPS (ret)[i] = newloop;

  /* Shared cleanup for both the success and failure paths.  */
 fail:
  VEC_free (lambda_loop, heap, loops);
  VEC_free (tree, heap, uboundvars);
  VEC_free (tree, heap, lboundvars);
  VEC_free (int, heap, steps);

  return ret;
}
1522 
1523 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1524    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1525    inserted for us are stored.  INDUCTION_VARS is the array of induction
1526    variables for the loop this LBV is from.  TYPE is the tree type to use for
1527    the variables and trees involved.  */
1528 
static tree
lbv_to_gcc_expression (lambda_body_vector lbv,
		       tree type, VEC(tree,heap) *induction_vars,
		       tree *stmts_to_insert)
{
  tree stmts, stmt, resvar, name;
  tree iv;
  size_t i;
  tree_stmt_iterator tsi;

  /* Create a statement list and a linear expression temporary.  */
  stmts = alloc_stmt_list ();
  resvar = create_tmp_var (type, "lbvtmp");
  add_referenced_var (resvar);

  /* Start at 0.  */
  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
  name = make_ssa_name (resvar, stmt);
  TREE_OPERAND (stmt, 0) = name;
  tsi = tsi_last (stmts);
  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

  /* Accumulate coefficient * induction_variable for every non-zero
     coefficient in the body vector.  */
  for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
    {
      if (LBV_COEFFICIENTS (lbv)[i] != 0)
	{
	  tree newname;
	  tree coeffmult;

	  /* newname = coefficient * induction_variable */
	  coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			 fold_build2 (MULT_EXPR, type, iv, coeffmult));

	  newname = make_ssa_name (resvar, stmt);
	  TREE_OPERAND (stmt, 0) = newname;
	  fold_stmt (&stmt);
	  tsi = tsi_last (stmts);
	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

	  /* name = name + newname */
	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			 build2 (PLUS_EXPR, type, name, newname));
	  name = make_ssa_name (resvar, stmt);
	  TREE_OPERAND (stmt, 0) = name;
	  fold_stmt (&stmt);
	  tsi = tsi_last (stmts);
	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

	}
    }

  /* Handle any denominator that occurs: name = ceil (name / denominator).  */
  if (LBV_DENOMINATOR (lbv) != 1)
    {
      tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
		     build2 (CEIL_DIV_EXPR, type, name, denominator));
      name = make_ssa_name (resvar, stmt);
      TREE_OPERAND (stmt, 0) = name;
      fold_stmt (&stmt);
      tsi = tsi_last (stmts);
      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
    }
  /* Hand the generated statements back; the returned SSA name holds the
     final value of the expression.  */
  *stmts_to_insert = stmts;
  return name;
}
1596 
1597 /* Convert a linear expression from coefficient and constant form to a
1598    gcc tree.
1599    Return the tree that represents the final value of the expression.
1600    LLE is the linear expression to convert.
1601    OFFSET is the linear offset to apply to the expression.
1602    TYPE is the tree type to use for the variables and math.
1603    INDUCTION_VARS is a vector of induction variables for the loops.
1604    INVARIANTS is a vector of the loop nest invariants.
1605    WRAP specifies what tree code to wrap the results in, if there is more than
1606    one (it is either MAX_EXPR, or MIN_EXPR).
1607    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1608    statements that need to be inserted for the linear expression.  */
1609 
1610 static tree
lle_to_gcc_expression(lambda_linear_expression lle,lambda_linear_expression offset,tree type,VEC (tree,heap)* induction_vars,VEC (tree,heap)* invariants,enum tree_code wrap,tree * stmts_to_insert)1611 lle_to_gcc_expression (lambda_linear_expression lle,
1612 		       lambda_linear_expression offset,
1613 		       tree type,
1614 		       VEC(tree,heap) *induction_vars,
1615 		       VEC(tree,heap) *invariants,
1616 		       enum tree_code wrap, tree *stmts_to_insert)
1617 {
1618   tree stmts, stmt, resvar, name;
1619   size_t i;
1620   tree_stmt_iterator tsi;
1621   tree iv, invar;
1622   VEC(tree,heap) *results = NULL;
1623 
1624   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1625   name = NULL_TREE;
1626   /* Create a statement list and a linear expression temporary.  */
1627   stmts = alloc_stmt_list ();
1628   resvar = create_tmp_var (type, "lletmp");
1629   add_referenced_var (resvar);
1630 
1631   /* Build up the linear expressions, and put the variable representing the
1632      result in the results array.  */
1633   for (; lle != NULL; lle = LLE_NEXT (lle))
1634     {
1635       /* Start at name = 0.  */
1636       stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1637       name = make_ssa_name (resvar, stmt);
1638       TREE_OPERAND (stmt, 0) = name;
1639       fold_stmt (&stmt);
1640       tsi = tsi_last (stmts);
1641       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1642 
1643       /* First do the induction variables.
1644          at the end, name = name + all the induction variables added
1645          together.  */
1646       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1647 	{
1648 	  if (LLE_COEFFICIENTS (lle)[i] != 0)
1649 	    {
1650 	      tree newname;
1651 	      tree mult;
1652 	      tree coeff;
1653 
1654 	      /* mult = induction variable * coefficient.  */
1655 	      if (LLE_COEFFICIENTS (lle)[i] == 1)
1656 		{
1657 		  mult = VEC_index (tree, induction_vars, i);
1658 		}
1659 	      else
1660 		{
1661 		  coeff = build_int_cst (type,
1662 					 LLE_COEFFICIENTS (lle)[i]);
1663 		  mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1664 		}
1665 
1666 	      /* newname = mult */
1667 	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1668 	      newname = make_ssa_name (resvar, stmt);
1669 	      TREE_OPERAND (stmt, 0) = newname;
1670 	      fold_stmt (&stmt);
1671 	      tsi = tsi_last (stmts);
1672 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1673 
1674 	      /* name = name + newname */
1675 	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1676 			     build2 (PLUS_EXPR, type, name, newname));
1677 	      name = make_ssa_name (resvar, stmt);
1678 	      TREE_OPERAND (stmt, 0) = name;
1679 	      fold_stmt (&stmt);
1680 	      tsi = tsi_last (stmts);
1681 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1682 	    }
1683 	}
1684 
1685       /* Handle our invariants.
1686          At the end, we have name = name + result of adding all multiplied
1687          invariants.  */
1688       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1689 	{
1690 	  if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1691 	    {
1692 	      tree newname;
1693 	      tree mult;
1694 	      tree coeff;
1695 	      int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1696 	      /* mult = invariant * coefficient  */
1697 	      if (invcoeff == 1)
1698 		{
1699 		  mult = invar;
1700 		}
1701 	      else
1702 		{
1703 		  coeff = build_int_cst (type, invcoeff);
1704 		  mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1705 		}
1706 
1707 	      /* newname = mult */
1708 	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1709 	      newname = make_ssa_name (resvar, stmt);
1710 	      TREE_OPERAND (stmt, 0) = newname;
1711 	      fold_stmt (&stmt);
1712 	      tsi = tsi_last (stmts);
1713 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1714 
1715 	      /* name = name + newname */
1716 	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1717 			     build2 (PLUS_EXPR, type, name, newname));
1718 	      name = make_ssa_name (resvar, stmt);
1719 	      TREE_OPERAND (stmt, 0) = name;
1720 	      fold_stmt (&stmt);
1721 	      tsi = tsi_last (stmts);
1722 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1723 	    }
1724 	}
1725 
1726       /* Now handle the constant.
1727          name = name + constant.  */
1728       if (LLE_CONSTANT (lle) != 0)
1729 	{
1730 	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1731 			 build2 (PLUS_EXPR, type, name,
1732 			         build_int_cst (type, LLE_CONSTANT (lle))));
1733 	  name = make_ssa_name (resvar, stmt);
1734 	  TREE_OPERAND (stmt, 0) = name;
1735 	  fold_stmt (&stmt);
1736 	  tsi = tsi_last (stmts);
1737 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1738 	}
1739 
1740       /* Now handle the offset.
1741          name = name + linear offset.  */
1742       if (LLE_CONSTANT (offset) != 0)
1743 	{
1744 	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1745 			 build2 (PLUS_EXPR, type, name,
1746 			         build_int_cst (type, LLE_CONSTANT (offset))));
1747 	  name = make_ssa_name (resvar, stmt);
1748 	  TREE_OPERAND (stmt, 0) = name;
1749 	  fold_stmt (&stmt);
1750 	  tsi = tsi_last (stmts);
1751 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1752 	}
1753 
1754       /* Handle any denominator that occurs.  */
1755       if (LLE_DENOMINATOR (lle) != 1)
1756 	{
1757 	  stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1758 	  stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1759 			 type, name, stmt);
1760 	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt);
1761 
1762 	  /* name = {ceil, floor}(name/denominator) */
1763 	  name = make_ssa_name (resvar, stmt);
1764 	  TREE_OPERAND (stmt, 0) = name;
1765 	  tsi = tsi_last (stmts);
1766 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1767 	}
1768       VEC_safe_push (tree, heap, results, name);
1769     }
1770 
1771   /* Again, out of laziness, we don't handle this case yet.  It's not
1772      hard, it just hasn't occurred.  */
1773   gcc_assert (VEC_length (tree, results) <= 2);
1774 
1775   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1776   if (VEC_length (tree, results) > 1)
1777     {
1778       tree op1 = VEC_index (tree, results, 0);
1779       tree op2 = VEC_index (tree, results, 1);
1780       stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1781 		     build2 (wrap, type, op1, op2));
1782       name = make_ssa_name (resvar, stmt);
1783       TREE_OPERAND (stmt, 0) = name;
1784       tsi = tsi_last (stmts);
1785       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1786     }
1787 
1788   VEC_free (tree, heap, results);
1789 
1790   *stmts_to_insert = stmts;
1791   return name;
1792 }
1793 
1794 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1795    it, back into gcc code.  This changes the
1796    loops, their induction variables, and their bodies, so that they
1797    match the transformed loopnest.
1798    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1799    loopnest.
1800    OLD_IVS is a vector of induction variables from the old loopnest.
1801    INVARIANTS is a vector of loop invariants from the old loopnest.
1802    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1803    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1804    NEW_LOOPNEST.  */
1805 
1806 void
lambda_loopnest_to_gcc_loopnest(struct loop * old_loopnest,VEC (tree,heap)* old_ivs,VEC (tree,heap)* invariants,lambda_loopnest new_loopnest,lambda_trans_matrix transform)1807 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1808 				 VEC(tree,heap) *old_ivs,
1809 				 VEC(tree,heap) *invariants,
1810 				 lambda_loopnest new_loopnest,
1811 				 lambda_trans_matrix transform)
1812 {
1813   struct loop *temp;
1814   size_t i = 0;
1815   size_t depth = 0;
1816   VEC(tree,heap) *new_ivs = NULL;
1817   tree oldiv;
1818 
1819   block_stmt_iterator bsi;
1820 
1821   if (dump_file)
1822     {
1823       transform = lambda_trans_matrix_inverse (transform);
1824       fprintf (dump_file, "Inverse of transformation matrix:\n");
1825       print_lambda_trans_matrix (dump_file, transform);
1826     }
1827   depth = depth_of_nest (old_loopnest);
1828   temp = old_loopnest;
1829 
1830   while (temp)
1831     {
1832       lambda_loop newloop;
1833       basic_block bb;
1834       edge exit;
1835       tree ivvar, ivvarinced, exitcond, stmts;
1836       enum tree_code testtype;
1837       tree newupperbound, newlowerbound;
1838       lambda_linear_expression offset;
1839       tree type;
1840       bool insert_after;
1841       tree inc_stmt;
1842 
1843       oldiv = VEC_index (tree, old_ivs, i);
1844       type = TREE_TYPE (oldiv);
1845 
1846       /* First, build the new induction variable temporary  */
1847 
1848       ivvar = create_tmp_var (type, "lnivtmp");
1849       add_referenced_var (ivvar);
1850 
1851       VEC_safe_push (tree, heap, new_ivs, ivvar);
1852 
1853       newloop = LN_LOOPS (new_loopnest)[i];
1854 
1855       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1856          cases for now.  */
1857       offset = LL_LINEAR_OFFSET (newloop);
1858 
1859       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1860 		  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1861 
1862       /* Now build the  new lower bounds, and insert the statements
1863          necessary to generate it on the loop preheader.  */
1864       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1865 					     LL_LINEAR_OFFSET (newloop),
1866 					     type,
1867 					     new_ivs,
1868 					     invariants, MAX_EXPR, &stmts);
1869       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1870       bsi_commit_edge_inserts ();
1871       /* Build the new upper bound and insert its statements in the
1872          basic block of the exit condition */
1873       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1874 					     LL_LINEAR_OFFSET (newloop),
1875 					     type,
1876 					     new_ivs,
1877 					     invariants, MIN_EXPR, &stmts);
1878       exit = temp->single_exit;
1879       exitcond = get_loop_exit_condition (temp);
1880       bb = bb_for_stmt (exitcond);
1881       bsi = bsi_start (bb);
1882       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1883 
1884       /* Create the new iv.  */
1885 
1886       standard_iv_increment_position (temp, &bsi, &insert_after);
1887       create_iv (newlowerbound,
1888 		 build_int_cst (type, LL_STEP (newloop)),
1889 		 ivvar, temp, &bsi, insert_after, &ivvar,
1890 		 NULL);
1891 
1892       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1893 	 dominate the block containing the exit condition.
1894 	 So we simply create our own incremented iv to use in the new exit
1895 	 test,  and let redundancy elimination sort it out.  */
1896       inc_stmt = build2 (PLUS_EXPR, type,
1897 			 ivvar, build_int_cst (type, LL_STEP (newloop)));
1898       inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1899 			 inc_stmt);
1900       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1901       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1902       bsi = bsi_for_stmt (exitcond);
1903       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1904 
1905       /* Replace the exit condition with the new upper bound
1906          comparison.  */
1907 
1908       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1909 
1910       /* We want to build a conditional where true means exit the loop, and
1911 	 false means continue the loop.
1912 	 So swap the testtype if this isn't the way things are.*/
1913 
1914       if (exit->flags & EDGE_FALSE_VALUE)
1915 	testtype = swap_tree_comparison (testtype);
1916 
1917       COND_EXPR_COND (exitcond) = build2 (testtype,
1918 					  boolean_type_node,
1919 					  newupperbound, ivvarinced);
1920       update_stmt (exitcond);
1921       VEC_replace (tree, new_ivs, i, ivvar);
1922 
1923       i++;
1924       temp = temp->inner;
1925     }
1926 
1927   /* Rewrite uses of the old ivs so that they are now specified in terms of
1928      the new ivs.  */
1929 
1930   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1931     {
1932       imm_use_iterator imm_iter;
1933       use_operand_p use_p;
1934       tree oldiv_def;
1935       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1936       tree stmt;
1937 
1938       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1939         oldiv_def = PHI_RESULT (oldiv_stmt);
1940       else
1941 	oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1942       gcc_assert (oldiv_def != NULL_TREE);
1943 
1944       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1945         {
1946 	  tree newiv, stmts;
1947 	  lambda_body_vector lbv, newlbv;
1948 
1949 	  gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1950 
1951 	  /* Compute the new expression for the induction
1952 	     variable.  */
1953 	  depth = VEC_length (tree, new_ivs);
1954 	  lbv = lambda_body_vector_new (depth);
1955 	  LBV_COEFFICIENTS (lbv)[i] = 1;
1956 
1957 	  newlbv = lambda_body_vector_compute_new (transform, lbv);
1958 
1959 	  newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1960 					 new_ivs, &stmts);
1961 	  bsi = bsi_for_stmt (stmt);
1962 	  /* Insert the statements to build that
1963 	     expression.  */
1964 	  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1965 
1966 	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1967 	    propagate_value (use_p, newiv);
1968 	  update_stmt (stmt);
1969 	}
1970     }
1971   VEC_free (tree, heap, new_ivs);
1972 }
1973 
1974 /* Return TRUE if this is not interesting statement from the perspective of
1975    determining if we have a perfect loop nest.  */
1976 
1977 static bool
not_interesting_stmt(tree stmt)1978 not_interesting_stmt (tree stmt)
1979 {
1980   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1981      loop, we would have already failed the number of exits tests.  */
1982   if (TREE_CODE (stmt) == LABEL_EXPR
1983       || TREE_CODE (stmt) == GOTO_EXPR
1984       || TREE_CODE (stmt) == COND_EXPR)
1985     return true;
1986   return false;
1987 }
1988 
/* Return TRUE if PHI uses DEF for its in-the-loop edge for LOOP.  */
1990 
1991 static bool
phi_loop_edge_uses_def(struct loop * loop,tree phi,tree def)1992 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1993 {
1994   int i;
1995   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1996     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1997       if (PHI_ARG_DEF (phi, i) == def)
1998 	return true;
1999   return false;
2000 }
2001 
2002 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2003 
2004 static bool
stmt_uses_phi_result(tree stmt,tree phi_result)2005 stmt_uses_phi_result (tree stmt, tree phi_result)
2006 {
2007   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2008 
2009   /* This is conservatively true, because we only want SIMPLE bumpers
2010      of the form x +- constant for our pass.  */
2011   return (use == phi_result);
2012 }
2013 
2014 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2015    in-loop-edge in a phi node, and the operand it uses is the result of that
2016    phi node.
2017    I.E. i_29 = i_3 + 1
2018         i_3 = PHI (0, i_29);  */
2019 
2020 static bool
stmt_is_bumper_for_loop(struct loop * loop,tree stmt)2021 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2022 {
2023   tree use;
2024   tree def;
2025   imm_use_iterator iter;
2026   use_operand_p use_p;
2027 
2028   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2029   if (!def)
2030     return false;
2031 
2032   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2033     {
2034       use = USE_STMT (use_p);
2035       if (TREE_CODE (use) == PHI_NODE)
2036 	{
2037 	  if (phi_loop_edge_uses_def (loop, use, def))
2038 	    if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2039 	      return true;
2040 	}
2041     }
2042   return false;
2043 }
2044 
2045 
2046 /* Return true if LOOP is a perfect loop nest.
2047    Perfect loop nests are those loop nests where all code occurs in the
2048    innermost loop body.
   For example:
2052    DO I = 1, 20
2053        S1
2054        DO J = 1, 20
2055        ...
2056        END DO
2057    END DO
2058    is not a perfect loop nest because of S1.
2059 
2060    DO I = 1, 20
2061       DO J = 1, 20
2062         S1
2063 	...
2064       END DO
2065    END DO
2066    is a perfect loop nest.
2067 
2068    Since we don't have high level loops anymore, we basically have to walk our
2069    statements and ignore those that are there because the loop needs them (IE
2070    the induction variable increment, and jump back to the top of the loop).  */
2071 
2072 bool
perfect_nest_p(struct loop * loop)2073 perfect_nest_p (struct loop *loop)
2074 {
2075   basic_block *bbs;
2076   size_t i;
2077   tree exit_cond;
2078 
2079   if (!loop->inner)
2080     return true;
2081   bbs = get_loop_body (loop);
2082   exit_cond = get_loop_exit_condition (loop);
2083   for (i = 0; i < loop->num_nodes; i++)
2084     {
2085       if (bbs[i]->loop_father == loop)
2086 	{
2087 	  block_stmt_iterator bsi;
2088 	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2089 	    {
2090 	      tree stmt = bsi_stmt (bsi);
2091 	      if (stmt == exit_cond
2092 		  || not_interesting_stmt (stmt)
2093 		  || stmt_is_bumper_for_loop (loop, stmt))
2094 		continue;
2095 	      free (bbs);
2096 	      return false;
2097 	    }
2098 	}
2099     }
2100   free (bbs);
2101   /* See if the inner loops are perfectly nested as well.  */
2102   if (loop->inner)
2103     return perfect_nest_p (loop->inner);
2104   return true;
2105 }
2106 
/* In STMT, replace uses of X -- and uses of any variable that advances
   with the same step as X -- with Y.  YINIT is the initial value of Y;
   REPLACEMENTS is a hash table used to avoid creating duplicate
   temporaries; FIRSTBSI is a statement iterator at the beginning of the
   body basic block where new temporaries should be inserted.  */
2112 
static void
replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
				int xstep, tree y, tree yinit,
				htab_t replacements,
				block_stmt_iterator *firstbsi)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  /* Walk every scalar (non-virtual) use operand of STMT.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);
      tree step = NULL_TREE;
      tree scev, init, val, var, setstmt;
      struct tree_map *h, in;
      void **loc;

      /* Replace uses of X with Y right away.  */
      if (use == x)
	{
	  SET_USE (use_p, y);
	  continue;
	}

      /* Analyze the scalar evolution of USE in LOOP to find out
	 whether it advances in lockstep with X.  */
      scev = instantiate_parameters (loop,
				     analyze_scalar_evolution (loop, use));

      if (scev == NULL || scev == chrec_dont_know)
	continue;

      /* Only uses whose evolution has a known constant step equal to
	 XSTEP are equivalent to X up to their initial value.  */
      step = evolution_part_in_loop_num (scev, loop->num);
      if (step == NULL
	  || step == chrec_dont_know
	  || TREE_CODE (step) != INTEGER_CST
	  || int_cst_value (step) != xstep)
	continue;

      /* Use REPLACEMENTS hash table to cache already created
	 temporaries.  */
      in.hash = htab_hash_pointer (use);
      in.from = use;
      h = htab_find_with_hash (replacements, &in, in.hash);
      if (h != NULL)
	{
	  SET_USE (use_p, h->to);
	  continue;
	}

      /* USE which has the same step as X should be replaced
	 with a temporary set to Y + YINIT - INIT.  */
      init = initial_condition_in_loop_num (scev, loop->num);
      gcc_assert (init != NULL && init != chrec_dont_know);
      if (TREE_TYPE (use) == TREE_TYPE (y))
	{
	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
	  if (val == y)
 	    {
	      /* If X has the same type as USE, the same step
		 and same initial value, it can be replaced by Y.  */
	      SET_USE (use_p, y);
	      continue;
	    }
	}
      else
	{
	  /* Types differ: compute the offset in Y's type, convert it
	     to USE's type, then add USE's initial value.  */
	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
	  val = fold_convert (TREE_TYPE (use), val);
	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
	}

      /* Create a temporary variable and insert it at the beginning
	 of the loop body basic block, right after the PHI node
	 which sets Y.  */
      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
      add_referenced_var (var);
      val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
      setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
      var = make_ssa_name (var, setstmt);
      TREE_OPERAND (setstmt, 0) = var;
      bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
      update_stmt (setstmt);
      SET_USE (use_p, var);

      /* Record USE -> VAR in REPLACEMENTS so later occurrences of USE
	 reuse the same temporary.  The entry is GC-allocated because
	 the table was created with htab_create_ggc.  */
      h = ggc_alloc (sizeof (struct tree_map));
      h->hash = in.hash;
      h->from = use;
      h->to = var;
      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
      gcc_assert ((*(struct tree_map **)loc) == NULL);
      *(struct tree_map **) loc = h;
    }
}
2205 
2206 /* Return true if STMT is an exit PHI for LOOP */
2207 
2208 static bool
exit_phi_for_loop_p(struct loop * loop,tree stmt)2209 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2210 {
2211 
2212   if (TREE_CODE (stmt) != PHI_NODE
2213       || PHI_NUM_ARGS (stmt) != 1
2214       || bb_for_stmt (stmt) != loop->single_exit->dest)
2215     return false;
2216 
2217   return true;
2218 }
2219 
2220 /* Return true if STMT can be put back into the loop INNER, by
2221    copying it to the beginning of that loop and changing the uses.  */
2222 
2223 static bool
can_put_in_inner_loop(struct loop * inner,tree stmt)2224 can_put_in_inner_loop (struct loop *inner, tree stmt)
2225 {
2226   imm_use_iterator imm_iter;
2227   use_operand_p use_p;
2228 
2229   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2230   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2231       || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2232     return false;
2233 
2234   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2235     {
2236       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2237 	{
2238 	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2239 
2240 	  if (!flow_bb_inside_loop_p (inner, immbb))
2241 	    return false;
2242 	}
2243     }
2244   return true;
2245 }
2246 
2247 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2248 static bool
can_put_after_inner_loop(struct loop * loop,tree stmt)2249 can_put_after_inner_loop (struct loop *loop, tree stmt)
2250 {
2251   imm_use_iterator imm_iter;
2252   use_operand_p use_p;
2253 
2254   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2255     return false;
2256 
2257   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2258     {
2259       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2260 	{
2261 	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2262 
2263 	  if (!dominated_by_p (CDI_DOMINATORS,
2264 			       immbb,
2265 			       loop->inner->header)
2266 	      && !can_put_in_inner_loop (loop->inner, stmt))
2267 	    return false;
2268 	}
2269     }
2270   return true;
2271 }
2272 
2273 
2274 
2275 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2276    perfect one.  At the moment, we only handle imperfect nests of
2277    depth 2, where all of the statements occur after the inner loop.  */
2278 
static bool
can_convert_to_perfect_nest (struct loop *loop)
{
  basic_block *bbs;
  tree exit_condition, phi;
  size_t i;
  block_stmt_iterator bsi;
  basic_block exitdest;

  /* Can't handle triply nested+ loops yet.  */
  if (!loop->inner || loop->inner->inner)
    return false;

  bbs = get_loop_body (loop);
  exit_condition = get_loop_exit_condition (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      /* Only inspect blocks that belong to the outer loop itself,
	 not to the inner loop.  */
      if (bbs[i]->loop_father == loop)
	{
	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
	    {
	      tree stmt = bsi_stmt (bsi);

	      /* Loop machinery (exit test, control flow, iv bump) is
		 never an obstacle.  */
	      if (stmt == exit_condition
		  || not_interesting_stmt (stmt)
		  || stmt_is_bumper_for_loop (loop, stmt))
		continue;

	      /* If this is a scalar operation that can be put back
	         into the inner loop, or after the inner loop, through
		 copying, then do so. This works on the theory that
		 any amount of scalar code we have to reduplicate
		 into or after the loops is less expensive that the
		 win we get from rearranging the memory walk
		 the loop is doing so that it has better
		 cache behavior.  */
	      if (TREE_CODE (stmt) == MODIFY_EXPR)
		{
		  use_operand_p use_a, use_b;
		  imm_use_iterator imm_iter;
		  ssa_op_iter op_iter, op_iter1;
		  tree op0 = TREE_OPERAND (stmt, 0);
		  tree scev = instantiate_parameters
		    (loop, analyze_scalar_evolution (loop, op0));

		  /* If the IV is simple, it can be duplicated.  */
		  if (!automatically_generated_chrec_p (scev))
		    {
		      tree step = evolution_part_in_loop_num (scev, loop->num);
		      if (step && step != chrec_dont_know
			  && TREE_CODE (step) == INTEGER_CST)
			continue;
		    }

		  /* The statement should not define a variable used
		     in the inner loop.  */
		  if (TREE_CODE (op0) == SSA_NAME)
		    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
		      if (bb_for_stmt (USE_STMT (use_a))->loop_father
			  == loop->inner)
			goto fail;

		  /* Check every scalar use operand of STMT; these are
		     SSA names, so SSA_NAME_DEF_STMT below is safe.  */
		  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
		    {
		      tree node, op = USE_FROM_PTR (use_a);

		      /* The variables should not be used in both loops.  */
		      FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
		      if (bb_for_stmt (USE_STMT (use_b))->loop_father
			  == loop->inner)
			goto fail;

		      /* The statement should not use the value of a
			 scalar that was modified in the loop.  */
		      node = SSA_NAME_DEF_STMT (op);
		      if (TREE_CODE (node) == PHI_NODE)
			FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
			  {
			    tree arg = USE_FROM_PTR (use_b);

			    if (TREE_CODE (arg) == SSA_NAME)
			      {
				tree arg_stmt = SSA_NAME_DEF_STMT (arg);

				if (bb_for_stmt (arg_stmt)->loop_father
				    == loop->inner)
				  goto fail;
			      }
			  }
		    }

		  /* The statement is safe to duplicate if it can be
		     sunk into the inner loop or hoisted after it.  */
		  if (can_put_in_inner_loop (loop->inner, stmt)
		      || can_put_after_inner_loop (loop, stmt))
		    continue;
		}

	      /* Otherwise, if the bb of a statement we care about isn't
		 dominated by the header of the inner loop, then we can't
		 handle this case right now.  This test ensures that the
		 statement comes completely *after* the inner loop.  */
	      if (!dominated_by_p (CDI_DOMINATORS,
				   bb_for_stmt (stmt),
				   loop->inner->header))
		goto fail;
	    }
	}
    }

  /* We also need to make sure the loop exit only has simple copy phis in it,
     otherwise we don't know how to transform it into a perfect nest right
     now.  */
  exitdest = loop->single_exit->dest;

  for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
    if (PHI_NUM_ARGS (phi) != 1)
      goto fail;

  free (bbs);
  return true;

 fail:
  /* BBS is freed on both paths, so no leak on the bail-out path.  */
  free (bbs);
  return false;
}
2403 
2404 /* Transform the loop nest into a perfect nest, if possible.
2405    LOOPS is the current struct loops *
2406    LOOP is the loop nest to transform into a perfect nest
2407    LBOUNDS are the lower bounds for the loops to transform
2408    UBOUNDS are the upper bounds for the loops to transform
2409    STEPS is the STEPS for the loops to transform.
2410    LOOPIVS is the induction variables for the loops to transform.
2411 
2412    Basically, for the case of
2413 
2414    FOR (i = 0; i < 50; i++)
2415     {
2416      FOR (j =0; j < 50; j++)
2417      {
2418         <whatever>
2419      }
2420      <some code>
2421     }
2422 
2423    This function will transform it into a perfect loop nest by splitting the
2424    outer loop into two loops, like so:
2425 
2426    FOR (i = 0; i < 50; i++)
2427    {
2428      FOR (j = 0; j < 50; j++)
2429      {
2430          <whatever>
2431      }
2432    }
2433 
2434    FOR (i = 0; i < 50; i ++)
2435    {
2436     <some code>
2437    }
2438 
2439    Return FALSE if we can't make this loop into a perfect nest.  */
2440 
static bool
perfect_nestify (struct loops *loops,
		 struct loop *loop,
		 VEC(tree,heap) *lbounds,
		 VEC(tree,heap) *ubounds,
		 VEC(int,heap) *steps,
		 VEC(tree,heap) *loopivs)
{
  basic_block *bbs;
  tree exit_condition;
  tree then_label, else_label, cond_stmt;
  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
  int i;
  block_stmt_iterator bsi, firstbsi;
  bool insert_after;
  edge e;
  struct loop *newloop;
  tree phi;
  tree uboundvar;
  tree stmt;
  tree oldivvar, ivvar, ivvarinced;
  VEC(tree,heap) *phis = NULL;
  htab_t replacements = NULL;

  /* Create the new loop.  */
  olddest = loop->single_exit->dest;
  preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);

  /* Push the exit phi nodes that we are moving.  Each phi contributes
     two entries: its result and its (single) argument.  */
  for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
    {
      VEC_reserve (tree, heap, phis, 2);
      VEC_quick_push (tree, phis, PHI_RESULT (phi));
      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
    }
  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);

  /* Remove the exit phis from the old basic block.  Make sure to set
     PHI_RESULT to null so it doesn't get released.  */
  while (phi_nodes (olddest) != NULL)
    {
      SET_PHI_RESULT (phi_nodes (olddest), NULL);
      remove_phi_node (phi_nodes (olddest), NULL);
    }

  /* and add them back to the new basic block.  */
  while (VEC_length (tree, phis) != 0)
    {
      tree def;
      tree phiname;
      def = VEC_pop (tree, phis);
      phiname = VEC_pop (tree, phis);
      phi = create_phi_node (phiname, preheaderbb);
      add_phi_arg (phi, def, single_pred_edge (preheaderbb));
    }
  flush_pending_stmts (e);
  VEC_free (tree, heap, phis);

  /* Build the skeleton of the split loop: header, a body block ending
     in a conditional, and a latch.  */
  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
  then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
  else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
  /* "1 != 0" is a placeholder condition; the real exit test is
     installed on the new loop's COND_EXPR further below.  */
  cond_stmt = build3 (COND_EXPR, void_type_node,
		      build2 (NE_EXPR, boolean_type_node,
			      integer_one_node,
			      integer_zero_node),
		      then_label, else_label);
  bsi = bsi_start (bodybb);
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
  make_edge (latchbb, headerbb, EDGE_FALLTHRU);

  /* Update the loop structures.  */
  newloop = duplicate_loop (loops, loop, olddest->loop_father);
  newloop->header = headerbb;
  newloop->latch = latchbb;
  newloop->single_exit = e;
  add_bb_to_loop (latchbb, newloop);
  add_bb_to_loop (bodybb, newloop);
  add_bb_to_loop (headerbb, newloop);
  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
			   loop->single_exit->src);
  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
  set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
  /* Create the new iv, reusing the type and bounds of the old
     outer-loop iv.  */
  oldivvar = VEC_index (tree, loopivs, 0);
  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
  add_referenced_var (ivvar);
  standard_iv_increment_position (newloop, &bsi, &insert_after);
  create_iv (VEC_index (tree, lbounds, 0),
	     build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
	     ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);

  /* Create the new upper bound.  This may be not just a variable, so we copy
     it to one just in case.  */

  exit_condition = get_loop_exit_condition (newloop);
  uboundvar = create_tmp_var (integer_type_node, "uboundvar");
  add_referenced_var (uboundvar);
  stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar,
		 VEC_index (tree, ubounds, 0));
  uboundvar = make_ssa_name (uboundvar, stmt);
  TREE_OPERAND (stmt, 0) = uboundvar;

  if (insert_after)
    bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
  else
    bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
  update_stmt (stmt);
  /* Continue while ubound >= incremented iv; the false edge (built
     above) exits to OLDDEST.  */
  COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
					    boolean_type_node,
					    uboundvar,
					    ivvarinced);
  update_stmt (exit_condition);
  replacements = htab_create_ggc (20, tree_map_hash,
				  tree_map_eq, NULL);
  bbs = get_loop_body_in_dom_order (loop);
  /* Now move the statements, and replace the induction variable in the moved
     statements with the correct loop induction variable.  */
  oldivvar = VEC_index (tree, loopivs, 0);
  firstbsi = bsi_start (bodybb);
  for (i = loop->num_nodes - 1; i >= 0 ; i--)
    {
      block_stmt_iterator tobsi = bsi_last (bodybb);
      if (bbs[i]->loop_father == loop)
	{
	  /* If this is true, we are *before* the inner loop.
	     If this isn't true, we are *after* it.

	     The only time can_convert_to_perfect_nest returns true when we
	     have statements before the inner loop is if they can be moved
	     into the inner loop.

	     The only time can_convert_to_perfect_nest returns true when we
	     have statements after the inner loop is if they can be moved into
	     the new split loop.  */

	  if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
	    {
	      block_stmt_iterator header_bsi
		= bsi_after_labels (loop->inner->header);

	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
		{
		  tree stmt = bsi_stmt (bsi);

		  /* Leave the loop machinery in place.  */
		  if (stmt == exit_condition
		      || not_interesting_stmt (stmt)
		      || stmt_is_bumper_for_loop (loop, stmt))
		    {
		      bsi_next (&bsi);
		      continue;
		    }

		  bsi_move_before (&bsi, &header_bsi);
		}
	    }
	  else
	    {
	      /* Note that the bsi only needs to be explicitly incremented
		 when we don't move something, since it is automatically
		 incremented when we do.  */
	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
		{
		  ssa_op_iter i;
		  tree n, stmt = bsi_stmt (bsi);

		  if (stmt == exit_condition
		      || not_interesting_stmt (stmt)
		      || stmt_is_bumper_for_loop (loop, stmt))
		    {
		      bsi_next (&bsi);
		      continue;
		    }

		  /* Rewrite the old iv (and equivalently-stepping
		     variables) in terms of the new loop's iv before
		     moving the statement.  */
		  replace_uses_equiv_to_x_with_y
		    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
		     VEC_index (tree, lbounds, 0), replacements, &firstbsi);

		  bsi_move_before (&bsi, &tobsi);

		  /* If the statement has any virtual operands, they may
		     need to be rewired because the original loop may
		     still reference them.  */
		  FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
		    mark_sym_for_renaming (SSA_NAME_VAR (n));
		}
	    }

	}
    }

  free (bbs);
  htab_delete (replacements);
  /* Verify (and report) that the rearranged nest really is perfect.  */
  return perfect_nest_p (loop);
}
2642 
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DEPENDENCE_RELATIONS.  The conservative
   answer is false.
2646 
2647    "Wolfe proves that a unimodular transformation represented by the
2648    matrix T is legal when applied to a loop nest with a set of
2649    lexicographically non-negative distance vectors RDG if and only if
2650    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2651    i.e.: if and only if it transforms the lexicographically positive
2652    distance vectors to lexicographically positive vectors.  Note that
2653    a unimodular matrix must transform the zero vector (and only it) to
2654    the zero vector." S.Muchnick.  */
2655 
2656 bool
lambda_transform_legal_p(lambda_trans_matrix trans,int nb_loops,VEC (ddr_p,heap)* dependence_relations)2657 lambda_transform_legal_p (lambda_trans_matrix trans,
2658 			  int nb_loops,
2659 			  VEC (ddr_p, heap) *dependence_relations)
2660 {
2661   unsigned int i, j;
2662   lambda_vector distres;
2663   struct data_dependence_relation *ddr;
2664 
2665   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2666 	      && LTM_ROWSIZE (trans) == nb_loops);
2667 
2668   /* When there is an unknown relation in the dependence_relations, we
2669      know that it is no worth looking at this loop nest: give up.  */
2670   ddr = VEC_index (ddr_p, dependence_relations, 0);
2671   if (ddr == NULL)
2672     return true;
2673   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2674     return false;
2675 
2676   distres = lambda_vector_new (nb_loops);
2677 
2678   /* For each distance vector in the dependence graph.  */
2679   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2680     {
2681       /* Don't care about relations for which we know that there is no
2682 	 dependence, nor about read-read (aka. output-dependences):
2683 	 these data accesses can happen in any order.  */
2684       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2685 	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2686 	continue;
2687 
2688       /* Conservatively answer: "this transformation is not valid".  */
2689       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2690 	return false;
2691 
2692       /* If the dependence could not be captured by a distance vector,
2693 	 conservatively answer that the transform is not valid.  */
2694       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2695 	return false;
2696 
2697       /* Compute trans.dist_vect */
2698       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2699 	{
2700 	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2701 				     DDR_DIST_VECT (ddr, j), distres);
2702 
2703 	  if (!lambda_vector_lexico_pos (distres, nb_loops))
2704 	    return false;
2705 	}
2706     }
2707   return true;
2708 }
2709