1 /*  Loop transformation code generation
2     Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3     Contributed by Daniel Berlin <dberlin@dberlin.org>
4 
5     This file is part of GCC.
6 
7     GCC is free software; you can redistribute it and/or modify it under
8     the terms of the GNU General Public License as published by the Free
9     Software Foundation; either version 2, or (at your option) any later
10     version.
11 
12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15     for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with GCC; see the file COPYING.  If not, write to the Free
19     Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20     02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-pass.h"
41 #include "tree-scalar-evolution.h"
42 #include "vec.h"
43 #include "lambda.h"
44 
45 /* This loop nest code generation is based on non-singular matrix
46    math.
47 
48  A little terminology and a general sketch of the algorithm.  See "A singular
49  loop transformation framework based on non-singular matrices" by Wei Li and
50  Keshav Pingali for formal proofs that the various statements below are
51  correct.
52 
53  A loop iteration space represents the points traversed by the loop.  A point in the
54  iteration space can be represented by a vector of size <loop depth>.  You can
55  therefore represent the iteration space as an integral combinations of a set
56  of basis vectors.
57 
58  A loop iteration space is dense if every integer point between the loop
59  bounds is a point in the iteration space.  Every loop with a step of 1
60  therefore has a dense iteration space.
61 
62  for i = 1 to 3, step 1 is a dense iteration space.
63 
64  A loop iteration space is sparse if it is not dense.  That is, the iteration
65  space skips integer points that are within the loop bounds.
66 
67  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
68  2 is skipped.
69 
70  Dense source spaces are easy to transform, because they don't skip any
71  points to begin with.  Thus we can compute the exact bounds of the target
72  space using min/max and floor/ceil.
73 
74  For a dense source space, we take the transformation matrix, decompose it
75  into a lower triangular part (H) and a unimodular part (U).
76  We then compute the auxiliary space from the unimodular part (source loop
77  nest . U = auxiliary space) , which has two important properties:
78   1. It traverses the iterations in the same lexicographic order as the source
79   space.
80   2. It is a dense space when the source is a dense space (even if the target
81   space is going to be sparse).
82 
83  Given the auxiliary space, we use the lower triangular part to compute the
84  bounds in the target space by simple matrix multiplication.
85  The gaps in the target space (IE the new loop step sizes) will be the
86  diagonals of the H matrix.
87 
88  Sparse source spaces require another step, because you can't directly compute
89  the exact bounds of the auxiliary and target space from the sparse space.
90  Rather than try to come up with a separate algorithm to handle sparse source
91  spaces directly, we just find a legal transformation matrix that gives you
92  the sparse source space, from a dense space, and then transform the dense
93  space.
94 
95  For a regular sparse space, you can represent the source space as an integer
96  lattice, and the base space of that lattice will always be dense.  Thus, we
97  effectively use the lattice to figure out the transformation from the lattice
98  base space, to the sparse iteration space (IE what transform was applied to
99  the dense space to make it sparse).  We then compose this transform with the
100  transformation matrix specified by the user (since our matrix transformations
101  are closed under composition, this is okay).  We can then use the base space
102  (which is dense) plus the composed transformation matrix, to compute the rest
103  of the transform using the dense space algorithm above.
104 
105  In other words, our sparse source space (B) is decomposed into a dense base
106  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
107  We then compute the composition of L and the user transformation matrix (T),
108  so that T is now a transform from A to the result, instead of from B to the
109  result.
110  IE A.(LT) = result instead of B.T = result
111  Since A is now a dense source space, we can use the dense source space
112  algorithm above to compute the result of applying transform (LT) to A.
113 
114  Fourier-Motzkin elimination is used to compute the bounds of the base space
115  of the lattice.  */
116 
117 DEF_VEC_I(int);
118 DEF_VEC_ALLOC_I(int,heap);
119 
120 static bool perfect_nestify (struct loops *,
121 			     struct loop *, VEC(tree,heap) *,
122 			     VEC(tree,heap) *, VEC(int,heap) *,
123 			     VEC(tree,heap) *);
124 /* Lattice stuff that is internal to the code generation algorithm.  */
125 
126 typedef struct
127 {
128   /* Lattice base matrix.  */
129   lambda_matrix base;
130   /* Lattice dimension.  */
131   int dimension;
132   /* Origin vector for the coefficients.  */
133   lambda_vector origin;
134   /* Origin matrix for the invariants.  */
135   lambda_matrix origin_invariants;
136   /* Number of invariants.  */
137   int invariants;
138 } *lambda_lattice;
139 
140 #define LATTICE_BASE(T) ((T)->base)
141 #define LATTICE_DIMENSION(T) ((T)->dimension)
142 #define LATTICE_ORIGIN(T) ((T)->origin)
143 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
144 #define LATTICE_INVARIANTS(T) ((T)->invariants)
145 
146 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
147 		       int, int);
148 static lambda_lattice lambda_lattice_new (int, int);
149 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
150 
151 static tree find_induction_var_from_exit_cond (struct loop *);
152 
153 /* Create a new lambda body vector.  */
154 
155 lambda_body_vector
lambda_body_vector_new(int size)156 lambda_body_vector_new (int size)
157 {
158   lambda_body_vector ret;
159 
160   ret = ggc_alloc (sizeof (*ret));
161   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
162   LBV_SIZE (ret) = size;
163   LBV_DENOMINATOR (ret) = 1;
164   return ret;
165 }
166 
167 /* Compute the new coefficients for the vector based on the
168   *inverse* of the transformation matrix.  */
169 
170 lambda_body_vector
lambda_body_vector_compute_new(lambda_trans_matrix transform,lambda_body_vector vect)171 lambda_body_vector_compute_new (lambda_trans_matrix transform,
172 				lambda_body_vector vect)
173 {
174   lambda_body_vector temp;
175   int depth;
176 
177   /* Make sure the matrix is square.  */
178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
179 
180   depth = LTM_ROWSIZE (transform);
181 
182   temp = lambda_body_vector_new (depth);
183   LBV_DENOMINATOR (temp) =
184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
186 			     LTM_MATRIX (transform), depth,
187 			     LBV_COEFFICIENTS (temp));
188   LBV_SIZE (temp) = LBV_SIZE (vect);
189   return temp;
190 }
191 
192 /* Print out a lambda body vector.  */
193 
194 void
print_lambda_body_vector(FILE * outfile,lambda_body_vector body)195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
196 {
197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
198 }
199 
200 /* Return TRUE if two linear expressions are equal.  */
201 
202 static bool
lle_equal(lambda_linear_expression lle1,lambda_linear_expression lle2,int depth,int invariants)203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
204 	   int depth, int invariants)
205 {
206   int i;
207 
208   if (lle1 == NULL || lle2 == NULL)
209     return false;
210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
211     return false;
212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
213     return false;
214   for (i = 0; i < depth; i++)
215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
216       return false;
217   for (i = 0; i < invariants; i++)
218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
219 	LLE_INVARIANT_COEFFICIENTS (lle2)[i])
220       return false;
221   return true;
222 }
223 
224 /* Create a new linear expression with dimension DIM, and total number
225    of invariants INVARIANTS.  */
226 
227 lambda_linear_expression
lambda_linear_expression_new(int dim,int invariants)228 lambda_linear_expression_new (int dim, int invariants)
229 {
230   lambda_linear_expression ret;
231 
232   ret = ggc_alloc_cleared (sizeof (*ret));
233 
234   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
235   LLE_CONSTANT (ret) = 0;
236   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
237   LLE_DENOMINATOR (ret) = 1;
238   LLE_NEXT (ret) = NULL;
239 
240   return ret;
241 }
242 
243 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
244    The starting letter used for variable names is START.  */
245 
246 static void
print_linear_expression(FILE * outfile,lambda_vector expr,int size,char start)247 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
248 			 char start)
249 {
250   int i;
251   bool first = true;
252   for (i = 0; i < size; i++)
253     {
254       if (expr[i] != 0)
255 	{
256 	  if (first)
257 	    {
258 	      if (expr[i] < 0)
259 		fprintf (outfile, "-");
260 	      first = false;
261 	    }
262 	  else if (expr[i] > 0)
263 	    fprintf (outfile, " + ");
264 	  else
265 	    fprintf (outfile, " - ");
266 	  if (abs (expr[i]) == 1)
267 	    fprintf (outfile, "%c", start + i);
268 	  else
269 	    fprintf (outfile, "%d%c", abs (expr[i]), start + i);
270 	}
271     }
272 }
273 
274 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
275    depth/number of coefficients is given by DEPTH, the number of invariants is
276    given by INVARIANTS, and the character to start variable names with is given
277    by START.  */
278 
279 void
print_lambda_linear_expression(FILE * outfile,lambda_linear_expression expr,int depth,int invariants,char start)280 print_lambda_linear_expression (FILE * outfile,
281 				lambda_linear_expression expr,
282 				int depth, int invariants, char start)
283 {
284   fprintf (outfile, "\tLinear expression: ");
285   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
286   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
287   fprintf (outfile, "  invariants: ");
288   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
289 			   invariants, 'A');
290   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
291 }
292 
293 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
294    coefficients is given by DEPTH, the number of invariants is
295    given by INVARIANTS, and the character to start variable names with is given
296    by START.  */
297 
298 void
print_lambda_loop(FILE * outfile,lambda_loop loop,int depth,int invariants,char start)299 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
300 		   int invariants, char start)
301 {
302   int step;
303   lambda_linear_expression expr;
304 
305   gcc_assert (loop);
306 
307   expr = LL_LINEAR_OFFSET (loop);
308   step = LL_STEP (loop);
309   fprintf (outfile, "  step size = %d \n", step);
310 
311   if (expr)
312     {
313       fprintf (outfile, "  linear offset: \n");
314       print_lambda_linear_expression (outfile, expr, depth, invariants,
315 				      start);
316     }
317 
318   fprintf (outfile, "  lower bound: \n");
319   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
320     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
321   fprintf (outfile, "  upper bound: \n");
322   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
323     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
324 }
325 
326 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
327    number of invariants.  */
328 
329 lambda_loopnest
lambda_loopnest_new(int depth,int invariants)330 lambda_loopnest_new (int depth, int invariants)
331 {
332   lambda_loopnest ret;
333   ret = ggc_alloc (sizeof (*ret));
334 
335   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
336   LN_DEPTH (ret) = depth;
337   LN_INVARIANTS (ret) = invariants;
338 
339   return ret;
340 }
341 
342 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
343    character to use for loop names is given by START.  */
344 
345 void
print_lambda_loopnest(FILE * outfile,lambda_loopnest nest,char start)346 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
347 {
348   int i;
349   for (i = 0; i < LN_DEPTH (nest); i++)
350     {
351       fprintf (outfile, "Loop %c\n", start + i);
352       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
353 			 LN_INVARIANTS (nest), 'i');
354       fprintf (outfile, "\n");
355     }
356 }
357 
358 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
359    of invariants.  */
360 
361 static lambda_lattice
lambda_lattice_new(int depth,int invariants)362 lambda_lattice_new (int depth, int invariants)
363 {
364   lambda_lattice ret;
365   ret = ggc_alloc (sizeof (*ret));
366   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
367   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
368   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
369   LATTICE_DIMENSION (ret) = depth;
370   LATTICE_INVARIANTS (ret) = invariants;
371   return ret;
372 }
373 
374 /* Compute the lattice base for NEST.  The lattice base is essentially a
375    non-singular transform from a dense base space to a sparse iteration space.
376    We use it so that we don't have to specially handle the case of a sparse
377    iteration space in other parts of the algorithm.  As a result, this routine
378    only does something interesting (IE produce a matrix that isn't the
379    identity matrix) if NEST is a sparse space.  */
380 
381 static lambda_lattice
lambda_lattice_compute_base(lambda_loopnest nest)382 lambda_lattice_compute_base (lambda_loopnest nest)
383 {
384   lambda_lattice ret;
385   int depth, invariants;
386   lambda_matrix base;
387 
388   int i, j, step;
389   lambda_loop loop;
390   lambda_linear_expression expression;
391 
392   depth = LN_DEPTH (nest);
393   invariants = LN_INVARIANTS (nest);
394 
395   ret = lambda_lattice_new (depth, invariants);
396   base = LATTICE_BASE (ret);
397   for (i = 0; i < depth; i++)
398     {
399       loop = LN_LOOPS (nest)[i];
400       gcc_assert (loop);
401       step = LL_STEP (loop);
402       /* If we have a step of 1, then the base is one, and the
403          origin and invariant coefficients are 0.  */
404       if (step == 1)
405 	{
406 	  for (j = 0; j < depth; j++)
407 	    base[i][j] = 0;
408 	  base[i][i] = 1;
409 	  LATTICE_ORIGIN (ret)[i] = 0;
410 	  for (j = 0; j < invariants; j++)
411 	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
412 	}
413       else
414 	{
415 	  /* Otherwise, we need the lower bound expression (which must
416 	     be an affine function)  to determine the base.  */
417 	  expression = LL_LOWER_BOUND (loop);
418 	  gcc_assert (expression && !LLE_NEXT (expression)
419 		      && LLE_DENOMINATOR (expression) == 1);
420 
421 	  /* The lower triangular portion of the base is going to be the
422 	     coefficient times the step */
423 	  for (j = 0; j < i; j++)
424 	    base[i][j] = LLE_COEFFICIENTS (expression)[j]
425 	      * LL_STEP (LN_LOOPS (nest)[j]);
426 	  base[i][i] = step;
427 	  for (j = i + 1; j < depth; j++)
428 	    base[i][j] = 0;
429 
430 	  /* Origin for this loop is the constant of the lower bound
431 	     expression.  */
432 	  LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
433 
434 	  /* Coefficient for the invariants are equal to the invariant
435 	     coefficients in the expression.  */
436 	  for (j = 0; j < invariants; j++)
437 	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
438 	      LLE_INVARIANT_COEFFICIENTS (expression)[j];
439 	}
440     }
441   return ret;
442 }
443 
444 /* Compute the greatest common denominator of two numbers (A and B) using
445    Euclid's algorithm.  */
446 
447 static int
gcd(int a,int b)448 gcd (int a, int b)
449 {
450 
451   int x, y, z;
452 
453   x = abs (a);
454   y = abs (b);
455 
456   while (x > 0)
457     {
458       z = y % x;
459       y = x;
460       x = z;
461     }
462 
463   return (y);
464 }
465 
466 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
467 
468 static int
gcd_vector(lambda_vector vector,int size)469 gcd_vector (lambda_vector vector, int size)
470 {
471   int i;
472   int gcd1 = 0;
473 
474   if (size > 0)
475     {
476       gcd1 = vector[0];
477       for (i = 1; i < size; i++)
478 	gcd1 = gcd (gcd1, vector[i]);
479     }
480   return gcd1;
481 }
482 
483 /* Compute the least common multiple of two numbers A and B .  */
484 
485 static int
lcm(int a,int b)486 lcm (int a, int b)
487 {
488   return (abs (a) * abs (b) / gcd (a, b));
489 }
490 
491 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
492    auxiliary nest.
493    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
494    it is easy to calculate the answer and bounds.
495    A sketch of how it works:
496    Given a system of linear inequalities, ai * xj >= bk, you can always
497    rewrite the constraints so they are all of the form
498    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
499    in b1 ... bk, and some a in a1...ai)
500    You can then eliminate this x from the non-constant inequalities by
501    rewriting these as a <= b, x >= constant, and delete the x variable.
502    You can then repeat this for any remaining x variables, and then we have
503    an easy to use variable <= constant (or no variables at all) form that we
504    can construct our bounds from.
505 
506    In our case, each time we eliminate, we construct part of the bound from
507    the ith variable, then delete the ith variable.
508 
509    Remember the constant are in our vector a, our coefficient matrix is A,
510    and our invariant coefficient matrix is B.
511 
512    SIZE is the size of the matrices being passed.
513    DEPTH is the loop nest depth.
514    INVARIANTS is the number of loop invariants.
515    A, B, and a are the coefficient matrix, invariant coefficient, and a
516    vector of constants, respectively.  */
517 
518 static lambda_loopnest
compute_nest_using_fourier_motzkin(int size,int depth,int invariants,lambda_matrix A,lambda_matrix B,lambda_vector a)519 compute_nest_using_fourier_motzkin (int size,
520 				    int depth,
521 				    int invariants,
522 				    lambda_matrix A,
523 				    lambda_matrix B,
524 				    lambda_vector a)
525 {
526 
527   int multiple, f1, f2;
528   int i, j, k;
529   lambda_linear_expression expression;
530   lambda_loop loop;
531   lambda_loopnest auxillary_nest;
532   lambda_matrix swapmatrix, A1, B1;
533   lambda_vector swapvector, a1;
534   int newsize;
535 
536   A1 = lambda_matrix_new (128, depth);
537   B1 = lambda_matrix_new (128, invariants);
538   a1 = lambda_vector_new (128);
539 
540   auxillary_nest = lambda_loopnest_new (depth, invariants);
541 
542   for (i = depth - 1; i >= 0; i--)
543     {
544       loop = lambda_loop_new ();
545       LN_LOOPS (auxillary_nest)[i] = loop;
546       LL_STEP (loop) = 1;
547 
548       for (j = 0; j < size; j++)
549 	{
550 	  if (A[j][i] < 0)
551 	    {
552 	      /* Any linear expression in the matrix with a coefficient less
553 		 than 0 becomes part of the new lower bound.  */
554 	      expression = lambda_linear_expression_new (depth, invariants);
555 
556 	      for (k = 0; k < i; k++)
557 		LLE_COEFFICIENTS (expression)[k] = A[j][k];
558 
559 	      for (k = 0; k < invariants; k++)
560 		LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
561 
562 	      LLE_DENOMINATOR (expression) = -1 * A[j][i];
563 	      LLE_CONSTANT (expression) = -1 * a[j];
564 
565 	      /* Ignore if identical to the existing lower bound.  */
566 	      if (!lle_equal (LL_LOWER_BOUND (loop),
567 			      expression, depth, invariants))
568 		{
569 		  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
570 		  LL_LOWER_BOUND (loop) = expression;
571 		}
572 
573 	    }
574 	  else if (A[j][i] > 0)
575 	    {
576 	      /* Any linear expression with a coefficient greater than 0
577 		 becomes part of the new upper bound.  */
578 	      expression = lambda_linear_expression_new (depth, invariants);
579 	      for (k = 0; k < i; k++)
580 		LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
581 
582 	      for (k = 0; k < invariants; k++)
583 		LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
584 
585 	      LLE_DENOMINATOR (expression) = A[j][i];
586 	      LLE_CONSTANT (expression) = a[j];
587 
588 	      /* Ignore if identical to the existing upper bound.  */
589 	      if (!lle_equal (LL_UPPER_BOUND (loop),
590 			      expression, depth, invariants))
591 		{
592 		  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
593 		  LL_UPPER_BOUND (loop) = expression;
594 		}
595 
596 	    }
597 	}
598 
599       /* This portion creates a new system of linear inequalities by deleting
600 	 the i'th variable, reducing the system by one variable.  */
601       newsize = 0;
602       for (j = 0; j < size; j++)
603 	{
604 	  /* If the coefficient for the i'th variable is 0, then we can just
605 	     eliminate the variable straightaway.  Otherwise, we have to
606 	     multiply through by the coefficients we are eliminating.  */
607 	  if (A[j][i] == 0)
608 	    {
609 	      lambda_vector_copy (A[j], A1[newsize], depth);
610 	      lambda_vector_copy (B[j], B1[newsize], invariants);
611 	      a1[newsize] = a[j];
612 	      newsize++;
613 	    }
614 	  else if (A[j][i] > 0)
615 	    {
616 	      for (k = 0; k < size; k++)
617 		{
618 		  if (A[k][i] < 0)
619 		    {
620 		      multiple = lcm (A[j][i], A[k][i]);
621 		      f1 = multiple / A[j][i];
622 		      f2 = -1 * multiple / A[k][i];
623 
624 		      lambda_vector_add_mc (A[j], f1, A[k], f2,
625 					    A1[newsize], depth);
626 		      lambda_vector_add_mc (B[j], f1, B[k], f2,
627 					    B1[newsize], invariants);
628 		      a1[newsize] = f1 * a[j] + f2 * a[k];
629 		      newsize++;
630 		    }
631 		}
632 	    }
633 	}
634 
635       swapmatrix = A;
636       A = A1;
637       A1 = swapmatrix;
638 
639       swapmatrix = B;
640       B = B1;
641       B1 = swapmatrix;
642 
643       swapvector = a;
644       a = a1;
645       a1 = swapvector;
646 
647       size = newsize;
648     }
649 
650   return auxillary_nest;
651 }
652 
653 /* Compute the loop bounds for the auxiliary space NEST.
654    Input system used is Ax <= b.  TRANS is the unimodular transformation.
655    Given the original nest, this function will
656    1. Convert the nest into matrix form, which consists of a matrix for the
657    coefficients, a matrix for the
658    invariant coefficients, and a vector for the constants.
659    2. Use the matrix form to calculate the lattice base for the nest (which is
660    a dense space)
661    3. Compose the dense space transform with the user specified transform, to
662    get a transform we can easily calculate transformed bounds for.
663    4. Multiply the composed transformation matrix times the matrix form of the
664    loop.
665    5. Transform the newly created matrix (from step 4) back into a loop nest
666    using fourier motzkin elimination to figure out the bounds.  */
667 
668 static lambda_loopnest
lambda_compute_auxillary_space(lambda_loopnest nest,lambda_trans_matrix trans)669 lambda_compute_auxillary_space (lambda_loopnest nest,
670 				lambda_trans_matrix trans)
671 {
672   lambda_matrix A, B, A1, B1;
673   lambda_vector a, a1;
674   lambda_matrix invertedtrans;
675   int depth, invariants, size;
676   int i, j;
677   lambda_loop loop;
678   lambda_linear_expression expression;
679   lambda_lattice lattice;
680 
681   depth = LN_DEPTH (nest);
682   invariants = LN_INVARIANTS (nest);
683 
684   /* Unfortunately, we can't know the number of constraints we'll have
685      ahead of time, but this should be enough even in ridiculous loop nest
686      cases. We must not go over this limit.  */
687   A = lambda_matrix_new (128, depth);
688   B = lambda_matrix_new (128, invariants);
689   a = lambda_vector_new (128);
690 
691   A1 = lambda_matrix_new (128, depth);
692   B1 = lambda_matrix_new (128, invariants);
693   a1 = lambda_vector_new (128);
694 
695   /* Store the bounds in the equation matrix A, constant vector a, and
696      invariant matrix B, so that we have Ax <= a + B.
697      This requires a little equation rearranging so that everything is on the
698      correct side of the inequality.  */
699   size = 0;
700   for (i = 0; i < depth; i++)
701     {
702       loop = LN_LOOPS (nest)[i];
703 
704       /* First we do the lower bound.  */
705       if (LL_STEP (loop) > 0)
706 	expression = LL_LOWER_BOUND (loop);
707       else
708 	expression = LL_UPPER_BOUND (loop);
709 
710       for (; expression != NULL; expression = LLE_NEXT (expression))
711 	{
712 	  /* Fill in the coefficient.  */
713 	  for (j = 0; j < i; j++)
714 	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
715 
716 	  /* And the invariant coefficient.  */
717 	  for (j = 0; j < invariants; j++)
718 	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
719 
720 	  /* And the constant.  */
721 	  a[size] = LLE_CONSTANT (expression);
722 
723 	  /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
724 	     constants and single variables on   */
725 	  A[size][i] = -1 * LLE_DENOMINATOR (expression);
726 	  a[size] *= -1;
727 	  for (j = 0; j < invariants; j++)
728 	    B[size][j] *= -1;
729 
730 	  size++;
731 	  /* Need to increase matrix sizes above.  */
732 	  gcc_assert (size <= 127);
733 
734 	}
735 
736       /* Then do the exact same thing for the upper bounds.  */
737       if (LL_STEP (loop) > 0)
738 	expression = LL_UPPER_BOUND (loop);
739       else
740 	expression = LL_LOWER_BOUND (loop);
741 
742       for (; expression != NULL; expression = LLE_NEXT (expression))
743 	{
744 	  /* Fill in the coefficient.  */
745 	  for (j = 0; j < i; j++)
746 	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
747 
748 	  /* And the invariant coefficient.  */
749 	  for (j = 0; j < invariants; j++)
750 	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
751 
752 	  /* And the constant.  */
753 	  a[size] = LLE_CONSTANT (expression);
754 
755 	  /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
756 	  for (j = 0; j < i; j++)
757 	    A[size][j] *= -1;
758 	  A[size][i] = LLE_DENOMINATOR (expression);
759 	  size++;
760 	  /* Need to increase matrix sizes above.  */
761 	  gcc_assert (size <= 127);
762 
763 	}
764     }
765 
766   /* Compute the lattice base x = base * y + origin, where y is the
767      base space.  */
768   lattice = lambda_lattice_compute_base (nest);
769 
770   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
771 
772   /* A1 = A * L */
773   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
774 
775   /* a1 = a - A * origin constant.  */
776   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
777   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
778 
779   /* B1 = B - A * origin invariant.  */
780   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
781 		      invariants);
782   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
783 
784   /* Now compute the auxiliary space bounds by first inverting U, multiplying
785      it by A1, then performing fourier motzkin.  */
786 
787   invertedtrans = lambda_matrix_new (depth, depth);
788 
789   /* Compute the inverse of U.  */
790   lambda_matrix_inverse (LTM_MATRIX (trans),
791 			 invertedtrans, depth);
792 
793   /* A = A1 inv(U).  */
794   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
795 
796   return compute_nest_using_fourier_motzkin (size, depth, invariants,
797 					     A, B1, a1);
798 }
799 
800 /* Compute the loop bounds for the target space, using the bounds of
801    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
802    The target space loop bounds are computed by multiplying the triangular
803    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
804    the loop steps (positive or negative) is then used to swap the bounds if
805    the loop counts downwards.
806    Return the target loopnest.  */
807 
808 static lambda_loopnest
lambda_compute_target_space(lambda_loopnest auxillary_nest,lambda_trans_matrix H,lambda_vector stepsigns)809 lambda_compute_target_space (lambda_loopnest auxillary_nest,
810 			     lambda_trans_matrix H, lambda_vector stepsigns)
811 {
812   lambda_matrix inverse, H1;
813   int determinant, i, j;
814   int gcd1, gcd2;
815   int factor;
816 
817   lambda_loopnest target_nest;
818   int depth, invariants;
819   lambda_matrix target;
820 
821   lambda_loop auxillary_loop, target_loop;
822   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
823 
824   depth = LN_DEPTH (auxillary_nest);
825   invariants = LN_INVARIANTS (auxillary_nest);
826 
827   inverse = lambda_matrix_new (depth, depth);
828   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
829 
830   /* H1 is H excluding its diagonal.  */
831   H1 = lambda_matrix_new (depth, depth);
832   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
833 
834   for (i = 0; i < depth; i++)
835     H1[i][i] = 0;
836 
837   /* Computes the linear offsets of the loop bounds.  */
838   target = lambda_matrix_new (depth, depth);
839   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
840 
841   target_nest = lambda_loopnest_new (depth, invariants);
842 
843   for (i = 0; i < depth; i++)
844     {
845 
846       /* Get a new loop structure.  */
847       target_loop = lambda_loop_new ();
848       LN_LOOPS (target_nest)[i] = target_loop;
849 
850       /* Computes the gcd of the coefficients of the linear part.  */
851       gcd1 = gcd_vector (target[i], i);
852 
853       /* Include the denominator in the GCD.  */
854       gcd1 = gcd (gcd1, determinant);
855 
856       /* Now divide through by the gcd.  */
857       for (j = 0; j < i; j++)
858 	target[i][j] = target[i][j] / gcd1;
859 
860       expression = lambda_linear_expression_new (depth, invariants);
861       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
862       LLE_DENOMINATOR (expression) = determinant / gcd1;
863       LLE_CONSTANT (expression) = 0;
864       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
865 			   invariants);
866       LL_LINEAR_OFFSET (target_loop) = expression;
867     }
868 
869   /* For each loop, compute the new bounds from H.  */
870   for (i = 0; i < depth; i++)
871     {
872       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
873       target_loop = LN_LOOPS (target_nest)[i];
874       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
875       factor = LTM_MATRIX (H)[i][i];
876 
877       /* First we do the lower bound.  */
878       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
879 
880       for (; auxillary_expr != NULL;
881 	   auxillary_expr = LLE_NEXT (auxillary_expr))
882 	{
883 	  target_expr = lambda_linear_expression_new (depth, invariants);
884 	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
885 				     depth, inverse, depth,
886 				     LLE_COEFFICIENTS (target_expr));
887 	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
888 				    LLE_COEFFICIENTS (target_expr), depth,
889 				    factor);
890 
891 	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
892 	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
893 			      LLE_INVARIANT_COEFFICIENTS (target_expr),
894 			      invariants);
895 	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
896 				    LLE_INVARIANT_COEFFICIENTS (target_expr),
897 				    invariants, factor);
898 	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
899 
900 	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
901 	    {
902 	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
903 		* determinant;
904 	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
905 					(target_expr),
906 					LLE_INVARIANT_COEFFICIENTS
907 					(target_expr), invariants,
908 					determinant);
909 	      LLE_DENOMINATOR (target_expr) =
910 		LLE_DENOMINATOR (target_expr) * determinant;
911 	    }
912 	  /* Find the gcd and divide by it here, rather than doing it
913 	     at the tree level.  */
914 	  gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
915 	  gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
916 			     invariants);
917 	  gcd1 = gcd (gcd1, gcd2);
918 	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
919 	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
920 	  for (j = 0; j < depth; j++)
921 	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
922 	  for (j = 0; j < invariants; j++)
923 	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
924 	  LLE_CONSTANT (target_expr) /= gcd1;
925 	  LLE_DENOMINATOR (target_expr) /= gcd1;
926 	  /* Ignore if identical to existing bound.  */
927 	  if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
928 			  invariants))
929 	    {
930 	      LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
931 	      LL_LOWER_BOUND (target_loop) = target_expr;
932 	    }
933 	}
934       /* Now do the upper bound.  */
935       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
936 
937       for (; auxillary_expr != NULL;
938 	   auxillary_expr = LLE_NEXT (auxillary_expr))
939 	{
940 	  target_expr = lambda_linear_expression_new (depth, invariants);
941 	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
942 				     depth, inverse, depth,
943 				     LLE_COEFFICIENTS (target_expr));
944 	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
945 				    LLE_COEFFICIENTS (target_expr), depth,
946 				    factor);
947 	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
948 	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
949 			      LLE_INVARIANT_COEFFICIENTS (target_expr),
950 			      invariants);
951 	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
952 				    LLE_INVARIANT_COEFFICIENTS (target_expr),
953 				    invariants, factor);
954 	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
955 
956 	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
957 	    {
958 	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
959 		* determinant;
960 	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
961 					(target_expr),
962 					LLE_INVARIANT_COEFFICIENTS
963 					(target_expr), invariants,
964 					determinant);
965 	      LLE_DENOMINATOR (target_expr) =
966 		LLE_DENOMINATOR (target_expr) * determinant;
967 	    }
968 	  /* Find the gcd and divide by it here, instead of at the
969 	     tree level.  */
970 	  gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
971 	  gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
972 			     invariants);
973 	  gcd1 = gcd (gcd1, gcd2);
974 	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
975 	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
976 	  for (j = 0; j < depth; j++)
977 	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
978 	  for (j = 0; j < invariants; j++)
979 	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
980 	  LLE_CONSTANT (target_expr) /= gcd1;
981 	  LLE_DENOMINATOR (target_expr) /= gcd1;
982 	  /* Ignore if equal to existing bound.  */
983 	  if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
984 			  invariants))
985 	    {
986 	      LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
987 	      LL_UPPER_BOUND (target_loop) = target_expr;
988 	    }
989 	}
990     }
991   for (i = 0; i < depth; i++)
992     {
993       target_loop = LN_LOOPS (target_nest)[i];
994       /* If necessary, exchange the upper and lower bounds and negate
995          the step size.  */
996       if (stepsigns[i] < 0)
997 	{
998 	  LL_STEP (target_loop) *= -1;
999 	  tmp_expr = LL_LOWER_BOUND (target_loop);
1000 	  LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1001 	  LL_UPPER_BOUND (target_loop) = tmp_expr;
1002 	}
1003     }
1004   return target_nest;
1005 }
1006 
1007 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1008    result.  */
1009 
1010 static lambda_vector
lambda_compute_step_signs(lambda_trans_matrix trans,lambda_vector stepsigns)1011 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1012 {
1013   lambda_matrix matrix, H;
1014   int size;
1015   lambda_vector newsteps;
1016   int i, j, factor, minimum_column;
1017   int temp;
1018 
1019   matrix = LTM_MATRIX (trans);
1020   size = LTM_ROWSIZE (trans);
1021   H = lambda_matrix_new (size, size);
1022 
1023   newsteps = lambda_vector_new (size);
1024   lambda_vector_copy (stepsigns, newsteps, size);
1025 
1026   lambda_matrix_copy (matrix, H, size, size);
1027 
1028   for (j = 0; j < size; j++)
1029     {
1030       lambda_vector row;
1031       row = H[j];
1032       for (i = j; i < size; i++)
1033 	if (row[i] < 0)
1034 	  lambda_matrix_col_negate (H, size, i);
1035       while (lambda_vector_first_nz (row, size, j + 1) < size)
1036 	{
1037 	  minimum_column = lambda_vector_min_nz (row, size, j);
1038 	  lambda_matrix_col_exchange (H, size, j, minimum_column);
1039 
1040 	  temp = newsteps[j];
1041 	  newsteps[j] = newsteps[minimum_column];
1042 	  newsteps[minimum_column] = temp;
1043 
1044 	  for (i = j + 1; i < size; i++)
1045 	    {
1046 	      factor = row[i] / row[j];
1047 	      lambda_matrix_col_add (H, size, j, i, -1 * factor);
1048 	    }
1049 	}
1050     }
1051   return newsteps;
1052 }
1053 
1054 /* Transform NEST according to TRANS, and return the new loopnest.
1055    This involves
1056    1. Computing a lattice base for the transformation
1057    2. Composing the dense base with the specified transformation (TRANS)
1058    3. Decomposing the combined transformation into a lower triangular portion,
1059    and a unimodular portion.
1060    4. Computing the auxiliary nest using the unimodular portion.
1061    5. Computing the target nest using the auxiliary nest and the lower
1062    triangular portion.  */
1063 
1064 lambda_loopnest
lambda_loopnest_transform(lambda_loopnest nest,lambda_trans_matrix trans)1065 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1066 {
1067   lambda_loopnest auxillary_nest, target_nest;
1068 
1069   int depth, invariants;
1070   int i, j;
1071   lambda_lattice lattice;
1072   lambda_trans_matrix trans1, H, U;
1073   lambda_loop loop;
1074   lambda_linear_expression expression;
1075   lambda_vector origin;
1076   lambda_matrix origin_invariants;
1077   lambda_vector stepsigns;
1078   int f;
1079 
1080   depth = LN_DEPTH (nest);
1081   invariants = LN_INVARIANTS (nest);
1082 
1083   /* Keep track of the signs of the loop steps.  */
1084   stepsigns = lambda_vector_new (depth);
1085   for (i = 0; i < depth; i++)
1086     {
1087       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1088 	stepsigns[i] = 1;
1089       else
1090 	stepsigns[i] = -1;
1091     }
1092 
1093   /* Compute the lattice base.  */
1094   lattice = lambda_lattice_compute_base (nest);
1095   trans1 = lambda_trans_matrix_new (depth, depth);
1096 
1097   /* Multiply the transformation matrix by the lattice base.  */
1098 
1099   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1100 		      LTM_MATRIX (trans1), depth, depth, depth);
1101 
1102   /* Compute the Hermite normal form for the new transformation matrix.  */
1103   H = lambda_trans_matrix_new (depth, depth);
1104   U = lambda_trans_matrix_new (depth, depth);
1105   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1106 			 LTM_MATRIX (U));
1107 
1108   /* Compute the auxiliary loop nest's space from the unimodular
1109      portion.  */
1110   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1111 
1112   /* Compute the loop step signs from the old step signs and the
1113      transformation matrix.  */
1114   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1115 
1116   /* Compute the target loop nest space from the auxiliary nest and
1117      the lower triangular matrix H.  */
1118   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1119   origin = lambda_vector_new (depth);
1120   origin_invariants = lambda_matrix_new (depth, invariants);
1121   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1122 			     LATTICE_ORIGIN (lattice), origin);
1123   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1124 		      origin_invariants, depth, depth, invariants);
1125 
1126   for (i = 0; i < depth; i++)
1127     {
1128       loop = LN_LOOPS (target_nest)[i];
1129       expression = LL_LINEAR_OFFSET (loop);
1130       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1131 	f = 1;
1132       else
1133 	f = LLE_DENOMINATOR (expression);
1134 
1135       LLE_CONSTANT (expression) += f * origin[i];
1136 
1137       for (j = 0; j < invariants; j++)
1138 	LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1139 	  f * origin_invariants[i][j];
1140     }
1141 
1142   return target_nest;
1143 
1144 }
1145 
1146 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1147    return the new expression.  DEPTH is the depth of the loopnest.
1148    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1149    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1150    is the amount we have to add/subtract from the expression because of the
1151    type of comparison it is used in.  */
1152 
1153 static lambda_linear_expression
gcc_tree_to_linear_expression(int depth,tree expr,VEC (tree,heap)* outerinductionvars,VEC (tree,heap)* invariants,int extra)1154 gcc_tree_to_linear_expression (int depth, tree expr,
1155 			       VEC(tree,heap) *outerinductionvars,
1156 			       VEC(tree,heap) *invariants, int extra)
1157 {
1158   lambda_linear_expression lle = NULL;
1159   switch (TREE_CODE (expr))
1160     {
1161     case INTEGER_CST:
1162       {
1163 	lle = lambda_linear_expression_new (depth, 2 * depth);
1164 	LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1165 	if (extra != 0)
1166 	  LLE_CONSTANT (lle) += extra;
1167 
1168 	LLE_DENOMINATOR (lle) = 1;
1169       }
1170       break;
1171     case SSA_NAME:
1172       {
1173 	tree iv, invar;
1174 	size_t i;
1175 	for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1176 	  if (iv != NULL)
1177 	    {
1178 	      if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1179 		{
1180 		  lle = lambda_linear_expression_new (depth, 2 * depth);
1181 		  LLE_COEFFICIENTS (lle)[i] = 1;
1182 		  if (extra != 0)
1183 		    LLE_CONSTANT (lle) = extra;
1184 
1185 		  LLE_DENOMINATOR (lle) = 1;
1186 		}
1187 	    }
1188 	for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1189 	  if (invar != NULL)
1190 	    {
1191 	      if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1192 		{
1193 		  lle = lambda_linear_expression_new (depth, 2 * depth);
1194 		  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1195 		  if (extra != 0)
1196 		    LLE_CONSTANT (lle) = extra;
1197 		  LLE_DENOMINATOR (lle) = 1;
1198 		}
1199 	    }
1200       }
1201       break;
1202     default:
1203       return NULL;
1204     }
1205 
1206   return lle;
1207 }
1208 
1209 /* Return the depth of the loopnest NEST */
1210 
1211 static int
depth_of_nest(struct loop * nest)1212 depth_of_nest (struct loop *nest)
1213 {
1214   size_t depth = 0;
1215   while (nest)
1216     {
1217       depth++;
1218       nest = nest->inner;
1219     }
1220   return depth;
1221 }
1222 
1223 
1224 /* Return true if OP is invariant in LOOP and all outer loops.  */
1225 
1226 static bool
invariant_in_loop_and_outer_loops(struct loop * loop,tree op)1227 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1228 {
1229   if (is_gimple_min_invariant (op))
1230     return true;
1231   if (loop->depth == 0)
1232     return true;
1233   if (!expr_invariant_in_loop_p (loop, op))
1234     return false;
1235   if (loop->outer
1236       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1237     return false;
1238   return true;
1239 }
1240 
1241 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1242    or NULL if it could not be converted.
1243    DEPTH is the depth of the loop.
1244    INVARIANTS is a pointer to the array of loop invariants.
1245    The induction variable for this loop should be stored in the parameter
1246    OURINDUCTIONVAR.
1247    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1248 
1249 static lambda_loop
gcc_loop_to_lambda_loop(struct loop * loop,int depth,VEC (tree,heap)** invariants,tree * ourinductionvar,VEC (tree,heap)* outerinductionvars,VEC (tree,heap)** lboundvars,VEC (tree,heap)** uboundvars,VEC (int,heap)** steps)1250 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1251 			 VEC(tree,heap) ** invariants,
1252 			 tree * ourinductionvar,
1253 			 VEC(tree,heap) * outerinductionvars,
1254 			 VEC(tree,heap) ** lboundvars,
1255 			 VEC(tree,heap) ** uboundvars,
1256 			 VEC(int,heap) ** steps)
1257 {
1258   tree phi;
1259   tree exit_cond;
1260   tree access_fn, inductionvar;
1261   tree step;
1262   lambda_loop lloop = NULL;
1263   lambda_linear_expression lbound, ubound;
1264   tree test;
1265   int stepint;
1266   int extra = 0;
1267   tree lboundvar, uboundvar, uboundresult;
1268 
1269   /* Find out induction var and exit condition.  */
1270   inductionvar = find_induction_var_from_exit_cond (loop);
1271   exit_cond = get_loop_exit_condition (loop);
1272 
1273   if (inductionvar == NULL || exit_cond == NULL)
1274     {
1275       if (dump_file && (dump_flags & TDF_DETAILS))
1276 	fprintf (dump_file,
1277 		 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1278       return NULL;
1279     }
1280 
1281   test = TREE_OPERAND (exit_cond, 0);
1282 
1283   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1284     {
1285 
1286       if (dump_file && (dump_flags & TDF_DETAILS))
1287 	fprintf (dump_file,
1288 		 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1289 
1290       return NULL;
1291     }
1292 
1293   phi = SSA_NAME_DEF_STMT (inductionvar);
1294   if (TREE_CODE (phi) != PHI_NODE)
1295     {
1296       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1297       if (!phi)
1298 	{
1299 
1300 	  if (dump_file && (dump_flags & TDF_DETAILS))
1301 	    fprintf (dump_file,
1302 		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1303 
1304 	  return NULL;
1305 	}
1306 
1307       phi = SSA_NAME_DEF_STMT (phi);
1308       if (TREE_CODE (phi) != PHI_NODE)
1309 	{
1310 
1311 	  if (dump_file && (dump_flags & TDF_DETAILS))
1312 	    fprintf (dump_file,
1313 		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1314 	  return NULL;
1315 	}
1316 
1317     }
1318 
1319   /* The induction variable name/version we want to put in the array is the
1320      result of the induction variable phi node.  */
1321   *ourinductionvar = PHI_RESULT (phi);
1322   access_fn = instantiate_parameters
1323     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1324   if (access_fn == chrec_dont_know)
1325     {
1326       if (dump_file && (dump_flags & TDF_DETAILS))
1327 	fprintf (dump_file,
1328 		 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1329 
1330       return NULL;
1331     }
1332 
1333   step = evolution_part_in_loop_num (access_fn, loop->num);
1334   if (!step || step == chrec_dont_know)
1335     {
1336       if (dump_file && (dump_flags & TDF_DETAILS))
1337 	fprintf (dump_file,
1338 		 "Unable to convert loop: Cannot determine step of loop.\n");
1339 
1340       return NULL;
1341     }
1342   if (TREE_CODE (step) != INTEGER_CST)
1343     {
1344 
1345       if (dump_file && (dump_flags & TDF_DETAILS))
1346 	fprintf (dump_file,
1347 		 "Unable to convert loop: Step of loop is not integer.\n");
1348       return NULL;
1349     }
1350 
1351   stepint = TREE_INT_CST_LOW (step);
1352 
1353   /* Only want phis for induction vars, which will have two
1354      arguments.  */
1355   if (PHI_NUM_ARGS (phi) != 2)
1356     {
1357       if (dump_file && (dump_flags & TDF_DETAILS))
1358 	fprintf (dump_file,
1359 		 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1360       return NULL;
1361     }
1362 
1363   /* Another induction variable check. One argument's source should be
1364      in the loop, one outside the loop.  */
1365   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1366       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1367     {
1368 
1369       if (dump_file && (dump_flags & TDF_DETAILS))
1370 	fprintf (dump_file,
1371 		 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1372 
1373       return NULL;
1374     }
1375 
1376   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1377     {
1378       lboundvar = PHI_ARG_DEF (phi, 1);
1379       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1380 					      outerinductionvars, *invariants,
1381 					      0);
1382     }
1383   else
1384     {
1385       lboundvar = PHI_ARG_DEF (phi, 0);
1386       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1387 					      outerinductionvars, *invariants,
1388 					      0);
1389     }
1390 
1391   if (!lbound)
1392     {
1393 
1394       if (dump_file && (dump_flags & TDF_DETAILS))
1395 	fprintf (dump_file,
1396 		 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1397 
1398       return NULL;
1399     }
1400   /* One part of the test may be a loop invariant tree.  */
1401   VEC_reserve (tree, heap, *invariants, 1);
1402   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1403       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1404     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1405   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1406 	   && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1407     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1408 
1409   /* The non-induction variable part of the test is the upper bound variable.
1410    */
1411   if (TREE_OPERAND (test, 0) == inductionvar)
1412     uboundvar = TREE_OPERAND (test, 1);
1413   else
1414     uboundvar = TREE_OPERAND (test, 0);
1415 
1416 
1417   /* We only size the vectors assuming we have, at max, 2 times as many
1418      invariants as we do loops (one for each bound).
1419      This is just an arbitrary number, but it has to be matched against the
1420      code below.  */
1421   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1422 
1423 
1424   /* We might have some leftover.  */
1425   if (TREE_CODE (test) == LT_EXPR)
1426     extra = -1 * stepint;
1427   else if (TREE_CODE (test) == NE_EXPR)
1428     extra = -1 * stepint;
1429   else if (TREE_CODE (test) == GT_EXPR)
1430     extra = -1 * stepint;
1431   else if (TREE_CODE (test) == EQ_EXPR)
1432     extra = 1 * stepint;
1433 
1434   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1435 					  outerinductionvars,
1436 					  *invariants, extra);
1437   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1438 			build_int_cst (TREE_TYPE (uboundvar), extra));
1439   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1440   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1441   VEC_safe_push (int, heap, *steps, stepint);
1442   if (!ubound)
1443     {
1444       if (dump_file && (dump_flags & TDF_DETAILS))
1445 	fprintf (dump_file,
1446 		 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1447       return NULL;
1448     }
1449 
1450   lloop = lambda_loop_new ();
1451   LL_STEP (lloop) = stepint;
1452   LL_LOWER_BOUND (lloop) = lbound;
1453   LL_UPPER_BOUND (lloop) = ubound;
1454   return lloop;
1455 }
1456 
1457 /* Given a LOOP, find the induction variable it is testing against in the exit
1458    condition.  Return the induction variable if found, NULL otherwise.  */
1459 
1460 static tree
find_induction_var_from_exit_cond(struct loop * loop)1461 find_induction_var_from_exit_cond (struct loop *loop)
1462 {
1463   tree expr = get_loop_exit_condition (loop);
1464   tree ivarop;
1465   tree test;
1466   if (expr == NULL_TREE)
1467     return NULL_TREE;
1468   if (TREE_CODE (expr) != COND_EXPR)
1469     return NULL_TREE;
1470   test = TREE_OPERAND (expr, 0);
1471   if (!COMPARISON_CLASS_P (test))
1472     return NULL_TREE;
1473 
1474   /* Find the side that is invariant in this loop. The ivar must be the other
1475      side.  */
1476 
1477   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1478       ivarop = TREE_OPERAND (test, 1);
1479   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1480       ivarop = TREE_OPERAND (test, 0);
1481   else
1482     return NULL_TREE;
1483 
1484   if (TREE_CODE (ivarop) != SSA_NAME)
1485     return NULL_TREE;
1486   return ivarop;
1487 }
1488 
1489 DEF_VEC_P(lambda_loop);
1490 DEF_VEC_ALLOC_P(lambda_loop,heap);
1491 
1492 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1493    Return the new loop nest.
1494    INDUCTIONVARS is a pointer to an array of induction variables for the
1495    loopnest that will be filled in during this process.
1496    INVARIANTS is a pointer to an array of invariants that will be filled in
1497    during this process.  */
1498 
1499 lambda_loopnest
gcc_loopnest_to_lambda_loopnest(struct loops * currloops,struct loop * loop_nest,VEC (tree,heap)** inductionvars,VEC (tree,heap)** invariants,bool need_perfect_nest)1500 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1501 				 struct loop * loop_nest,
1502 				 VEC(tree,heap) **inductionvars,
1503 				 VEC(tree,heap) **invariants,
1504 				 bool need_perfect_nest)
1505 {
1506   lambda_loopnest ret = NULL;
1507   struct loop *temp;
1508   int depth = 0;
1509   size_t i;
1510   VEC(lambda_loop,heap) *loops = NULL;
1511   VEC(tree,heap) *uboundvars = NULL;
1512   VEC(tree,heap) *lboundvars  = NULL;
1513   VEC(int,heap) *steps = NULL;
1514   lambda_loop newloop;
1515   tree inductionvar = NULL;
1516 
1517   depth = depth_of_nest (loop_nest);
1518   temp = loop_nest;
1519   while (temp)
1520     {
1521       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1522 					 &inductionvar, *inductionvars,
1523 					 &lboundvars, &uboundvars,
1524 					 &steps);
1525       if (!newloop)
1526 	return NULL;
1527       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1528       VEC_safe_push (lambda_loop, heap, loops, newloop);
1529       temp = temp->inner;
1530     }
1531   if (need_perfect_nest)
1532     {
1533       if (!perfect_nestify (currloops, loop_nest,
1534 			    lboundvars, uboundvars, steps, *inductionvars))
1535 	{
1536 	  if (dump_file)
1537 	    fprintf (dump_file,
1538 		     "Not a perfect loop nest and couldn't convert to one.\n");
1539 	  goto fail;
1540 	}
1541       else if (dump_file)
1542 	fprintf (dump_file,
1543 		 "Successfully converted loop nest to perfect loop nest.\n");
1544     }
1545   ret = lambda_loopnest_new (depth, 2 * depth);
1546   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1547     LN_LOOPS (ret)[i] = newloop;
1548  fail:
1549   VEC_free (lambda_loop, heap, loops);
1550   VEC_free (tree, heap, uboundvars);
1551   VEC_free (tree, heap, lboundvars);
1552   VEC_free (int, heap, steps);
1553 
1554   return ret;
1555 }
1556 
1557 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1558    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1559    inserted for us are stored.  INDUCTION_VARS is the array of induction
1560    variables for the loop this LBV is from.  TYPE is the tree type to use for
1561    the variables and trees involved.  */
1562 
1563 static tree
lbv_to_gcc_expression(lambda_body_vector lbv,tree type,VEC (tree,heap)* induction_vars,tree * stmts_to_insert)1564 lbv_to_gcc_expression (lambda_body_vector lbv,
1565 		       tree type, VEC(tree,heap) *induction_vars,
1566 		       tree *stmts_to_insert)
1567 {
1568   tree stmts, stmt, resvar, name;
1569   tree iv;
1570   size_t i;
1571   tree_stmt_iterator tsi;
1572 
1573   /* Create a statement list and a linear expression temporary.  */
1574   stmts = alloc_stmt_list ();
1575   resvar = create_tmp_var (type, "lbvtmp");
1576   add_referenced_tmp_var (resvar);
1577 
1578   /* Start at 0.  */
1579   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1580   name = make_ssa_name (resvar, stmt);
1581   TREE_OPERAND (stmt, 0) = name;
1582   tsi = tsi_last (stmts);
1583   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1584 
1585   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1586     {
1587       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1588 	{
1589 	  tree newname;
1590 	  tree coeffmult;
1591 
1592 	  /* newname = coefficient * induction_variable */
1593 	  coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1594 	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1595 			fold_build2 (MULT_EXPR, type, iv, coeffmult));
1596 
1597 	  newname = make_ssa_name (resvar, stmt);
1598 	  TREE_OPERAND (stmt, 0) = newname;
1599 	  fold_stmt (&stmt);
1600 	  tsi = tsi_last (stmts);
1601 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1602 
1603 	  /* name = name + newname */
1604 	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1605 			build (PLUS_EXPR, type, name, newname));
1606 	  name = make_ssa_name (resvar, stmt);
1607 	  TREE_OPERAND (stmt, 0) = name;
1608 	  fold_stmt (&stmt);
1609 	  tsi = tsi_last (stmts);
1610 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1611 
1612 	}
1613     }
1614 
1615   /* Handle any denominator that occurs.  */
1616   if (LBV_DENOMINATOR (lbv) != 1)
1617     {
1618       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1619       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1620 		    build (CEIL_DIV_EXPR, type, name, denominator));
1621       name = make_ssa_name (resvar, stmt);
1622       TREE_OPERAND (stmt, 0) = name;
1623       fold_stmt (&stmt);
1624       tsi = tsi_last (stmts);
1625       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1626     }
1627   *stmts_to_insert = stmts;
1628   return name;
1629 }
1630 
1631 /* Convert a linear expression from coefficient and constant form to a
1632    gcc tree.
1633    Return the tree that represents the final value of the expression.
1634    LLE is the linear expression to convert.
1635    OFFSET is the linear offset to apply to the expression.
1636    TYPE is the tree type to use for the variables and math.
1637    INDUCTION_VARS is a vector of induction variables for the loops.
1638    INVARIANTS is a vector of the loop nest invariants.
1639    WRAP specifies what tree code to wrap the results in, if there is more than
1640    one (it is either MAX_EXPR, or MIN_EXPR).
1641    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1642    statements that need to be inserted for the linear expression.  */
1643 
1644 static tree
lle_to_gcc_expression(lambda_linear_expression lle,lambda_linear_expression offset,tree type,VEC (tree,heap)* induction_vars,VEC (tree,heap)* invariants,enum tree_code wrap,tree * stmts_to_insert)1645 lle_to_gcc_expression (lambda_linear_expression lle,
1646 		       lambda_linear_expression offset,
1647 		       tree type,
1648 		       VEC(tree,heap) *induction_vars,
1649 		       VEC(tree,heap) *invariants,
1650 		       enum tree_code wrap, tree *stmts_to_insert)
1651 {
1652   tree stmts, stmt, resvar, name;
1653   size_t i;
1654   tree_stmt_iterator tsi;
1655   tree iv, invar;
1656   VEC(tree,heap) *results = NULL;
1657 
1658   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1659   name = NULL_TREE;
1660   /* Create a statement list and a linear expression temporary.  */
1661   stmts = alloc_stmt_list ();
1662   resvar = create_tmp_var (type, "lletmp");
1663   add_referenced_tmp_var (resvar);
1664 
1665   /* Build up the linear expressions, and put the variable representing the
1666      result in the results array.  */
1667   for (; lle != NULL; lle = LLE_NEXT (lle))
1668     {
1669       /* Start at name = 0.  */
1670       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1671       name = make_ssa_name (resvar, stmt);
1672       TREE_OPERAND (stmt, 0) = name;
1673       fold_stmt (&stmt);
1674       tsi = tsi_last (stmts);
1675       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1676 
1677       /* First do the induction variables.
1678          at the end, name = name + all the induction variables added
1679          together.  */
1680       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1681 	{
1682 	  if (LLE_COEFFICIENTS (lle)[i] != 0)
1683 	    {
1684 	      tree newname;
1685 	      tree mult;
1686 	      tree coeff;
1687 
1688 	      /* mult = induction variable * coefficient.  */
1689 	      if (LLE_COEFFICIENTS (lle)[i] == 1)
1690 		{
1691 		  mult = VEC_index (tree, induction_vars, i);
1692 		}
1693 	      else
1694 		{
1695 		  coeff = build_int_cst (type,
1696 					 LLE_COEFFICIENTS (lle)[i]);
1697 		  mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1698 		}
1699 
1700 	      /* newname = mult */
1701 	      stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1702 	      newname = make_ssa_name (resvar, stmt);
1703 	      TREE_OPERAND (stmt, 0) = newname;
1704 	      fold_stmt (&stmt);
1705 	      tsi = tsi_last (stmts);
1706 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1707 
1708 	      /* name = name + newname */
1709 	      stmt = build (MODIFY_EXPR, void_type_node, resvar,
1710 			    build (PLUS_EXPR, type, name, newname));
1711 	      name = make_ssa_name (resvar, stmt);
1712 	      TREE_OPERAND (stmt, 0) = name;
1713 	      fold_stmt (&stmt);
1714 	      tsi = tsi_last (stmts);
1715 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1716 	    }
1717 	}
1718 
1719       /* Handle our invariants.
1720          At the end, we have name = name + result of adding all multiplied
1721          invariants.  */
1722       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1723 	{
1724 	  if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1725 	    {
1726 	      tree newname;
1727 	      tree mult;
1728 	      tree coeff;
1729 	      int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1730 	      /* mult = invariant * coefficient  */
1731 	      if (invcoeff == 1)
1732 		{
1733 		  mult = invar;
1734 		}
1735 	      else
1736 		{
1737 		  coeff = build_int_cst (type, invcoeff);
1738 		  mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1739 		}
1740 
1741 	      /* newname = mult */
1742 	      stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1743 	      newname = make_ssa_name (resvar, stmt);
1744 	      TREE_OPERAND (stmt, 0) = newname;
1745 	      fold_stmt (&stmt);
1746 	      tsi = tsi_last (stmts);
1747 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1748 
1749 	      /* name = name + newname */
1750 	      stmt = build (MODIFY_EXPR, void_type_node, resvar,
1751 			    build (PLUS_EXPR, type, name, newname));
1752 	      name = make_ssa_name (resvar, stmt);
1753 	      TREE_OPERAND (stmt, 0) = name;
1754 	      fold_stmt (&stmt);
1755 	      tsi = tsi_last (stmts);
1756 	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1757 	    }
1758 	}
1759 
1760       /* Now handle the constant.
1761          name = name + constant.  */
1762       if (LLE_CONSTANT (lle) != 0)
1763 	{
1764 	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1765 			build (PLUS_EXPR, type, name,
1766 			       build_int_cst (type, LLE_CONSTANT (lle))));
1767 	  name = make_ssa_name (resvar, stmt);
1768 	  TREE_OPERAND (stmt, 0) = name;
1769 	  fold_stmt (&stmt);
1770 	  tsi = tsi_last (stmts);
1771 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1772 	}
1773 
1774       /* Now handle the offset.
1775          name = name + linear offset.  */
1776       if (LLE_CONSTANT (offset) != 0)
1777 	{
1778 	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1779 			build (PLUS_EXPR, type, name,
1780 			       build_int_cst (type, LLE_CONSTANT (offset))));
1781 	  name = make_ssa_name (resvar, stmt);
1782 	  TREE_OPERAND (stmt, 0) = name;
1783 	  fold_stmt (&stmt);
1784 	  tsi = tsi_last (stmts);
1785 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1786 	}
1787 
1788       /* Handle any denominator that occurs.  */
1789       if (LLE_DENOMINATOR (lle) != 1)
1790 	{
1791 	  stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1792 	  stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1793 			type, name, stmt);
1794 	  stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1795 
1796 	  /* name = {ceil, floor}(name/denominator) */
1797 	  name = make_ssa_name (resvar, stmt);
1798 	  TREE_OPERAND (stmt, 0) = name;
1799 	  tsi = tsi_last (stmts);
1800 	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1801 	}
1802       VEC_safe_push (tree, heap, results, name);
1803     }
1804 
1805   /* Again, out of laziness, we don't handle this case yet.  It's not
1806      hard, it just hasn't occurred.  */
1807   gcc_assert (VEC_length (tree, results) <= 2);
1808 
1809   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1810   if (VEC_length (tree, results) > 1)
1811     {
1812       tree op1 = VEC_index (tree, results, 0);
1813       tree op2 = VEC_index (tree, results, 1);
1814       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1815 		    build (wrap, type, op1, op2));
1816       name = make_ssa_name (resvar, stmt);
1817       TREE_OPERAND (stmt, 0) = name;
1818       tsi = tsi_last (stmts);
1819       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1820     }
1821 
1822   VEC_free (tree, heap, results);
1823 
1824   *stmts_to_insert = stmts;
1825   return name;
1826 }
1827 
1828 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1829    it, back into gcc code.  This changes the
1830    loops, their induction variables, and their bodies, so that they
1831    match the transformed loopnest.
1832    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1833    loopnest.
1834    OLD_IVS is a vector of induction variables from the old loopnest.
1835    INVARIANTS is a vector of loop invariants from the old loopnest.
1836    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1837    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1838    NEW_LOOPNEST.  */
1839 
1840 void
lambda_loopnest_to_gcc_loopnest(struct loop * old_loopnest,VEC (tree,heap)* old_ivs,VEC (tree,heap)* invariants,lambda_loopnest new_loopnest,lambda_trans_matrix transform)1841 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1842 				 VEC(tree,heap) *old_ivs,
1843 				 VEC(tree,heap) *invariants,
1844 				 lambda_loopnest new_loopnest,
1845 				 lambda_trans_matrix transform)
1846 {
1847   struct loop *temp;
1848   size_t i = 0;
1849   size_t depth = 0;
1850   VEC(tree,heap) *new_ivs = NULL;
1851   tree oldiv;
1852 
1853   block_stmt_iterator bsi;
1854 
1855   if (dump_file)
1856     {
1857       transform = lambda_trans_matrix_inverse (transform);
1858       fprintf (dump_file, "Inverse of transformation matrix:\n");
1859       print_lambda_trans_matrix (dump_file, transform);
1860     }
1861   depth = depth_of_nest (old_loopnest);
1862   temp = old_loopnest;
1863 
1864   while (temp)
1865     {
1866       lambda_loop newloop;
1867       basic_block bb;
1868       edge exit;
1869       tree ivvar, ivvarinced, exitcond, stmts;
1870       enum tree_code testtype;
1871       tree newupperbound, newlowerbound;
1872       lambda_linear_expression offset;
1873       tree type;
1874       bool insert_after;
1875       tree inc_stmt;
1876 
1877       oldiv = VEC_index (tree, old_ivs, i);
1878       type = TREE_TYPE (oldiv);
1879 
1880       /* First, build the new induction variable temporary  */
1881 
1882       ivvar = create_tmp_var (type, "lnivtmp");
1883       add_referenced_tmp_var (ivvar);
1884 
1885       VEC_safe_push (tree, heap, new_ivs, ivvar);
1886 
1887       newloop = LN_LOOPS (new_loopnest)[i];
1888 
1889       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1890          cases for now.  */
1891       offset = LL_LINEAR_OFFSET (newloop);
1892 
1893       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1894 		  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1895 
1896       /* Now build the  new lower bounds, and insert the statements
1897          necessary to generate it on the loop preheader.  */
1898       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1899 					     LL_LINEAR_OFFSET (newloop),
1900 					     type,
1901 					     new_ivs,
1902 					     invariants, MAX_EXPR, &stmts);
1903       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1904       bsi_commit_edge_inserts ();
1905       /* Build the new upper bound and insert its statements in the
1906          basic block of the exit condition */
1907       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1908 					     LL_LINEAR_OFFSET (newloop),
1909 					     type,
1910 					     new_ivs,
1911 					     invariants, MIN_EXPR, &stmts);
1912       exit = temp->single_exit;
1913       exitcond = get_loop_exit_condition (temp);
1914       bb = bb_for_stmt (exitcond);
1915       bsi = bsi_start (bb);
1916       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1917 
1918       /* Create the new iv.  */
1919 
1920       standard_iv_increment_position (temp, &bsi, &insert_after);
1921       create_iv (newlowerbound,
1922 		 build_int_cst (type, LL_STEP (newloop)),
1923 		 ivvar, temp, &bsi, insert_after, &ivvar,
1924 		 NULL);
1925 
1926       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1927 	 dominate the block containing the exit condition.
1928 	 So we simply create our own incremented iv to use in the new exit
1929 	 test,  and let redundancy elimination sort it out.  */
1930       inc_stmt = build (PLUS_EXPR, type,
1931 			ivvar, build_int_cst (type, LL_STEP (newloop)));
1932       inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1933 			inc_stmt);
1934       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1935       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1936       bsi = bsi_for_stmt (exitcond);
1937       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1938 
1939       /* Replace the exit condition with the new upper bound
1940          comparison.  */
1941 
1942       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1943 
1944       /* We want to build a conditional where true means exit the loop, and
1945 	 false means continue the loop.
1946 	 So swap the testtype if this isn't the way things are.*/
1947 
1948       if (exit->flags & EDGE_FALSE_VALUE)
1949 	testtype = swap_tree_comparison (testtype);
1950 
1951       COND_EXPR_COND (exitcond) = build (testtype,
1952 					 boolean_type_node,
1953 					 newupperbound, ivvarinced);
1954       update_stmt (exitcond);
1955       VEC_replace (tree, new_ivs, i, ivvar);
1956 
1957       i++;
1958       temp = temp->inner;
1959     }
1960 
1961   /* Rewrite uses of the old ivs so that they are now specified in terms of
1962      the new ivs.  */
1963 
1964   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1965     {
1966       imm_use_iterator imm_iter;
1967       use_operand_p imm_use;
1968       tree oldiv_def;
1969       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1970 
1971       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1972         oldiv_def = PHI_RESULT (oldiv_stmt);
1973       else
1974 	oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1975       gcc_assert (oldiv_def != NULL_TREE);
1976 
1977       FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1978 	{
1979 	  tree stmt = USE_STMT (imm_use);
1980 	  use_operand_p use_p;
1981 	  ssa_op_iter iter;
1982 	  gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1983 	  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1984 	    {
1985 	      if (USE_FROM_PTR (use_p) == oldiv)
1986 		{
1987 		  tree newiv, stmts;
1988 		  lambda_body_vector lbv, newlbv;
1989 		  /* Compute the new expression for the induction
1990 		     variable.  */
1991 		  depth = VEC_length (tree, new_ivs);
1992 		  lbv = lambda_body_vector_new (depth);
1993 		  LBV_COEFFICIENTS (lbv)[i] = 1;
1994 
1995 		  newlbv = lambda_body_vector_compute_new (transform, lbv);
1996 
1997 		  newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1998 						 new_ivs, &stmts);
1999 		  bsi = bsi_for_stmt (stmt);
2000 		  /* Insert the statements to build that
2001 		     expression.  */
2002 		  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2003 		  propagate_value (use_p, newiv);
2004 		  update_stmt (stmt);
2005 
2006 		}
2007 	    }
2008 	}
2009     }
2010   VEC_free (tree, heap, new_ivs);
2011 }
2012 
2013 /* Return TRUE if this is not interesting statement from the perspective of
2014    determining if we have a perfect loop nest.  */
2015 
2016 static bool
not_interesting_stmt(tree stmt)2017 not_interesting_stmt (tree stmt)
2018 {
2019   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2020      loop, we would have already failed the number of exits tests.  */
2021   if (TREE_CODE (stmt) == LABEL_EXPR
2022       || TREE_CODE (stmt) == GOTO_EXPR
2023       || TREE_CODE (stmt) == COND_EXPR)
2024     return true;
2025   return false;
2026 }
2027 
2028 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2029 
2030 static bool
phi_loop_edge_uses_def(struct loop * loop,tree phi,tree def)2031 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2032 {
2033   int i;
2034   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2035     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2036       if (PHI_ARG_DEF (phi, i) == def)
2037 	return true;
2038   return false;
2039 }
2040 
2041 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2042 
2043 static bool
stmt_uses_phi_result(tree stmt,tree phi_result)2044 stmt_uses_phi_result (tree stmt, tree phi_result)
2045 {
2046   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2047 
2048   /* This is conservatively true, because we only want SIMPLE bumpers
2049      of the form x +- constant for our pass.  */
2050   return (use == phi_result);
2051 }
2052 
2053 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2054    in-loop-edge in a phi node, and the operand it uses is the result of that
2055    phi node.
2056    I.E. i_29 = i_3 + 1
2057         i_3 = PHI (0, i_29);  */
2058 
2059 static bool
stmt_is_bumper_for_loop(struct loop * loop,tree stmt)2060 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2061 {
2062   tree use;
2063   tree def;
2064   imm_use_iterator iter;
2065   use_operand_p use_p;
2066 
2067   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2068   if (!def)
2069     return false;
2070 
2071   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2072     {
2073       use = USE_STMT (use_p);
2074       if (TREE_CODE (use) == PHI_NODE)
2075 	{
2076 	  if (phi_loop_edge_uses_def (loop, use, def))
2077 	    if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2078 	      return true;
2079 	}
2080     }
2081   return false;
2082 }
2083 
2084 
2085 /* Return true if LOOP is a perfect loop nest.
2086    Perfect loop nests are those loop nests where all code occurs in the
2087    innermost loop body.
2088    If S is a program statement, then
2089 
2090    i.e.
2091    DO I = 1, 20
2092        S1
2093        DO J = 1, 20
2094        ...
2095        END DO
2096    END DO
2097    is not a perfect loop nest because of S1.
2098 
2099    DO I = 1, 20
2100       DO J = 1, 20
2101         S1
2102 	...
2103       END DO
2104    END DO
2105    is a perfect loop nest.
2106 
2107    Since we don't have high level loops anymore, we basically have to walk our
2108    statements and ignore those that are there because the loop needs them (IE
2109    the induction variable increment, and jump back to the top of the loop).  */
2110 
2111 bool
perfect_nest_p(struct loop * loop)2112 perfect_nest_p (struct loop *loop)
2113 {
2114   basic_block *bbs;
2115   size_t i;
2116   tree exit_cond;
2117 
2118   if (!loop->inner)
2119     return true;
2120   bbs = get_loop_body (loop);
2121   exit_cond = get_loop_exit_condition (loop);
2122   for (i = 0; i < loop->num_nodes; i++)
2123     {
2124       if (bbs[i]->loop_father == loop)
2125 	{
2126 	  block_stmt_iterator bsi;
2127 	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2128 	    {
2129 	      tree stmt = bsi_stmt (bsi);
2130 	      if (stmt == exit_cond
2131 		  || not_interesting_stmt (stmt)
2132 		  || stmt_is_bumper_for_loop (loop, stmt))
2133 		continue;
2134 	      free (bbs);
2135 	      return false;
2136 	    }
2137 	}
2138     }
2139   free (bbs);
2140   /* See if the inner loops are perfectly nested as well.  */
2141   if (loop->inner)
2142     return perfect_nest_p (loop->inner);
2143   return true;
2144 }
2145 
2146 /* Replace the USES of X in STMT, or uses with the same step as X  with Y.  */
2147 
2148 static void
replace_uses_equiv_to_x_with_y(struct loop * loop,tree stmt,tree x,int xstep,tree y)2149 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2150 				int xstep, tree y)
2151 {
2152   ssa_op_iter iter;
2153   use_operand_p use_p;
2154 
2155   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2156     {
2157       tree use = USE_FROM_PTR (use_p);
2158       tree step = NULL_TREE;
2159       tree access_fn = NULL_TREE;
2160 
2161 
2162       access_fn = instantiate_parameters
2163 	(loop, analyze_scalar_evolution (loop, use));
2164       if (access_fn != NULL_TREE && access_fn != chrec_dont_know)
2165 	step = evolution_part_in_loop_num (access_fn, loop->num);
2166       if ((step && step != chrec_dont_know
2167 	   && TREE_CODE (step) == INTEGER_CST
2168 	   && int_cst_value (step) == xstep)
2169 	  || USE_FROM_PTR (use_p) == x)
2170 	SET_USE (use_p, y);
2171     }
2172 }
2173 
2174 /* Return TRUE if STMT uses tree OP in it's uses.  */
2175 
2176 static bool
stmt_uses_op(tree stmt,tree op)2177 stmt_uses_op (tree stmt, tree op)
2178 {
2179   ssa_op_iter iter;
2180   tree use;
2181 
2182   FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2183     {
2184       if (use == op)
2185 	return true;
2186     }
2187   return false;
2188 }
2189 
2190 /* Return true if STMT is an exit PHI for LOOP */
2191 
2192 static bool
exit_phi_for_loop_p(struct loop * loop,tree stmt)2193 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2194 {
2195 
2196   if (TREE_CODE (stmt) != PHI_NODE
2197       || PHI_NUM_ARGS (stmt) != 1
2198       || bb_for_stmt (stmt) != loop->single_exit->dest)
2199     return false;
2200 
2201   return true;
2202 }
2203 
2204 /* Return true if STMT can be put back into INNER, a loop by moving it to the
2205    beginning of that loop.  */
2206 
2207 static bool
can_put_in_inner_loop(struct loop * inner,tree stmt)2208 can_put_in_inner_loop (struct loop *inner, tree stmt)
2209 {
2210   imm_use_iterator imm_iter;
2211   use_operand_p use_p;
2212   basic_block use_bb = NULL;
2213 
2214   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2215   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2216       || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2217     return false;
2218 
2219   /* We require that the basic block of all uses be the same, or the use be an
2220      exit phi.  */
2221   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2222     {
2223       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2224 	{
2225 	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2226 
2227 	  if (!flow_bb_inside_loop_p (inner, immbb))
2228 	    return false;
2229 	  if (use_bb == NULL)
2230 	    use_bb = immbb;
2231 	  else if (immbb != use_bb)
2232 	    return false;
2233 	}
2234     }
2235   return true;
2236 
2237 }
2238 
2239 
2240 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2241    one.  LOOPIVS is a vector of induction variables, one per loop.
2242    ATM, we only handle imperfect nests of depth 2, where all of the statements
2243    occur after the inner loop.  */
2244 
2245 static bool
can_convert_to_perfect_nest(struct loop * loop,VEC (tree,heap)* loopivs)2246 can_convert_to_perfect_nest (struct loop *loop,
2247 			     VEC(tree,heap) *loopivs)
2248 {
2249   basic_block *bbs;
2250   tree exit_condition, phi;
2251   size_t i;
2252   block_stmt_iterator bsi;
2253   basic_block exitdest;
2254 
2255   /* Can't handle triply nested+ loops yet.  */
2256   if (!loop->inner || loop->inner->inner)
2257     return false;
2258 
2259   bbs = get_loop_body (loop);
2260   exit_condition = get_loop_exit_condition (loop);
2261   for (i = 0; i < loop->num_nodes; i++)
2262     {
2263       if (bbs[i]->loop_father == loop)
2264 	{
2265 	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2266 	    {
2267 	      size_t j;
2268 	      tree stmt = bsi_stmt (bsi);
2269 	      tree iv;
2270 
2271 	      if (stmt == exit_condition
2272 		  || not_interesting_stmt (stmt)
2273 		  || stmt_is_bumper_for_loop (loop, stmt))
2274 		continue;
2275 	      /* If the statement uses inner loop ivs, we == screwed.  */
2276 	      for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
2277 		if (stmt_uses_op (stmt, iv))
2278 		  goto fail;
2279 
2280 	      /* If this is a simple operation like a cast that is invariant
2281 		 in the inner loop, only used there, and we can place it
2282 		 there, then it's not going to hurt us.
2283 		 This means that we will propagate casts and other cheap
2284 		 invariant operations *back*
2285 		 into the inner loop if we can interchange the loop, on the
2286 		 theory that we are going to gain a lot more by interchanging
2287 		 the loop than we are by leaving some invariant code there for
2288 		 some other pass to clean up.  */
2289 	      if (TREE_CODE (stmt) == MODIFY_EXPR
2290 		  && is_gimple_cast (TREE_OPERAND (stmt, 1))
2291 		  && can_put_in_inner_loop (loop->inner, stmt))
2292 		continue;
2293 
2294 	      /* Otherwise, if the bb of a statement we care about isn't
2295 		 dominated by the header of the inner loop, then we can't
2296 		 handle this case right now.  This test ensures that the
2297 		 statement comes completely *after* the inner loop.  */
2298 	      if (!dominated_by_p (CDI_DOMINATORS,
2299 				   bb_for_stmt (stmt),
2300 				   loop->inner->header))
2301 		goto fail;
2302 	    }
2303 	}
2304     }
2305 
2306   /* We also need to make sure the loop exit only has simple copy phis in it,
2307      otherwise we don't know how to transform it into a perfect nest right
2308      now.  */
2309   exitdest = loop->single_exit->dest;
2310 
2311   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2312     if (PHI_NUM_ARGS (phi) != 1)
2313       goto fail;
2314 
2315   free (bbs);
2316   return true;
2317 
2318  fail:
2319   free (bbs);
2320   return false;
2321 }
2322 
2323 /* Transform the loop nest into a perfect nest, if possible.
2324    LOOPS is the current struct loops *
2325    LOOP is the loop nest to transform into a perfect nest
2326    LBOUNDS are the lower bounds for the loops to transform
2327    UBOUNDS are the upper bounds for the loops to transform
2328    STEPS is the STEPS for the loops to transform.
2329    LOOPIVS is the induction variables for the loops to transform.
2330 
2331    Basically, for the case of
2332 
2333    FOR (i = 0; i < 50; i++)
2334     {
2335      FOR (j =0; j < 50; j++)
2336      {
2337         <whatever>
2338      }
2339      <some code>
2340     }
2341 
2342    This function will transform it into a perfect loop nest by splitting the
2343    outer loop into two loops, like so:
2344 
2345    FOR (i = 0; i < 50; i++)
2346    {
2347      FOR (j = 0; j < 50; j++)
2348      {
2349          <whatever>
2350      }
2351    }
2352 
2353    FOR (i = 0; i < 50; i ++)
2354    {
2355     <some code>
2356    }
2357 
2358    Return FALSE if we can't make this loop into a perfect nest.  */
2359 
2360 static bool
perfect_nestify(struct loops * loops,struct loop * loop,VEC (tree,heap)* lbounds,VEC (tree,heap)* ubounds,VEC (int,heap)* steps,VEC (tree,heap)* loopivs)2361 perfect_nestify (struct loops *loops,
2362 		 struct loop *loop,
2363 		 VEC(tree,heap) *lbounds,
2364 		 VEC(tree,heap) *ubounds,
2365 		 VEC(int,heap) *steps,
2366 		 VEC(tree,heap) *loopivs)
2367 {
2368   basic_block *bbs;
2369   tree exit_condition;
2370   tree then_label, else_label, cond_stmt;
2371   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2372   int i;
2373   block_stmt_iterator bsi;
2374   bool insert_after;
2375   edge e;
2376   struct loop *newloop;
2377   tree phi;
2378   tree uboundvar;
2379   tree stmt;
2380   tree oldivvar, ivvar, ivvarinced;
2381   VEC(tree,heap) *phis = NULL;
2382 
2383   if (!can_convert_to_perfect_nest (loop, loopivs))
2384     return false;
2385 
2386   /* Create the new loop */
2387 
2388   olddest = loop->single_exit->dest;
2389   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
2390   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2391 
2392   /* Push the exit phi nodes that we are moving.  */
2393   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2394     {
2395       VEC_reserve (tree, heap, phis, 2);
2396       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2397       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2398     }
2399   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2400 
2401   /* Remove the exit phis from the old basic block.  Make sure to set
2402      PHI_RESULT to null so it doesn't get released.  */
2403   while (phi_nodes (olddest) != NULL)
2404     {
2405       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2406       remove_phi_node (phi_nodes (olddest), NULL);
2407     }
2408 
2409   /* and add them back to the new basic block.  */
2410   while (VEC_length (tree, phis) != 0)
2411     {
2412       tree def;
2413       tree phiname;
2414       def = VEC_pop (tree, phis);
2415       phiname = VEC_pop (tree, phis);
2416       phi = create_phi_node (phiname, preheaderbb);
2417       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2418     }
2419   flush_pending_stmts (e);
2420   VEC_free (tree, heap, phis);
2421 
2422   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2423   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2424   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2425   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2426   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2427   cond_stmt = build (COND_EXPR, void_type_node,
2428 		     build (NE_EXPR, boolean_type_node,
2429 			    integer_one_node,
2430 			    integer_zero_node),
2431 		     then_label, else_label);
2432   bsi = bsi_start (bodybb);
2433   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2434   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2435   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2436   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2437 
2438   /* Update the loop structures.  */
2439   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2440   newloop->header = headerbb;
2441   newloop->latch = latchbb;
2442   newloop->single_exit = e;
2443   add_bb_to_loop (latchbb, newloop);
2444   add_bb_to_loop (bodybb, newloop);
2445   add_bb_to_loop (headerbb, newloop);
2446   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2447   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2448   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2449 			   loop->single_exit->src);
2450   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2451   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2452   /* Create the new iv.  */
2453   oldivvar = VEC_index (tree, loopivs, 0);
2454   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2455   add_referenced_tmp_var (ivvar);
2456   standard_iv_increment_position (newloop, &bsi, &insert_after);
2457   create_iv (VEC_index (tree, lbounds, 0),
2458 	     build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2459 	     ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2460 
2461   /* Create the new upper bound.  This may be not just a variable, so we copy
2462      it to one just in case.  */
2463 
2464   exit_condition = get_loop_exit_condition (newloop);
2465   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2466   add_referenced_tmp_var (uboundvar);
2467   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2468 		VEC_index (tree, ubounds, 0));
2469   uboundvar = make_ssa_name (uboundvar, stmt);
2470   TREE_OPERAND (stmt, 0) = uboundvar;
2471 
2472   if (insert_after)
2473     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2474   else
2475     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2476   update_stmt (stmt);
2477   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2478 					   boolean_type_node,
2479 					   uboundvar,
2480 					   ivvarinced);
2481   update_stmt (exit_condition);
2482   bbs = get_loop_body_in_dom_order (loop);
2483   /* Now move the statements, and replace the induction variable in the moved
2484      statements with the correct loop induction variable.  */
2485   oldivvar = VEC_index (tree, loopivs, 0);
2486   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2487     {
2488       block_stmt_iterator tobsi = bsi_last (bodybb);
2489       if (bbs[i]->loop_father == loop)
2490 	{
2491 	  /* If this is true, we are *before* the inner loop.
2492 	     If this isn't true, we are *after* it.
2493 
2494 	     The only time can_convert_to_perfect_nest returns true when we
2495 	     have statements before the inner loop is if they can be moved
2496 	     into the inner loop.
2497 
2498 	     The only time can_convert_to_perfect_nest returns true when we
2499 	     have statements after the inner loop is if they can be moved into
2500 	     the new split loop.  */
2501 
2502 	  if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2503 	    {
2504 	      for (bsi = bsi_last (bbs[i]); !bsi_end_p (bsi);)
2505 		{
2506 		  use_operand_p use_p;
2507 		  imm_use_iterator imm_iter;
2508 		  tree stmt = bsi_stmt (bsi);
2509 
2510 		  if (stmt == exit_condition
2511 		      || not_interesting_stmt (stmt)
2512 		      || stmt_is_bumper_for_loop (loop, stmt))
2513 		    {
2514 		      if (!bsi_end_p (bsi))
2515 			bsi_prev (&bsi);
2516 		      continue;
2517 		    }
2518 		  /* Move this statement back into the inner loop.
2519 		     This looks a bit confusing, but we are really just
2520 		     finding the first non-exit phi use and moving the
2521 		     statement to the beginning of that use's basic
2522 		     block.  */
2523 		  FOR_EACH_IMM_USE_SAFE (use_p, imm_iter,
2524 					 TREE_OPERAND (stmt, 0))
2525 		    {
2526 		      tree imm_stmt = USE_STMT (use_p);
2527 		      if (!exit_phi_for_loop_p (loop->inner, imm_stmt))
2528 			{
2529 			  block_stmt_iterator tobsi = bsi_after_labels (bb_for_stmt (imm_stmt));
2530 			  bsi_move_before (&bsi, &tobsi);
2531 			  update_stmt (stmt);
2532 			  BREAK_FROM_SAFE_IMM_USE (imm_iter);
2533 			}
2534 		    }
2535 		}
2536 	    }
2537 	  else
2538 	    {
2539 	      /* Note that the bsi only needs to be explicitly incremented
2540 		 when we don't move something, since it is automatically
2541 		 incremented when we do.  */
2542 	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2543 		{
2544 		  ssa_op_iter i;
2545 		  tree n, stmt = bsi_stmt (bsi);
2546 
2547 		  if (stmt == exit_condition
2548 		      || not_interesting_stmt (stmt)
2549 		      || stmt_is_bumper_for_loop (loop, stmt))
2550 		    {
2551 		      bsi_next (&bsi);
2552 		      continue;
2553 		    }
2554 
2555 		  replace_uses_equiv_to_x_with_y (loop, stmt,
2556 						  oldivvar,
2557 						  VEC_index (int, steps, 0),
2558 						  ivvar);
2559 		  bsi_move_before (&bsi, &tobsi);
2560 
2561 		  /* If the statement has any virtual operands, they may
2562 		     need to be rewired because the original loop may
2563 		     still reference them.  */
2564 		  FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2565 		    mark_sym_for_renaming (SSA_NAME_VAR (n));
2566 		}
2567 	    }
2568 
2569 	}
2570     }
2571 
2572   free (bbs);
2573   return perfect_nest_p (loop);
2574 }
2575 
2576 /* Return true if TRANS is a legal transformation matrix that respects
2577    the dependence vectors in DISTS and DIRS.  The conservative answer
2578    is false.
2579 
2580    "Wolfe proves that a unimodular transformation represented by the
2581    matrix T is legal when applied to a loop nest with a set of
2582    lexicographically non-negative distance vectors RDG if and only if
2583    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2584    i.e.: if and only if it transforms the lexicographically positive
2585    distance vectors to lexicographically positive vectors.  Note that
2586    a unimodular matrix must transform the zero vector (and only it) to
2587    the zero vector." S.Muchnick.  */
2588 
2589 bool
lambda_transform_legal_p(lambda_trans_matrix trans,int nb_loops,varray_type dependence_relations)2590 lambda_transform_legal_p (lambda_trans_matrix trans,
2591 			  int nb_loops,
2592 			  varray_type dependence_relations)
2593 {
2594   unsigned int i, j;
2595   lambda_vector distres;
2596   struct data_dependence_relation *ddr;
2597 
2598   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2599 	      && LTM_ROWSIZE (trans) == nb_loops);
2600 
2601   /* When there is an unknown relation in the dependence_relations, we
2602      know that it is no worth looking at this loop nest: give up.  */
2603   ddr = (struct data_dependence_relation *)
2604     VARRAY_GENERIC_PTR (dependence_relations, 0);
2605   if (ddr == NULL)
2606     return true;
2607   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2608     return false;
2609 
2610   distres = lambda_vector_new (nb_loops);
2611 
2612   /* For each distance vector in the dependence graph.  */
2613   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2614     {
2615       ddr = (struct data_dependence_relation *)
2616 	VARRAY_GENERIC_PTR (dependence_relations, i);
2617 
2618       /* Don't care about relations for which we know that there is no
2619 	 dependence, nor about read-read (aka. output-dependences):
2620 	 these data accesses can happen in any order.  */
2621       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2622 	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2623 	continue;
2624 
2625       /* Conservatively answer: "this transformation is not valid".  */
2626       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2627 	return false;
2628 
2629       /* If the dependence could not be captured by a distance vector,
2630 	 conservatively answer that the transform is not valid.  */
2631       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2632 	return false;
2633 
2634       /* Compute trans.dist_vect */
2635       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2636 	{
2637 	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2638 				     DDR_DIST_VECT (ddr, j), distres);
2639 
2640 	  if (!lambda_vector_lexico_pos (distres, nb_loops))
2641 	    return false;
2642 	}
2643     }
2644   return true;
2645 }
2646