1 /* Scalar evolution detector.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <s.pop@laposte.net>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /*
22    Description:
23 
24    This pass analyzes the evolution of scalar variables in loop
25    structures.  The algorithm is based on the SSA representation,
26    and on the loop hierarchy tree.  This algorithm is not based on
27    the notion of versions of a variable, as it was the case for the
28    previous implementations of the scalar evolution algorithm, but
29    it assumes that each defined name is unique.
30 
31    The notation used in this file is called "chains of recurrences",
32    and has been proposed by Eugene Zima, Robert Van Engelen, and
33    others for describing induction variables in programs.  For example
34    "b -> {0, +, 2}_1" means that the scalar variable "b" is equal to 0
35    when entering in the loop_1 and has a step 2 in this loop, in other
36    words "for (b = 0; b < N; b+=2);".  Note that the coefficients of
37    this chain of recurrence (or chrec [shrek]) can contain the name of
38    other variables, in which case they are called parametric chrecs.
39    For example, "b -> {a, +, 2}_1" means that the initial value of "b"
40    is the value of "a".  In most of the cases these parametric chrecs
41    are fully instantiated before their use because symbolic names can
42    hide some difficult cases such as self-references described later
43    (see the Fibonacci example).
44 
45    A short sketch of the algorithm is:
46 
47    Given a scalar variable to be analyzed, follow the SSA edge to
48    its definition:
49 
50    - When the definition is a GIMPLE_ASSIGN: if the right hand side
51    (RHS) of the definition cannot be statically analyzed, the answer
52    of the analyzer is: "don't know".
53    Otherwise, for all the variables that are not yet analyzed in the
54    RHS, try to determine their evolution, and finally try to
55    evaluate the operation of the RHS that gives the evolution
56    function of the analyzed variable.
57 
58    - When the definition is a condition-phi-node: determine the
59    evolution function for all the branches of the phi node, and
60    finally merge these evolutions (see chrec_merge).
61 
62    - When the definition is a loop-phi-node: determine its initial
63    condition, that is the SSA edge defined in an outer loop, and
64    keep it symbolic.  Then determine the SSA edges that are defined
65    in the body of the loop.  Follow the inner edges until ending on
66    another loop-phi-node of the same analyzed loop.  If the reached
67    loop-phi-node is not the starting loop-phi-node, then we keep
68    this definition under a symbolic form.  If the reached
69    loop-phi-node is the same as the starting one, then we compute a
70    symbolic stride on the return path.  The result is then the
71    symbolic chrec {initial_condition, +, symbolic_stride}_loop.
72 
73    Examples:
74 
75    Example 1: Illustration of the basic algorithm.
76 
77    | a = 3
78    | loop_1
79    |   b = phi (a, c)
80    |   c = b + 1
81    |   if (c > 10) exit_loop
82    | endloop
83 
84    Suppose that we want to know the number of iterations of the
85    loop_1.  The exit_loop is controlled by a COND_EXPR (c > 10).  We
86    ask the scalar evolution analyzer two questions: what's the
87    scalar evolution (scev) of "c", and what's the scev of "10".  For
88    "10" the answer is "10" since it is a scalar constant.  For the
89    scalar variable "c", it follows the SSA edge to its definition,
90    "c = b + 1", and then asks again what's the scev of "b".
91    Following the SSA edge, we end on a loop-phi-node "b = phi (a,
92    c)", where the initial condition is "a", and the inner loop edge
93    is "c".  The initial condition is kept under a symbolic form (it
94    may be the case that the copy constant propagation has done its
95    work and we end with the constant "3" as one of the edges of the
96    loop-phi-node).  The update edge is followed to the end of the
97    loop, and until reaching again the starting loop-phi-node: b -> c
98    -> b.  At this point we have drawn a path from "b" to "b" from
99    which we compute the stride in the loop: in this example it is
100    "+1".  The resulting scev for "b" is "b -> {a, +, 1}_1".  Now
101    that the scev for "b" is known, it is possible to compute the
102    scev for "c", that is "c -> {a + 1, +, 1}_1".  In order to
103    determine the number of iterations in the loop_1, we have to
104    instantiate_parameters (loop_1, {a + 1, +, 1}_1), that gives after some
105    more analysis the scev {4, +, 1}_1, or in other words, this is
106    the function "f (x) = x + 4", where x is the iteration count of
107    the loop_1.  Now we have to solve the inequality "x + 4 > 10",
108    and take the smallest iteration number for which the loop is
109    exited: x = 7.  This loop runs from x = 0 to x = 7, and in total
110    there are 8 iterations.  In terms of loop normalization, we have
111    created a variable that is implicitly defined, "x" or just "_1",
112    and all the other analyzed scalars of the loop are defined in
113    function of this variable:
114 
115    a -> 3
116    b -> {3, +, 1}_1
117    c -> {4, +, 1}_1
118 
119    or in terms of a C program:
120 
121    | a = 3
122    | for (x = 0; x <= 7; x++)
123    |   {
124    |     b = x + 3
125    |     c = x + 4
126    |   }
127 
128    Example 2a: Illustration of the algorithm on nested loops.
129 
130    | loop_1
131    |   a = phi (1, b)
132    |   c = a + 2
133    |   loop_2  10 times
134    |     b = phi (c, d)
135    |     d = b + 3
136    |   endloop
137    | endloop
138 
139    For analyzing the scalar evolution of "a", the algorithm follows
140    the SSA edge into the loop's body: "a -> b".  "b" is an inner
141    loop-phi-node, and its analysis as in Example 1, gives:
142 
143    b -> {c, +, 3}_2
144    d -> {c + 3, +, 3}_2
145 
146    Following the SSA edge for the initial condition, we end on "c = a
147    + 2", and then on the starting loop-phi-node "a".  From this point,
148    the loop stride is computed: back on "c = a + 2" we get a "+2" in
149    the loop_1, then on the loop-phi-node "b" we compute the overall
150    effect of the inner loop that is "b = c + 30", and we get a "+30"
151    in the loop_1.  That means that the overall stride in loop_1 is
152    equal to "+32", and the result is:
153 
154    a -> {1, +, 32}_1
155    c -> {3, +, 32}_1
156 
157    Example 2b: Multivariate chains of recurrences.
158 
159    | loop_1
160    |   k = phi (0, k + 1)
161    |   loop_2  4 times
162    |     j = phi (0, j + 1)
163    |     loop_3 4 times
164    |       i = phi (0, i + 1)
165    |       A[j + k] = ...
166    |     endloop
167    |   endloop
168    | endloop
169 
170    Analyzing the access function of array A with
171    instantiate_parameters (loop_1, "j + k"), we obtain the
172    instantiation and the analysis of the scalar variables "j" and "k"
173    in loop_1.  This leads to the scalar evolution {4, +, 1}_1: the end
174    value of loop_2 for "j" is 4, and the evolution of "k" in loop_1 is
175    {0, +, 1}_1.  To obtain the evolution function in loop_3 and
176    instantiate the scalar variables up to loop_1, one has to use:
177    instantiate_scev (block_before_loop (loop_1), loop_3, "j + k").
178    The result of this call is {{0, +, 1}_1, +, 1}_2.
179 
180    Example 3: Higher degree polynomials.
181 
182    | loop_1
183    |   a = phi (2, b)
184    |   c = phi (5, d)
185    |   b = a + 1
186    |   d = c + a
187    | endloop
188 
189    a -> {2, +, 1}_1
190    b -> {3, +, 1}_1
191    c -> {5, +, a}_1
192    d -> {5 + a, +, a}_1
193 
194    instantiate_parameters (loop_1, {5, +, a}_1) -> {5, +, 2, +, 1}_1
195    instantiate_parameters (loop_1, {5 + a, +, a}_1) -> {7, +, 3, +, 1}_1
196 
197    Example 4: Lucas, Fibonacci, or mixers in general.
198 
199    | loop_1
200    |   a = phi (1, b)
201    |   c = phi (3, d)
202    |   b = c
203    |   d = c + a
204    | endloop
205 
206    a -> (1, c)_1
207    c -> {3, +, a}_1
208 
209    The syntax "(1, c)_1" stands for a PEELED_CHREC that has the
210    following semantics: during the first iteration of the loop_1, the
211    variable contains the value 1, and then it contains the value "c".
212    Note that this syntax is close to the syntax of the loop-phi-node:
213    "a -> (1, c)_1" vs. "a = phi (1, c)".
214 
215    The symbolic chrec representation contains all the semantics of the
216    original code.  What is more difficult is to use this information.
217 
218    Example 5: Flip-flops, or exchangers.
219 
220    | loop_1
221    |   a = phi (1, b)
222    |   c = phi (3, d)
223    |   b = c
224    |   d = a
225    | endloop
226 
227    a -> (1, c)_1
228    c -> (3, a)_1
229 
230    Based on these symbolic chrecs, it is possible to refine this
231    information into the more precise PERIODIC_CHRECs:
232 
233    a -> |1, 3|_1
234    c -> |3, 1|_1
235 
236    This transformation is not yet implemented.
237 
238    Further readings:
239 
240    You can find a more detailed description of the algorithm in:
241    http://icps.u-strasbg.fr/~pop/DEA_03_Pop.pdf
242    http://icps.u-strasbg.fr/~pop/DEA_03_Pop.ps.gz.  But note that
243    this is a preliminary report and some of the details of the
244    algorithm have changed.  I'm working on a research report that
245    updates the description of the algorithms to reflect the design
246    choices used in this implementation.
247 
248    A set of slides show a high level overview of the algorithm and run
249    an example through the scalar evolution analyzer:
250    http://cri.ensmp.fr/~pop/gcc/mar04/slides.pdf
251 
252    The slides that I have presented at the GCC Summit'04 are available
253    at: http://cri.ensmp.fr/~pop/gcc/20040604/gccsummit-lno-spop.pdf
254 */
255 
256 #include "config.h"
257 #include "system.h"
258 #include "coretypes.h"
259 #include "backend.h"
260 #include "target.h"
261 #include "rtl.h"
262 #include "optabs-query.h"
263 #include "tree.h"
264 #include "gimple.h"
265 #include "ssa.h"
266 #include "gimple-pretty-print.h"
267 #include "fold-const.h"
268 #include "gimplify.h"
269 #include "gimple-iterator.h"
270 #include "gimplify-me.h"
271 #include "tree-cfg.h"
272 #include "tree-ssa-loop-ivopts.h"
273 #include "tree-ssa-loop-manip.h"
274 #include "tree-ssa-loop-niter.h"
275 #include "tree-ssa-loop.h"
276 #include "tree-ssa.h"
277 #include "cfgloop.h"
278 #include "tree-chrec.h"
279 #include "tree-affine.h"
280 #include "tree-scalar-evolution.h"
281 #include "dumpfile.h"
282 #include "tree-ssa-propagate.h"
283 #include "gimple-fold.h"
284 #include "tree-into-ssa.h"
285 #include "builtins.h"
286 #include "case-cfn-macros.h"
287 
288 static tree analyze_scalar_evolution_1 (class loop *, tree);
289 static tree analyze_scalar_evolution_for_address_of (class loop *loop,
290 						     tree var);
291 
292 /* The cached information about an SSA name with version NAME_VERSION,
293    claiming that below basic block with index INSTANTIATED_BELOW, the
294    value of the SSA name can be expressed as CHREC.  */
295 
296 struct GTY((for_user)) scev_info_str {
297   unsigned int name_version;
298   int instantiated_below;
299   tree chrec;
300 };
301 
302 /* Counters for the scev database.  */
303 static unsigned nb_set_scev = 0;
304 static unsigned nb_get_scev = 0;
305 
306 struct scev_info_hasher : ggc_ptr_hash<scev_info_str>
307 {
308   static hashval_t hash (scev_info_str *i);
309   static bool equal (const scev_info_str *a, const scev_info_str *b);
310 };
311 
312 static GTY (()) hash_table<scev_info_hasher> *scalar_evolution_info;
313 
314 
315 /* Constructs a new SCEV_INFO_STR structure for VAR and INSTANTIATED_BELOW.  */
316 
317 static inline struct scev_info_str *
new_scev_info_str(basic_block instantiated_below,tree var)318 new_scev_info_str (basic_block instantiated_below, tree var)
319 {
320   struct scev_info_str *res;
321 
322   res = ggc_alloc<scev_info_str> ();
323   res->name_version = SSA_NAME_VERSION (var);
324   res->chrec = chrec_not_analyzed_yet;
325   res->instantiated_below = instantiated_below->index;
326 
327   return res;
328 }
329 
330 /* Computes a hash function for database element ELT.  */
331 
332 hashval_t
hash(scev_info_str * elt)333 scev_info_hasher::hash (scev_info_str *elt)
334 {
335   return elt->name_version ^ elt->instantiated_below;
336 }
337 
338 /* Compares database elements E1 and E2.  */
339 
340 bool
equal(const scev_info_str * elt1,const scev_info_str * elt2)341 scev_info_hasher::equal (const scev_info_str *elt1, const scev_info_str *elt2)
342 {
343   return (elt1->name_version == elt2->name_version
344 	  && elt1->instantiated_below == elt2->instantiated_below);
345 }
346 
347 /* Get the scalar evolution of VAR for INSTANTIATED_BELOW basic block.
348    A first query on VAR returns chrec_not_analyzed_yet.  */
349 
350 static tree *
find_var_scev_info(basic_block instantiated_below,tree var)351 find_var_scev_info (basic_block instantiated_below, tree var)
352 {
353   struct scev_info_str *res;
354   struct scev_info_str tmp;
355 
356   tmp.name_version = SSA_NAME_VERSION (var);
357   tmp.instantiated_below = instantiated_below->index;
358   scev_info_str **slot = scalar_evolution_info->find_slot (&tmp, INSERT);
359 
360   if (!*slot)
361     *slot = new_scev_info_str (instantiated_below, var);
362   res = *slot;
363 
364   return &res->chrec;
365 }
366 
367 
368 /* Hashtable helpers for a temporary hash-table used when
369    analyzing a scalar evolution, instantiating a CHREC or
370    resolving mixers.  */
371 
372 class instantiate_cache_type
373 {
374 public:
375   htab_t map;
376   vec<scev_info_str> entries;
377 
instantiate_cache_type()378   instantiate_cache_type () : map (NULL), entries (vNULL) {}
379   ~instantiate_cache_type ();
get(unsigned slot)380   tree get (unsigned slot) { return entries[slot].chrec; }
set(unsigned slot,tree chrec)381   void set (unsigned slot, tree chrec) { entries[slot].chrec = chrec; }
382 };
383 
~instantiate_cache_type()384 instantiate_cache_type::~instantiate_cache_type ()
385 {
386   if (map != NULL)
387     {
388       htab_delete (map);
389       entries.release ();
390     }
391 }
392 
393 /* Cache to avoid infinite recursion when instantiating an SSA name.
394    Live during the outermost analyze_scalar_evolution, instantiate_scev
395    or resolve_mixers call.  */
396 static instantiate_cache_type *global_cache;
397 
398 
399 /* Return true when PHI is a loop-phi-node.  */
400 
401 static bool
loop_phi_node_p(gimple * phi)402 loop_phi_node_p (gimple *phi)
403 {
404   /* The implementation of this function is based on the following
405      property: "all the loop-phi-nodes of a loop are contained in the
406      loop's header basic block".  */
407 
408   return loop_containing_stmt (phi)->header == gimple_bb (phi);
409 }
410 
411 /* Compute the scalar evolution for EVOLUTION_FN after crossing LOOP.
412    In general, in the case of multivariate evolutions we want to get
413    the evolution in different loops.  LOOP specifies the level for
414    which to get the evolution.
415 
416    Example:
417 
418    | for (j = 0; j < 100; j++)
419    |   {
420    |     for (k = 0; k < 100; k++)
421    |       {
422    |         i = k + j;   - Here the value of i is a function of j, k.
423    |       }
424    |      ... = i         - Here the value of i is a function of j.
425    |   }
426    | ... = i              - Here the value of i is a scalar.
427 
428    Example:
429 
430    | i_0 = ...
431    | loop_1 10 times
432    |   i_1 = phi (i_0, i_2)
433    |   i_2 = i_1 + 2
434    | endloop
435 
436    This loop has the same effect as:
437    LOOP_1 has the same effect as:
438 
439    | i_1 = i_0 + 20
440 
441    The overall effect of the loop, "i_0 + 20" in the previous example,
442    is obtained by passing in the parameters: LOOP = 1,
443    EVOLUTION_FN = {i_0, +, 2}_1.
444 */
445 
446 tree
compute_overall_effect_of_inner_loop(class loop * loop,tree evolution_fn)447 compute_overall_effect_of_inner_loop (class loop *loop, tree evolution_fn)
448 {
449   bool val = false;
450 
451   if (evolution_fn == chrec_dont_know)
452     return chrec_dont_know;
453 
454   else if (TREE_CODE (evolution_fn) == POLYNOMIAL_CHREC)
455     {
456       class loop *inner_loop = get_chrec_loop (evolution_fn);
457 
458       if (inner_loop == loop
459 	  || flow_loop_nested_p (loop, inner_loop))
460 	{
461 	  tree nb_iter = number_of_latch_executions (inner_loop);
462 
463 	  if (nb_iter == chrec_dont_know)
464 	    return chrec_dont_know;
465 	  else
466 	    {
467 	      tree res;
468 
469 	      /* evolution_fn is the evolution function in LOOP.  Get
470 		 its value in the nb_iter-th iteration.  */
471 	      res = chrec_apply (inner_loop->num, evolution_fn, nb_iter);
472 
473 	      if (chrec_contains_symbols_defined_in_loop (res, loop->num))
474 		res = instantiate_parameters (loop, res);
475 
476 	      /* Continue the computation until ending on a parent of LOOP.  */
477 	      return compute_overall_effect_of_inner_loop (loop, res);
478 	    }
479 	}
480       else
481 	return evolution_fn;
482      }
483 
484   /* If the evolution function is an invariant, there is nothing to do.  */
485   else if (no_evolution_in_loop_p (evolution_fn, loop->num, &val) && val)
486     return evolution_fn;
487 
488   else
489     return chrec_dont_know;
490 }
491 
492 /* Associate CHREC to SCALAR.  */
493 
494 static void
set_scalar_evolution(basic_block instantiated_below,tree scalar,tree chrec)495 set_scalar_evolution (basic_block instantiated_below, tree scalar, tree chrec)
496 {
497   tree *scalar_info;
498 
499   if (TREE_CODE (scalar) != SSA_NAME)
500     return;
501 
502   scalar_info = find_var_scev_info (instantiated_below, scalar);
503 
504   if (dump_file)
505     {
506       if (dump_flags & TDF_SCEV)
507 	{
508 	  fprintf (dump_file, "(set_scalar_evolution \n");
509 	  fprintf (dump_file, "  instantiated_below = %d \n",
510 		   instantiated_below->index);
511 	  fprintf (dump_file, "  (scalar = ");
512 	  print_generic_expr (dump_file, scalar);
513 	  fprintf (dump_file, ")\n  (scalar_evolution = ");
514 	  print_generic_expr (dump_file, chrec);
515 	  fprintf (dump_file, "))\n");
516 	}
517       if (dump_flags & TDF_STATS)
518 	nb_set_scev++;
519     }
520 
521   *scalar_info = chrec;
522 }
523 
524 /* Retrieve the chrec associated to SCALAR instantiated below
525    INSTANTIATED_BELOW block.  */
526 
527 static tree
get_scalar_evolution(basic_block instantiated_below,tree scalar)528 get_scalar_evolution (basic_block instantiated_below, tree scalar)
529 {
530   tree res;
531 
532   if (dump_file)
533     {
534       if (dump_flags & TDF_SCEV)
535 	{
536 	  fprintf (dump_file, "(get_scalar_evolution \n");
537 	  fprintf (dump_file, "  (scalar = ");
538 	  print_generic_expr (dump_file, scalar);
539 	  fprintf (dump_file, ")\n");
540 	}
541       if (dump_flags & TDF_STATS)
542 	nb_get_scev++;
543     }
544 
545   if (VECTOR_TYPE_P (TREE_TYPE (scalar))
546       || TREE_CODE (TREE_TYPE (scalar)) == COMPLEX_TYPE)
547     /* For chrec_dont_know we keep the symbolic form.  */
548     res = scalar;
549   else
550     switch (TREE_CODE (scalar))
551       {
552       case SSA_NAME:
553         if (SSA_NAME_IS_DEFAULT_DEF (scalar))
554 	  res = scalar;
555 	else
556 	  res = *find_var_scev_info (instantiated_below, scalar);
557 	break;
558 
559       case REAL_CST:
560       case FIXED_CST:
561       case INTEGER_CST:
562 	res = scalar;
563 	break;
564 
565       default:
566 	res = chrec_not_analyzed_yet;
567 	break;
568       }
569 
570   if (dump_file && (dump_flags & TDF_SCEV))
571     {
572       fprintf (dump_file, "  (scalar_evolution = ");
573       print_generic_expr (dump_file, res);
574       fprintf (dump_file, "))\n");
575     }
576 
577   return res;
578 }
579 
580 /* Helper function for add_to_evolution.  Returns the evolution
581    function for an assignment of the form "a = b + c", where "a" and
582    "b" are on the strongly connected component.  CHREC_BEFORE is the
583    information that we already have collected up to this point.
584    TO_ADD is the evolution of "c".
585 
586    When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
587    evolution the expression TO_ADD, otherwise construct an evolution
588    part for this loop.  */
589 
590 static tree
add_to_evolution_1(unsigned loop_nb,tree chrec_before,tree to_add,gimple * at_stmt)591 add_to_evolution_1 (unsigned loop_nb, tree chrec_before, tree to_add,
592 		    gimple *at_stmt)
593 {
594   tree type, left, right;
595   class loop *loop = get_loop (cfun, loop_nb), *chloop;
596 
597   switch (TREE_CODE (chrec_before))
598     {
599     case POLYNOMIAL_CHREC:
600       chloop = get_chrec_loop (chrec_before);
601       if (chloop == loop
602 	  || flow_loop_nested_p (chloop, loop))
603 	{
604 	  unsigned var;
605 
606 	  type = chrec_type (chrec_before);
607 
608 	  /* When there is no evolution part in this loop, build it.  */
609 	  if (chloop != loop)
610 	    {
611 	      var = loop_nb;
612 	      left = chrec_before;
613 	      right = SCALAR_FLOAT_TYPE_P (type)
614 		? build_real (type, dconst0)
615 		: build_int_cst (type, 0);
616 	    }
617 	  else
618 	    {
619 	      var = CHREC_VARIABLE (chrec_before);
620 	      left = CHREC_LEFT (chrec_before);
621 	      right = CHREC_RIGHT (chrec_before);
622 	    }
623 
624 	  to_add = chrec_convert (type, to_add, at_stmt);
625 	  right = chrec_convert_rhs (type, right, at_stmt);
626 	  right = chrec_fold_plus (chrec_type (right), right, to_add);
627 	  return build_polynomial_chrec (var, left, right);
628 	}
629       else
630 	{
631 	  gcc_assert (flow_loop_nested_p (loop, chloop));
632 
633 	  /* Search the evolution in LOOP_NB.  */
634 	  left = add_to_evolution_1 (loop_nb, CHREC_LEFT (chrec_before),
635 				     to_add, at_stmt);
636 	  right = CHREC_RIGHT (chrec_before);
637 	  right = chrec_convert_rhs (chrec_type (left), right, at_stmt);
638 	  return build_polynomial_chrec (CHREC_VARIABLE (chrec_before),
639 					 left, right);
640 	}
641 
642     default:
643       /* These nodes do not depend on a loop.  */
644       if (chrec_before == chrec_dont_know)
645 	return chrec_dont_know;
646 
647       left = chrec_before;
648       right = chrec_convert_rhs (chrec_type (left), to_add, at_stmt);
649       return build_polynomial_chrec (loop_nb, left, right);
650     }
651 }
652 
653 /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
654    of LOOP_NB.
655 
656    Description (provided for completeness, for those who read code in
657    a plane, and for my poor 62 bytes brain that would have forgotten
658    all this in the next two or three months):
659 
660    The algorithm of translation of programs from the SSA representation
661    into the chrecs syntax is based on a pattern matching.  After having
662    reconstructed the overall tree expression for a loop, there are only
663    two cases that can arise:
664 
665    1. a = loop-phi (init, a + expr)
666    2. a = loop-phi (init, expr)
667 
668    where EXPR is either a scalar constant with respect to the analyzed
669    loop (this is a degree 0 polynomial), or an expression containing
670    other loop-phi definitions (these are higher degree polynomials).
671 
672    Examples:
673 
674    1.
675    | init = ...
676    | loop_1
677    |   a = phi (init, a + 5)
678    | endloop
679 
680    2.
681    | inita = ...
682    | initb = ...
683    | loop_1
684    |   a = phi (inita, 2 * b + 3)
685    |   b = phi (initb, b + 1)
686    | endloop
687 
688    For the first case, the semantics of the SSA representation is:
689 
690    | a (x) = init + \sum_{j = 0}^{x - 1} expr (j)
691 
692    that is, there is a loop index "x" that determines the scalar value
693    of the variable during the loop execution.  During the first
694    iteration, the value is that of the initial condition INIT, while
695    during the subsequent iterations, it is the sum of the initial
696    condition with the sum of all the values of EXPR from the initial
697    iteration to the before last considered iteration.
698 
699    For the second case, the semantics of the SSA program is:
700 
701    | a (x) = init, if x = 0;
702    |         expr (x - 1), otherwise.
703 
704    The second case corresponds to the PEELED_CHREC, whose syntax is
705    close to the syntax of a loop-phi-node:
706 
707    | phi (init, expr)  vs.  (init, expr)_x
708 
709    The proof of the translation algorithm for the first case is a
710    proof by structural induction based on the degree of EXPR.
711 
712    Degree 0:
713    When EXPR is a constant with respect to the analyzed loop, or in
714    other words when EXPR is a polynomial of degree 0, the evolution of
715    the variable A in the loop is an affine function with an initial
716    condition INIT, and a step EXPR.  In order to show this, we start
717    from the semantics of the SSA representation:
718 
719    f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
720 
721    and since "expr (j)" is a constant with respect to "j",
722 
723    f (x) = init + x * expr
724 
725    Finally, based on the semantics of the pure sum chrecs, by
726    identification we get the corresponding chrecs syntax:
727 
728    f (x) = init * \binom{x}{0} + expr * \binom{x}{1}
729    f (x) -> {init, +, expr}_x
730 
731    Higher degree:
732    Suppose that EXPR is a polynomial of degree N with respect to the
733    analyzed loop_x for which we have already determined that it is
734    written under the chrecs syntax:
735 
736    | expr (x)  ->  {b_0, +, b_1, +, ..., +, b_{n-1}} (x)
737 
738    We start from the semantics of the SSA program:
739 
740    | f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
741    |
742    | f (x) = init + \sum_{j = 0}^{x - 1}
743    |                (b_0 * \binom{j}{0} + ... + b_{n-1} * \binom{j}{n-1})
744    |
745    | f (x) = init + \sum_{j = 0}^{x - 1}
746    |                \sum_{k = 0}^{n - 1} (b_k * \binom{j}{k})
747    |
748    | f (x) = init + \sum_{k = 0}^{n - 1}
749    |                (b_k * \sum_{j = 0}^{x - 1} \binom{j}{k})
750    |
751    | f (x) = init + \sum_{k = 0}^{n - 1}
752    |                (b_k * \binom{x}{k + 1})
753    |
754    | f (x) = init + b_0 * \binom{x}{1} + ...
755    |              + b_{n-1} * \binom{x}{n}
756    |
757    | f (x) = init * \binom{x}{0} + b_0 * \binom{x}{1} + ...
758    |                             + b_{n-1} * \binom{x}{n}
759    |
760 
761    And finally from the definition of the chrecs syntax, we identify:
762    | f (x)  ->  {init, +, b_0, +, ..., +, b_{n-1}}_x
763 
764    This shows the mechanism that stands behind the add_to_evolution
765    function.  An important point is that the use of symbolic
766    parameters avoids the need of an analysis schedule.
767 
768    Example:
769 
770    | inita = ...
771    | initb = ...
772    | loop_1
773    |   a = phi (inita, a + 2 + b)
774    |   b = phi (initb, b + 1)
775    | endloop
776 
777    When analyzing "a", the algorithm keeps "b" symbolically:
778 
779    | a  ->  {inita, +, 2 + b}_1
780 
781    Then, after instantiation, the analyzer ends on the evolution:
782 
783    | a  ->  {inita, +, 2 + initb, +, 1}_1
784 
785 */
786 
787 static tree
add_to_evolution(unsigned loop_nb,tree chrec_before,enum tree_code code,tree to_add,gimple * at_stmt)788 add_to_evolution (unsigned loop_nb, tree chrec_before, enum tree_code code,
789 		  tree to_add, gimple *at_stmt)
790 {
791   tree type = chrec_type (to_add);
792   tree res = NULL_TREE;
793 
794   if (to_add == NULL_TREE)
795     return chrec_before;
796 
797   /* TO_ADD is either a scalar, or a parameter.  TO_ADD is not
798      instantiated at this point.  */
799   if (TREE_CODE (to_add) == POLYNOMIAL_CHREC)
800     /* This should not happen.  */
801     return chrec_dont_know;
802 
803   if (dump_file && (dump_flags & TDF_SCEV))
804     {
805       fprintf (dump_file, "(add_to_evolution \n");
806       fprintf (dump_file, "  (loop_nb = %d)\n", loop_nb);
807       fprintf (dump_file, "  (chrec_before = ");
808       print_generic_expr (dump_file, chrec_before);
809       fprintf (dump_file, ")\n  (to_add = ");
810       print_generic_expr (dump_file, to_add);
811       fprintf (dump_file, ")\n");
812     }
813 
814   if (code == MINUS_EXPR)
815     to_add = chrec_fold_multiply (type, to_add, SCALAR_FLOAT_TYPE_P (type)
816 				  ? build_real (type, dconstm1)
817 				  : build_int_cst_type (type, -1));
818 
819   res = add_to_evolution_1 (loop_nb, chrec_before, to_add, at_stmt);
820 
821   if (dump_file && (dump_flags & TDF_SCEV))
822     {
823       fprintf (dump_file, "  (res = ");
824       print_generic_expr (dump_file, res);
825       fprintf (dump_file, "))\n");
826     }
827 
828   return res;
829 }
830 
831 
832 
833 /* This section selects the loops that will be good candidates for the
834    scalar evolution analysis.  For the moment, greedily select all the
835    loop nests we could analyze.  */
836 
837 /* For a loop with a single exit edge, return the COND_EXPR that
838    guards the exit edge.  If the expression is too difficult to
839    analyze, then give up.  */
840 
841 gcond *
get_loop_exit_condition(const class loop * loop)842 get_loop_exit_condition (const class loop *loop)
843 {
844   gcond *res = NULL;
845   edge exit_edge = single_exit (loop);
846 
847   if (dump_file && (dump_flags & TDF_SCEV))
848     fprintf (dump_file, "(get_loop_exit_condition \n  ");
849 
850   if (exit_edge)
851     {
852       gimple *stmt;
853 
854       stmt = last_stmt (exit_edge->src);
855       if (gcond *cond_stmt = safe_dyn_cast <gcond *> (stmt))
856 	res = cond_stmt;
857     }
858 
859   if (dump_file && (dump_flags & TDF_SCEV))
860     {
861       print_gimple_stmt (dump_file, res, 0);
862       fprintf (dump_file, ")\n");
863     }
864 
865   return res;
866 }
867 
868 
869 /* Depth first search algorithm.  */
870 
871 enum t_bool {
872   t_false,
873   t_true,
874   t_dont_know
875 };
876 
877 
878 static t_bool follow_ssa_edge_expr (class loop *loop, gimple *, tree, gphi *,
879 				    tree *, int);
880 
881 /* Follow the ssa edge into the binary expression RHS0 CODE RHS1.
882    Return true if the strongly connected component has been found.  */
883 
884 static t_bool
follow_ssa_edge_binary(class loop * loop,gimple * at_stmt,tree type,tree rhs0,enum tree_code code,tree rhs1,gphi * halting_phi,tree * evolution_of_loop,int limit)885 follow_ssa_edge_binary (class loop *loop, gimple *at_stmt,
886 			tree type, tree rhs0, enum tree_code code, tree rhs1,
887 			gphi *halting_phi, tree *evolution_of_loop,
888 			int limit)
889 {
890   t_bool res = t_false;
891   tree evol;
892 
893   switch (code)
894     {
895     case POINTER_PLUS_EXPR:
896     case PLUS_EXPR:
897       if (TREE_CODE (rhs0) == SSA_NAME)
898 	{
899 	  if (TREE_CODE (rhs1) == SSA_NAME)
900 	    {
901 	      /* Match an assignment under the form:
902 		 "a = b + c".  */
903 
904 	      /* We want only assignments of form "name + name" contribute to
905 		 LIMIT, as the other cases do not necessarily contribute to
906 		 the complexity of the expression.  */
907 	      limit++;
908 
909 	      evol = *evolution_of_loop;
910 	      evol = add_to_evolution
911 		  (loop->num,
912 		   chrec_convert (type, evol, at_stmt),
913 		   code, rhs1, at_stmt);
914 	      res = follow_ssa_edge_expr
915 		(loop, at_stmt, rhs0, halting_phi, &evol, limit);
916 	      if (res == t_true)
917 		*evolution_of_loop = evol;
918 	      else if (res == t_false)
919 		{
920 		  *evolution_of_loop = add_to_evolution
921 		      (loop->num,
922 		       chrec_convert (type, *evolution_of_loop, at_stmt),
923 		       code, rhs0, at_stmt);
924 		  res = follow_ssa_edge_expr
925 		    (loop, at_stmt, rhs1, halting_phi,
926 		     evolution_of_loop, limit);
927 		}
928 	    }
929 
930 	  else
931 	    gcc_unreachable ();  /* Handled in caller.  */
932 	}
933 
934       else if (TREE_CODE (rhs1) == SSA_NAME)
935 	{
936 	  /* Match an assignment under the form:
937 	     "a = ... + c".  */
938 	  *evolution_of_loop = add_to_evolution
939 	      (loop->num, chrec_convert (type, *evolution_of_loop,
940 					 at_stmt),
941 	       code, rhs0, at_stmt);
942 	  res = follow_ssa_edge_expr
943 	    (loop, at_stmt, rhs1, halting_phi,
944 	     evolution_of_loop, limit);
945 	}
946 
947       else
948 	/* Otherwise, match an assignment under the form:
949 	   "a = ... + ...".  */
950 	/* And there is nothing to do.  */
951 	res = t_false;
952       break;
953 
954     case MINUS_EXPR:
955       /* This case is under the form "opnd0 = rhs0 - rhs1".  */
956       if (TREE_CODE (rhs0) == SSA_NAME)
957 	gcc_unreachable (); /* Handled in caller.  */
958       else
959 	/* Otherwise, match an assignment under the form:
960 	   "a = ... - ...".  */
961 	/* And there is nothing to do.  */
962 	res = t_false;
963       break;
964 
965     default:
966       res = t_false;
967     }
968 
969   return res;
970 }
971 
972 /* Checks whether the I-th argument of a PHI comes from a backedge.  */
973 
974 static bool
backedge_phi_arg_p(gphi * phi,int i)975 backedge_phi_arg_p (gphi *phi, int i)
976 {
977   const_edge e = gimple_phi_arg_edge (phi, i);
978 
979   /* We would in fact like to test EDGE_DFS_BACK here, but we do not care
980      about updating it anywhere, and this should work as well most of the
981      time.  */
982   if (e->flags & EDGE_IRREDUCIBLE_LOOP)
983     return true;
984 
985   return false;
986 }
987 
988 /* Helper function for one branch of the condition-phi-node.  Return
989    true if the strongly connected component has been found following
990    this path.  */
991 
992 static inline t_bool
follow_ssa_edge_in_condition_phi_branch(int i,class loop * loop,gphi * condition_phi,gphi * halting_phi,tree * evolution_of_branch,tree init_cond,int limit)993 follow_ssa_edge_in_condition_phi_branch (int i,
994 					 class loop *loop,
995 					 gphi *condition_phi,
996 					 gphi *halting_phi,
997 					 tree *evolution_of_branch,
998 					 tree init_cond, int limit)
999 {
1000   tree branch = PHI_ARG_DEF (condition_phi, i);
1001   *evolution_of_branch = chrec_dont_know;
1002 
1003   /* Do not follow back edges (they must belong to an irreducible loop, which
1004      we really do not want to worry about).  */
1005   if (backedge_phi_arg_p (condition_phi, i))
1006     return t_false;
1007 
1008   if (TREE_CODE (branch) == SSA_NAME)
1009     {
1010       *evolution_of_branch = init_cond;
1011       return follow_ssa_edge_expr (loop, condition_phi, branch, halting_phi,
1012 				   evolution_of_branch, limit);
1013     }
1014 
1015   /* This case occurs when one of the condition branches sets
1016      the variable to a constant: i.e. a phi-node like
1017      "a_2 = PHI <a_7(5), 2(6)>;".
1018 
1019      FIXME:  This case have to be refined correctly:
1020      in some cases it is possible to say something better than
1021      chrec_dont_know, for example using a wrap-around notation.  */
1022   return t_false;
1023 }
1024 
1025 /* This function merges the branches of a condition-phi-node in a
1026    loop.  */
1027 
1028 static t_bool
follow_ssa_edge_in_condition_phi(class loop * loop,gphi * condition_phi,gphi * halting_phi,tree * evolution_of_loop,int limit)1029 follow_ssa_edge_in_condition_phi (class loop *loop,
1030 				  gphi *condition_phi,
1031 				  gphi *halting_phi,
1032 				  tree *evolution_of_loop, int limit)
1033 {
1034   int i, n;
1035   tree init = *evolution_of_loop;
1036   tree evolution_of_branch;
1037   t_bool res = follow_ssa_edge_in_condition_phi_branch (0, loop, condition_phi,
1038 							halting_phi,
1039 							&evolution_of_branch,
1040 							init, limit);
1041   if (res == t_false || res == t_dont_know)
1042     return res;
1043 
1044   *evolution_of_loop = evolution_of_branch;
1045 
1046   n = gimple_phi_num_args (condition_phi);
1047   for (i = 1; i < n; i++)
1048     {
1049       /* Quickly give up when the evolution of one of the branches is
1050 	 not known.  */
1051       if (*evolution_of_loop == chrec_dont_know)
1052 	return t_true;
1053 
1054       /* Increase the limit by the PHI argument number to avoid exponential
1055 	 time and memory complexity.  */
1056       res = follow_ssa_edge_in_condition_phi_branch (i, loop, condition_phi,
1057 						     halting_phi,
1058 						     &evolution_of_branch,
1059 						     init, limit + i);
1060       if (res == t_false || res == t_dont_know)
1061 	return res;
1062 
1063       *evolution_of_loop = chrec_merge (*evolution_of_loop,
1064 					evolution_of_branch);
1065     }
1066 
1067   return t_true;
1068 }
1069 
1070 /* Follow an SSA edge in an inner loop.  It computes the overall
1071    effect of the loop, and following the symbolic initial conditions,
1072    it follows the edges in the parent loop.  The inner loop is
1073    considered as a single statement.  */
1074 
1075 static t_bool
follow_ssa_edge_inner_loop_phi(class loop * outer_loop,gphi * loop_phi_node,gphi * halting_phi,tree * evolution_of_loop,int limit)1076 follow_ssa_edge_inner_loop_phi (class loop *outer_loop,
1077 				gphi *loop_phi_node,
1078 				gphi *halting_phi,
1079 				tree *evolution_of_loop, int limit)
1080 {
1081   class loop *loop = loop_containing_stmt (loop_phi_node);
1082   tree ev = analyze_scalar_evolution (loop, PHI_RESULT (loop_phi_node));
1083 
1084   /* Sometimes, the inner loop is too difficult to analyze, and the
1085      result of the analysis is a symbolic parameter.  */
1086   if (ev == PHI_RESULT (loop_phi_node))
1087     {
1088       t_bool res = t_false;
1089       int i, n = gimple_phi_num_args (loop_phi_node);
1090 
1091       for (i = 0; i < n; i++)
1092 	{
1093 	  tree arg = PHI_ARG_DEF (loop_phi_node, i);
1094 	  basic_block bb;
1095 
1096 	  /* Follow the edges that exit the inner loop.  */
1097 	  bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1098 	  if (!flow_bb_inside_loop_p (loop, bb))
1099 	    res = follow_ssa_edge_expr (outer_loop, loop_phi_node,
1100 					arg, halting_phi,
1101 					evolution_of_loop, limit);
1102 	  if (res == t_true)
1103 	    break;
1104 	}
1105 
1106       /* If the path crosses this loop-phi, give up.  */
1107       if (res == t_true)
1108 	*evolution_of_loop = chrec_dont_know;
1109 
1110       return res;
1111     }
1112 
1113   /* Otherwise, compute the overall effect of the inner loop.  */
1114   ev = compute_overall_effect_of_inner_loop (loop, ev);
1115   return follow_ssa_edge_expr (outer_loop, loop_phi_node, ev, halting_phi,
1116 			       evolution_of_loop, limit);
1117 }
1118 
1119 /* Follow the ssa edge into the expression EXPR.
1120    Return true if the strongly connected component has been found.  */
1121 
1122 static t_bool
follow_ssa_edge_expr(class loop * loop,gimple * at_stmt,tree expr,gphi * halting_phi,tree * evolution_of_loop,int limit)1123 follow_ssa_edge_expr (class loop *loop, gimple *at_stmt, tree expr,
1124 		      gphi *halting_phi, tree *evolution_of_loop,
1125 		      int limit)
1126 {
1127   enum tree_code code;
1128   tree type, rhs0, rhs1 = NULL_TREE;
1129 
1130   /* The EXPR is one of the following cases:
1131      - an SSA_NAME,
1132      - an INTEGER_CST,
1133      - a PLUS_EXPR,
1134      - a POINTER_PLUS_EXPR,
1135      - a MINUS_EXPR,
1136      - an ASSERT_EXPR,
1137      - other cases are not yet handled.  */
1138 
1139   /* For SSA_NAME look at the definition statement, handling
1140      PHI nodes and otherwise expand appropriately for the expression
1141      handling below.  */
1142 tail_recurse:
1143   if (TREE_CODE (expr) == SSA_NAME)
1144     {
1145       gimple *def = SSA_NAME_DEF_STMT (expr);
1146 
1147       if (gimple_nop_p (def))
1148 	return t_false;
1149 
1150       /* Give up if the path is longer than the MAX that we allow.  */
1151       if (limit > param_scev_max_expr_complexity)
1152 	{
1153 	  *evolution_of_loop = chrec_dont_know;
1154 	  return t_dont_know;
1155 	}
1156 
1157       if (gphi *phi = dyn_cast <gphi *>(def))
1158 	{
1159 	  if (!loop_phi_node_p (phi))
1160 	    /* DEF is a condition-phi-node.  Follow the branches, and
1161 	       record their evolutions.  Finally, merge the collected
1162 	       information and set the approximation to the main
1163 	       variable.  */
1164 	    return follow_ssa_edge_in_condition_phi
1165 		(loop, phi, halting_phi, evolution_of_loop, limit);
1166 
1167 	  /* When the analyzed phi is the halting_phi, the
1168 	     depth-first search is over: we have found a path from
1169 	     the halting_phi to itself in the loop.  */
1170 	  if (phi == halting_phi)
1171 	    return t_true;
1172 
1173 	  /* Otherwise, the evolution of the HALTING_PHI depends
1174 	     on the evolution of another loop-phi-node, i.e. the
1175 	     evolution function is a higher degree polynomial.  */
1176 	  class loop *def_loop = loop_containing_stmt (def);
1177 	  if (def_loop == loop)
1178 	    return t_false;
1179 
1180 	  /* Inner loop.  */
1181 	  if (flow_loop_nested_p (loop, def_loop))
1182 	    return follow_ssa_edge_inner_loop_phi
1183 		(loop, phi, halting_phi, evolution_of_loop,
1184 		 limit + 1);
1185 
1186 	  /* Outer loop.  */
1187 	  return t_false;
1188 	}
1189 
1190       /* At this level of abstraction, the program is just a set
1191 	 of GIMPLE_ASSIGNs and PHI_NODEs.  In principle there is no
1192 	 other def to be handled.  */
1193       if (!is_gimple_assign (def))
1194 	return t_false;
1195 
1196       code = gimple_assign_rhs_code (def);
1197       switch (get_gimple_rhs_class (code))
1198 	{
1199 	case GIMPLE_BINARY_RHS:
1200 	  rhs0 = gimple_assign_rhs1 (def);
1201 	  rhs1 = gimple_assign_rhs2 (def);
1202 	  break;
1203 	case GIMPLE_UNARY_RHS:
1204 	case GIMPLE_SINGLE_RHS:
1205 	  rhs0 = gimple_assign_rhs1 (def);
1206 	  break;
1207 	default:
1208 	  return t_false;
1209 	}
1210       type = TREE_TYPE (gimple_assign_lhs (def));
1211       at_stmt = def;
1212     }
1213   else
1214     {
1215       code = TREE_CODE (expr);
1216       type = TREE_TYPE (expr);
1217       switch (code)
1218 	{
1219 	CASE_CONVERT:
1220 	  rhs0 = TREE_OPERAND (expr, 0);
1221 	  break;
1222 	case POINTER_PLUS_EXPR:
1223 	case PLUS_EXPR:
1224 	case MINUS_EXPR:
1225 	  rhs0 = TREE_OPERAND (expr, 0);
1226 	  rhs1 = TREE_OPERAND (expr, 1);
1227 	  break;
1228 	default:
1229 	  rhs0 = expr;
1230 	}
1231     }
1232 
1233   switch (code)
1234     {
1235     CASE_CONVERT:
1236       {
1237 	/* This assignment is under the form "a_1 = (cast) rhs.  */
1238 	t_bool res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi,
1239 					   evolution_of_loop, limit);
1240 	*evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt);
1241 	return res;
1242       }
1243 
1244     case INTEGER_CST:
1245       /* This assignment is under the form "a_1 = 7".  */
1246       return t_false;
1247 
1248     case ADDR_EXPR:
1249       {
1250 	/* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.  */
1251 	if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF)
1252 	  return t_false;
1253 	tree mem = TREE_OPERAND (rhs0, 0);
1254 	rhs0 = TREE_OPERAND (mem, 0);
1255 	rhs1 = TREE_OPERAND (mem, 1);
1256 	code = POINTER_PLUS_EXPR;
1257       }
1258       /* Fallthru.  */
1259     case POINTER_PLUS_EXPR:
1260     case PLUS_EXPR:
1261     case MINUS_EXPR:
1262       /* This case is under the form "rhs0 +- rhs1".  */
1263       STRIP_USELESS_TYPE_CONVERSION (rhs0);
1264       STRIP_USELESS_TYPE_CONVERSION (rhs1);
1265       if (TREE_CODE (rhs0) == SSA_NAME
1266 	  && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR))
1267 	{
1268 	  /* Match an assignment under the form:
1269 	     "a = b +- ...".
1270 	     Use tail-recursion for the simple case.  */
1271 	  *evolution_of_loop = add_to_evolution
1272 	      (loop->num, chrec_convert (type, *evolution_of_loop,
1273 					 at_stmt),
1274 	       code, rhs1, at_stmt);
1275 	  expr = rhs0;
1276 	  goto tail_recurse;
1277 	}
1278       /* Else search for the SCC in both rhs0 and rhs1.  */
1279       return follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1,
1280 				     halting_phi, evolution_of_loop, limit);
1281 
1282     case ASSERT_EXPR:
1283       /* This assignment is of the form: "a_1 = ASSERT_EXPR <a_2, ...>"
1284 	 It must be handled as a copy assignment of the form a_1 = a_2.  */
1285       expr = ASSERT_EXPR_VAR (rhs0);
1286       goto tail_recurse;
1287 
1288     default:
1289       return t_false;
1290     }
1291 }
1292 
1293 
1294 /* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP.
1295    Handle below case and return the corresponding POLYNOMIAL_CHREC:
1296 
1297    # i_17 = PHI <i_13(5), 0(3)>
1298    # _20 = PHI <_5(5), start_4(D)(3)>
1299    ...
1300    i_13 = i_17 + 1;
1301    _5 = start_4(D) + i_13;
1302 
1303    Though variable _20 appears as a PEELED_CHREC in the form of
1304    (start_4, _5)_LOOP, it's a POLYNOMIAL_CHREC like {start_4, 1}_LOOP.
1305 
1306    See PR41488.  */
1307 
1308 static tree
simplify_peeled_chrec(class loop * loop,tree arg,tree init_cond)1309 simplify_peeled_chrec (class loop *loop, tree arg, tree init_cond)
1310 {
1311   aff_tree aff1, aff2;
1312   tree ev, left, right, type, step_val;
1313   hash_map<tree, name_expansion *> *peeled_chrec_map = NULL;
1314 
1315   ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, arg));
1316   if (ev == NULL_TREE || TREE_CODE (ev) != POLYNOMIAL_CHREC)
1317     return chrec_dont_know;
1318 
1319   left = CHREC_LEFT (ev);
1320   right = CHREC_RIGHT (ev);
1321   type = TREE_TYPE (left);
1322   step_val = chrec_fold_plus (type, init_cond, right);
1323 
1324   /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1325      if "left" equals to "init + right".  */
1326   if (operand_equal_p (left, step_val, 0))
1327     {
1328       if (dump_file && (dump_flags & TDF_SCEV))
1329 	fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1330 
1331       return build_polynomial_chrec (loop->num, init_cond, right);
1332     }
1333 
1334   /* The affine code only deals with pointer and integer types.  */
1335   if (!POINTER_TYPE_P (type)
1336       && !INTEGRAL_TYPE_P (type))
1337     return chrec_dont_know;
1338 
1339   /* Try harder to check if they are equal.  */
1340   tree_to_aff_combination_expand (left, type, &aff1, &peeled_chrec_map);
1341   tree_to_aff_combination_expand (step_val, type, &aff2, &peeled_chrec_map);
1342   free_affine_expand_cache (&peeled_chrec_map);
1343   aff_combination_scale (&aff2, -1);
1344   aff_combination_add (&aff1, &aff2);
1345 
1346   /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1347      if "left" equals to "init + right".  */
1348   if (aff_combination_zero_p (&aff1))
1349     {
1350       if (dump_file && (dump_flags & TDF_SCEV))
1351 	fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1352 
1353       return build_polynomial_chrec (loop->num, init_cond, right);
1354     }
1355   return chrec_dont_know;
1356 }
1357 
1358 /* Given a LOOP_PHI_NODE, this function determines the evolution
1359    function from LOOP_PHI_NODE to LOOP_PHI_NODE in the loop.  */
1360 
1361 static tree
analyze_evolution_in_loop(gphi * loop_phi_node,tree init_cond)1362 analyze_evolution_in_loop (gphi *loop_phi_node,
1363 			   tree init_cond)
1364 {
1365   int i, n = gimple_phi_num_args (loop_phi_node);
1366   tree evolution_function = chrec_not_analyzed_yet;
1367   class loop *loop = loop_containing_stmt (loop_phi_node);
1368   basic_block bb;
1369   static bool simplify_peeled_chrec_p = true;
1370 
1371   if (dump_file && (dump_flags & TDF_SCEV))
1372     {
1373       fprintf (dump_file, "(analyze_evolution_in_loop \n");
1374       fprintf (dump_file, "  (loop_phi_node = ");
1375       print_gimple_stmt (dump_file, loop_phi_node, 0);
1376       fprintf (dump_file, ")\n");
1377     }
1378 
1379   for (i = 0; i < n; i++)
1380     {
1381       tree arg = PHI_ARG_DEF (loop_phi_node, i);
1382       tree ev_fn;
1383       t_bool res;
1384 
1385       /* Select the edges that enter the loop body.  */
1386       bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1387       if (!flow_bb_inside_loop_p (loop, bb))
1388 	continue;
1389 
1390       if (TREE_CODE (arg) == SSA_NAME)
1391 	{
1392 	  bool val = false;
1393 
1394 	  /* Pass in the initial condition to the follow edge function.  */
1395 	  ev_fn = init_cond;
1396 	  res = follow_ssa_edge_expr (loop, loop_phi_node, arg,
1397 				      loop_phi_node, &ev_fn, 0);
1398 
1399 	  /* If ev_fn has no evolution in the inner loop, and the
1400 	     init_cond is not equal to ev_fn, then we have an
1401 	     ambiguity between two possible values, as we cannot know
1402 	     the number of iterations at this point.  */
1403 	  if (TREE_CODE (ev_fn) != POLYNOMIAL_CHREC
1404 	      && no_evolution_in_loop_p (ev_fn, loop->num, &val) && val
1405 	      && !operand_equal_p (init_cond, ev_fn, 0))
1406 	    ev_fn = chrec_dont_know;
1407 	}
1408       else
1409 	res = t_false;
1410 
1411       /* When it is impossible to go back on the same
1412 	 loop_phi_node by following the ssa edges, the
1413 	 evolution is represented by a peeled chrec, i.e. the
1414 	 first iteration, EV_FN has the value INIT_COND, then
1415 	 all the other iterations it has the value of ARG.
1416 	 For the moment, PEELED_CHREC nodes are not built.  */
1417       if (res != t_true)
1418 	{
1419 	  ev_fn = chrec_dont_know;
1420 	  /* Try to recognize POLYNOMIAL_CHREC which appears in
1421 	     the form of PEELED_CHREC, but guard the process with
1422 	     a bool variable to keep the analyzer from infinite
1423 	     recurrence for real PEELED_RECs.  */
1424 	  if (simplify_peeled_chrec_p && TREE_CODE (arg) == SSA_NAME)
1425 	    {
1426 	      simplify_peeled_chrec_p = false;
1427 	      ev_fn = simplify_peeled_chrec (loop, arg, init_cond);
1428 	      simplify_peeled_chrec_p = true;
1429 	    }
1430 	}
1431 
1432       /* When there are multiple back edges of the loop (which in fact never
1433 	 happens currently, but nevertheless), merge their evolutions.  */
1434       evolution_function = chrec_merge (evolution_function, ev_fn);
1435 
1436       if (evolution_function == chrec_dont_know)
1437 	break;
1438     }
1439 
1440   if (dump_file && (dump_flags & TDF_SCEV))
1441     {
1442       fprintf (dump_file, "  (evolution_function = ");
1443       print_generic_expr (dump_file, evolution_function);
1444       fprintf (dump_file, "))\n");
1445     }
1446 
1447   return evolution_function;
1448 }
1449 
1450 /* Looks to see if VAR is a copy of a constant (via straightforward assignments
1451    or degenerate phi's).  If so, returns the constant; else, returns VAR.  */
1452 
1453 static tree
follow_copies_to_constant(tree var)1454 follow_copies_to_constant (tree var)
1455 {
1456   tree res = var;
1457   while (TREE_CODE (res) == SSA_NAME
1458 	 /* We face not updated SSA form in multiple places and this walk
1459 	    may end up in sibling loops so we have to guard it.  */
1460 	 && !name_registered_for_update_p (res))
1461     {
1462       gimple *def = SSA_NAME_DEF_STMT (res);
1463       if (gphi *phi = dyn_cast <gphi *> (def))
1464 	{
1465 	  if (tree rhs = degenerate_phi_result (phi))
1466 	    res = rhs;
1467 	  else
1468 	    break;
1469 	}
1470       else if (gimple_assign_single_p (def))
1471 	/* Will exit loop if not an SSA_NAME.  */
1472 	res = gimple_assign_rhs1 (def);
1473       else
1474 	break;
1475     }
1476   if (CONSTANT_CLASS_P (res))
1477     return res;
1478   return var;
1479 }
1480 
1481 /* Given a loop-phi-node, return the initial conditions of the
1482    variable on entry of the loop.  When the CCP has propagated
1483    constants into the loop-phi-node, the initial condition is
1484    instantiated, otherwise the initial condition is kept symbolic.
1485    This analyzer does not analyze the evolution outside the current
1486    loop, and leaves this task to the on-demand tree reconstructor.  */
1487 
1488 static tree
analyze_initial_condition(gphi * loop_phi_node)1489 analyze_initial_condition (gphi *loop_phi_node)
1490 {
1491   int i, n;
1492   tree init_cond = chrec_not_analyzed_yet;
1493   class loop *loop = loop_containing_stmt (loop_phi_node);
1494 
1495   if (dump_file && (dump_flags & TDF_SCEV))
1496     {
1497       fprintf (dump_file, "(analyze_initial_condition \n");
1498       fprintf (dump_file, "  (loop_phi_node = \n");
1499       print_gimple_stmt (dump_file, loop_phi_node, 0);
1500       fprintf (dump_file, ")\n");
1501     }
1502 
1503   n = gimple_phi_num_args (loop_phi_node);
1504   for (i = 0; i < n; i++)
1505     {
1506       tree branch = PHI_ARG_DEF (loop_phi_node, i);
1507       basic_block bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1508 
1509       /* When the branch is oriented to the loop's body, it does
1510      	 not contribute to the initial condition.  */
1511       if (flow_bb_inside_loop_p (loop, bb))
1512        	continue;
1513 
1514       if (init_cond == chrec_not_analyzed_yet)
1515 	{
1516 	  init_cond = branch;
1517 	  continue;
1518 	}
1519 
1520       if (TREE_CODE (branch) == SSA_NAME)
1521 	{
1522 	  init_cond = chrec_dont_know;
1523       	  break;
1524 	}
1525 
1526       init_cond = chrec_merge (init_cond, branch);
1527     }
1528 
1529   /* Ooops -- a loop without an entry???  */
1530   if (init_cond == chrec_not_analyzed_yet)
1531     init_cond = chrec_dont_know;
1532 
1533   /* We may not have fully constant propagated IL.  Handle degenerate PHIs here
1534      to not miss important early loop unrollings.  */
1535   init_cond = follow_copies_to_constant (init_cond);
1536 
1537   if (dump_file && (dump_flags & TDF_SCEV))
1538     {
1539       fprintf (dump_file, "  (init_cond = ");
1540       print_generic_expr (dump_file, init_cond);
1541       fprintf (dump_file, "))\n");
1542     }
1543 
1544   return init_cond;
1545 }
1546 
1547 /* Analyze the scalar evolution for LOOP_PHI_NODE.  */
1548 
1549 static tree
interpret_loop_phi(class loop * loop,gphi * loop_phi_node)1550 interpret_loop_phi (class loop *loop, gphi *loop_phi_node)
1551 {
1552   tree res;
1553   class loop *phi_loop = loop_containing_stmt (loop_phi_node);
1554   tree init_cond;
1555 
1556   gcc_assert (phi_loop == loop);
1557 
1558   /* Otherwise really interpret the loop phi.  */
1559   init_cond = analyze_initial_condition (loop_phi_node);
1560   res = analyze_evolution_in_loop (loop_phi_node, init_cond);
1561 
1562   /* Verify we maintained the correct initial condition throughout
1563      possible conversions in the SSA chain.  */
1564   if (res != chrec_dont_know)
1565     {
1566       tree new_init = res;
1567       if (CONVERT_EXPR_P (res)
1568 	  && TREE_CODE (TREE_OPERAND (res, 0)) == POLYNOMIAL_CHREC)
1569 	new_init = fold_convert (TREE_TYPE (res),
1570 				 CHREC_LEFT (TREE_OPERAND (res, 0)));
1571       else if (TREE_CODE (res) == POLYNOMIAL_CHREC)
1572 	new_init = CHREC_LEFT (res);
1573       STRIP_USELESS_TYPE_CONVERSION (new_init);
1574       if (TREE_CODE (new_init) == POLYNOMIAL_CHREC
1575 	  || !operand_equal_p (init_cond, new_init, 0))
1576 	return chrec_dont_know;
1577     }
1578 
1579   return res;
1580 }
1581 
1582 /* This function merges the branches of a condition-phi-node,
1583    contained in the outermost loop, and whose arguments are already
1584    analyzed.  */
1585 
1586 static tree
interpret_condition_phi(class loop * loop,gphi * condition_phi)1587 interpret_condition_phi (class loop *loop, gphi *condition_phi)
1588 {
1589   int i, n = gimple_phi_num_args (condition_phi);
1590   tree res = chrec_not_analyzed_yet;
1591 
1592   for (i = 0; i < n; i++)
1593     {
1594       tree branch_chrec;
1595 
1596       if (backedge_phi_arg_p (condition_phi, i))
1597 	{
1598 	  res = chrec_dont_know;
1599 	  break;
1600 	}
1601 
1602       branch_chrec = analyze_scalar_evolution
1603 	(loop, PHI_ARG_DEF (condition_phi, i));
1604 
1605       res = chrec_merge (res, branch_chrec);
1606       if (res == chrec_dont_know)
1607 	break;
1608     }
1609 
1610   return res;
1611 }
1612 
1613 /* Interpret the operation RHS1 OP RHS2.  If we didn't
1614    analyze this node before, follow the definitions until ending
1615    either on an analyzed GIMPLE_ASSIGN, or on a loop-phi-node.  On the
1616    return path, this function propagates evolutions (ala constant copy
1617    propagation).  OPND1 is not a GIMPLE expression because we could
1618    analyze the effect of an inner loop: see interpret_loop_phi.  */
1619 
1620 static tree
interpret_rhs_expr(class loop * loop,gimple * at_stmt,tree type,tree rhs1,enum tree_code code,tree rhs2)1621 interpret_rhs_expr (class loop *loop, gimple *at_stmt,
1622 		    tree type, tree rhs1, enum tree_code code, tree rhs2)
1623 {
1624   tree res, chrec1, chrec2, ctype;
1625   gimple *def;
1626 
1627   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1628     {
1629       if (is_gimple_min_invariant (rhs1))
1630 	return chrec_convert (type, rhs1, at_stmt);
1631 
1632       if (code == SSA_NAME)
1633 	return chrec_convert (type, analyze_scalar_evolution (loop, rhs1),
1634 			      at_stmt);
1635 
1636       if (code == ASSERT_EXPR)
1637 	{
1638 	  rhs1 = ASSERT_EXPR_VAR (rhs1);
1639 	  return chrec_convert (type, analyze_scalar_evolution (loop, rhs1),
1640 				at_stmt);
1641 	}
1642     }
1643 
1644   switch (code)
1645     {
1646     case ADDR_EXPR:
1647       if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
1648 	  || handled_component_p (TREE_OPERAND (rhs1, 0)))
1649         {
1650 	  machine_mode mode;
1651 	  poly_int64 bitsize, bitpos;
1652 	  int unsignedp, reversep;
1653 	  int volatilep = 0;
1654 	  tree base, offset;
1655 	  tree chrec3;
1656 	  tree unitpos;
1657 
1658 	  base = get_inner_reference (TREE_OPERAND (rhs1, 0),
1659 				      &bitsize, &bitpos, &offset, &mode,
1660 				      &unsignedp, &reversep, &volatilep);
1661 
1662 	  if (TREE_CODE (base) == MEM_REF)
1663 	    {
1664 	      rhs2 = TREE_OPERAND (base, 1);
1665 	      rhs1 = TREE_OPERAND (base, 0);
1666 
1667 	      chrec1 = analyze_scalar_evolution (loop, rhs1);
1668 	      chrec2 = analyze_scalar_evolution (loop, rhs2);
1669 	      chrec1 = chrec_convert (type, chrec1, at_stmt);
1670 	      chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1671 	      chrec1 = instantiate_parameters (loop, chrec1);
1672 	      chrec2 = instantiate_parameters (loop, chrec2);
1673 	      res = chrec_fold_plus (type, chrec1, chrec2);
1674 	    }
1675 	  else
1676 	    {
1677 	      chrec1 = analyze_scalar_evolution_for_address_of (loop, base);
1678 	      chrec1 = chrec_convert (type, chrec1, at_stmt);
1679 	      res = chrec1;
1680 	    }
1681 
1682 	  if (offset != NULL_TREE)
1683 	    {
1684 	      chrec2 = analyze_scalar_evolution (loop, offset);
1685 	      chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
1686 	      chrec2 = instantiate_parameters (loop, chrec2);
1687 	      res = chrec_fold_plus (type, res, chrec2);
1688 	    }
1689 
1690 	  if (maybe_ne (bitpos, 0))
1691 	    {
1692 	      unitpos = size_int (exact_div (bitpos, BITS_PER_UNIT));
1693 	      chrec3 = analyze_scalar_evolution (loop, unitpos);
1694 	      chrec3 = chrec_convert (TREE_TYPE (unitpos), chrec3, at_stmt);
1695 	      chrec3 = instantiate_parameters (loop, chrec3);
1696 	      res = chrec_fold_plus (type, res, chrec3);
1697 	    }
1698         }
1699       else
1700 	res = chrec_dont_know;
1701       break;
1702 
1703     case POINTER_PLUS_EXPR:
1704       chrec1 = analyze_scalar_evolution (loop, rhs1);
1705       chrec2 = analyze_scalar_evolution (loop, rhs2);
1706       chrec1 = chrec_convert (type, chrec1, at_stmt);
1707       chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1708       chrec1 = instantiate_parameters (loop, chrec1);
1709       chrec2 = instantiate_parameters (loop, chrec2);
1710       res = chrec_fold_plus (type, chrec1, chrec2);
1711       break;
1712 
1713     case PLUS_EXPR:
1714       chrec1 = analyze_scalar_evolution (loop, rhs1);
1715       chrec2 = analyze_scalar_evolution (loop, rhs2);
1716       ctype = type;
1717       /* When the stmt is conditionally executed re-write the CHREC
1718          into a form that has well-defined behavior on overflow.  */
1719       if (at_stmt
1720 	  && INTEGRAL_TYPE_P (type)
1721 	  && ! TYPE_OVERFLOW_WRAPS (type)
1722 	  && ! dominated_by_p (CDI_DOMINATORS, loop->latch,
1723 			       gimple_bb (at_stmt)))
1724 	ctype = unsigned_type_for (type);
1725       chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1726       chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1727       chrec1 = instantiate_parameters (loop, chrec1);
1728       chrec2 = instantiate_parameters (loop, chrec2);
1729       res = chrec_fold_plus (ctype, chrec1, chrec2);
1730       if (type != ctype)
1731 	res = chrec_convert (type, res, at_stmt);
1732       break;
1733 
1734     case MINUS_EXPR:
1735       chrec1 = analyze_scalar_evolution (loop, rhs1);
1736       chrec2 = analyze_scalar_evolution (loop, rhs2);
1737       ctype = type;
1738       /* When the stmt is conditionally executed re-write the CHREC
1739          into a form that has well-defined behavior on overflow.  */
1740       if (at_stmt
1741 	  && INTEGRAL_TYPE_P (type)
1742 	  && ! TYPE_OVERFLOW_WRAPS (type)
1743 	  && ! dominated_by_p (CDI_DOMINATORS,
1744 			       loop->latch, gimple_bb (at_stmt)))
1745 	ctype = unsigned_type_for (type);
1746       chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1747       chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1748       chrec1 = instantiate_parameters (loop, chrec1);
1749       chrec2 = instantiate_parameters (loop, chrec2);
1750       res = chrec_fold_minus (ctype, chrec1, chrec2);
1751       if (type != ctype)
1752 	res = chrec_convert (type, res, at_stmt);
1753       break;
1754 
1755     case NEGATE_EXPR:
1756       chrec1 = analyze_scalar_evolution (loop, rhs1);
1757       ctype = type;
1758       /* When the stmt is conditionally executed re-write the CHREC
1759          into a form that has well-defined behavior on overflow.  */
1760       if (at_stmt
1761 	  && INTEGRAL_TYPE_P (type)
1762 	  && ! TYPE_OVERFLOW_WRAPS (type)
1763 	  && ! dominated_by_p (CDI_DOMINATORS,
1764 			       loop->latch, gimple_bb (at_stmt)))
1765 	ctype = unsigned_type_for (type);
1766       chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1767       /* TYPE may be integer, real or complex, so use fold_convert.  */
1768       chrec1 = instantiate_parameters (loop, chrec1);
1769       res = chrec_fold_multiply (ctype, chrec1,
1770 				 fold_convert (ctype, integer_minus_one_node));
1771       if (type != ctype)
1772 	res = chrec_convert (type, res, at_stmt);
1773       break;
1774 
1775     case BIT_NOT_EXPR:
1776       /* Handle ~X as -1 - X.  */
1777       chrec1 = analyze_scalar_evolution (loop, rhs1);
1778       chrec1 = chrec_convert (type, chrec1, at_stmt);
1779       chrec1 = instantiate_parameters (loop, chrec1);
1780       res = chrec_fold_minus (type,
1781 			      fold_convert (type, integer_minus_one_node),
1782 			      chrec1);
1783       break;
1784 
1785     case MULT_EXPR:
1786       chrec1 = analyze_scalar_evolution (loop, rhs1);
1787       chrec2 = analyze_scalar_evolution (loop, rhs2);
1788       ctype = type;
1789       /* When the stmt is conditionally executed re-write the CHREC
1790          into a form that has well-defined behavior on overflow.  */
1791       if (at_stmt
1792 	  && INTEGRAL_TYPE_P (type)
1793 	  && ! TYPE_OVERFLOW_WRAPS (type)
1794 	  && ! dominated_by_p (CDI_DOMINATORS,
1795 			       loop->latch, gimple_bb (at_stmt)))
1796 	ctype = unsigned_type_for (type);
1797       chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1798       chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1799       chrec1 = instantiate_parameters (loop, chrec1);
1800       chrec2 = instantiate_parameters (loop, chrec2);
1801       res = chrec_fold_multiply (ctype, chrec1, chrec2);
1802       if (type != ctype)
1803 	res = chrec_convert (type, res, at_stmt);
1804       break;
1805 
1806     case LSHIFT_EXPR:
1807       {
1808 	/* Handle A<<B as A * (1<<B).  */
1809 	tree uns = unsigned_type_for (type);
1810 	chrec1 = analyze_scalar_evolution (loop, rhs1);
1811 	chrec2 = analyze_scalar_evolution (loop, rhs2);
1812 	chrec1 = chrec_convert (uns, chrec1, at_stmt);
1813 	chrec1 = instantiate_parameters (loop, chrec1);
1814 	chrec2 = instantiate_parameters (loop, chrec2);
1815 
1816 	tree one = build_int_cst (uns, 1);
1817 	chrec2 = fold_build2 (LSHIFT_EXPR, uns, one, chrec2);
1818 	res = chrec_fold_multiply (uns, chrec1, chrec2);
1819 	res = chrec_convert (type, res, at_stmt);
1820       }
1821       break;
1822 
1823     CASE_CONVERT:
1824       /* In case we have a truncation of a widened operation that in
1825          the truncated type has undefined overflow behavior analyze
1826 	 the operation done in an unsigned type of the same precision
1827 	 as the final truncation.  We cannot derive a scalar evolution
1828 	 for the widened operation but for the truncated result.  */
1829       if (TREE_CODE (type) == INTEGER_TYPE
1830 	  && TREE_CODE (TREE_TYPE (rhs1)) == INTEGER_TYPE
1831 	  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (rhs1))
1832 	  && TYPE_OVERFLOW_UNDEFINED (type)
1833 	  && TREE_CODE (rhs1) == SSA_NAME
1834 	  && (def = SSA_NAME_DEF_STMT (rhs1))
1835 	  && is_gimple_assign (def)
1836 	  && TREE_CODE_CLASS (gimple_assign_rhs_code (def)) == tcc_binary
1837 	  && TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST)
1838 	{
1839 	  tree utype = unsigned_type_for (type);
1840 	  chrec1 = interpret_rhs_expr (loop, at_stmt, utype,
1841 				       gimple_assign_rhs1 (def),
1842 				       gimple_assign_rhs_code (def),
1843 				       gimple_assign_rhs2 (def));
1844 	}
1845       else
1846 	chrec1 = analyze_scalar_evolution (loop, rhs1);
1847       res = chrec_convert (type, chrec1, at_stmt, true, rhs1);
1848       break;
1849 
1850     case BIT_AND_EXPR:
1851       /* Given int variable A, handle A&0xffff as (int)(unsigned short)A.
1852 	 If A is SCEV and its value is in the range of representable set
1853 	 of type unsigned short, the result expression is a (no-overflow)
1854 	 SCEV.  */
1855       res = chrec_dont_know;
1856       if (tree_fits_uhwi_p (rhs2))
1857 	{
1858 	  int precision;
1859 	  unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2);
1860 
1861 	  val ++;
1862 	  /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or
1863 	     it's not the maximum value of a smaller type than rhs1.  */
1864 	  if (val != 0
1865 	      && (precision = exact_log2 (val)) > 0
1866 	      && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1)))
1867 	    {
1868 	      tree utype = build_nonstandard_integer_type (precision, 1);
1869 
1870 	      if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1)))
1871 		{
1872 		  chrec1 = analyze_scalar_evolution (loop, rhs1);
1873 		  chrec1 = chrec_convert (utype, chrec1, at_stmt);
1874 		  res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt);
1875 		}
1876 	    }
1877 	}
1878       break;
1879 
1880     default:
1881       res = chrec_dont_know;
1882       break;
1883     }
1884 
1885   return res;
1886 }
1887 
1888 /* Interpret the expression EXPR.  */
1889 
1890 static tree
interpret_expr(class loop * loop,gimple * at_stmt,tree expr)1891 interpret_expr (class loop *loop, gimple *at_stmt, tree expr)
1892 {
1893   enum tree_code code;
1894   tree type = TREE_TYPE (expr), op0, op1;
1895 
1896   if (automatically_generated_chrec_p (expr))
1897     return expr;
1898 
1899   if (TREE_CODE (expr) == POLYNOMIAL_CHREC
1900       || TREE_CODE (expr) == CALL_EXPR
1901       || get_gimple_rhs_class (TREE_CODE (expr)) == GIMPLE_TERNARY_RHS)
1902     return chrec_dont_know;
1903 
1904   extract_ops_from_tree (expr, &code, &op0, &op1);
1905 
1906   return interpret_rhs_expr (loop, at_stmt, type,
1907 			     op0, code, op1);
1908 }
1909 
1910 /* Interpret the rhs of the assignment STMT.  */
1911 
1912 static tree
interpret_gimple_assign(class loop * loop,gimple * stmt)1913 interpret_gimple_assign (class loop *loop, gimple *stmt)
1914 {
1915   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1916   enum tree_code code = gimple_assign_rhs_code (stmt);
1917 
1918   return interpret_rhs_expr (loop, stmt, type,
1919 			     gimple_assign_rhs1 (stmt), code,
1920 			     gimple_assign_rhs2 (stmt));
1921 }
1922 
1923 
1924 
1925 /* This section contains all the entry points:
1926    - number_of_iterations_in_loop,
1927    - analyze_scalar_evolution,
1928    - instantiate_parameters.
1929 */
1930 
1931 /* Helper recursive function.  */
1932 
1933 static tree
analyze_scalar_evolution_1(class loop * loop,tree var)1934 analyze_scalar_evolution_1 (class loop *loop, tree var)
1935 {
1936   gimple *def;
1937   basic_block bb;
1938   class loop *def_loop;
1939   tree res;
1940 
1941   if (TREE_CODE (var) != SSA_NAME)
1942     return interpret_expr (loop, NULL, var);
1943 
1944   def = SSA_NAME_DEF_STMT (var);
1945   bb = gimple_bb (def);
1946   def_loop = bb->loop_father;
1947 
1948   if (!flow_bb_inside_loop_p (loop, bb))
1949     {
1950       /* Keep symbolic form, but look through obvious copies for constants.  */
1951       res = follow_copies_to_constant (var);
1952       goto set_and_end;
1953     }
1954 
1955   if (loop != def_loop)
1956     {
1957       res = analyze_scalar_evolution_1 (def_loop, var);
1958       class loop *loop_to_skip = superloop_at_depth (def_loop,
1959 						      loop_depth (loop) + 1);
1960       res = compute_overall_effect_of_inner_loop (loop_to_skip, res);
1961       if (chrec_contains_symbols_defined_in_loop (res, loop->num))
1962 	res = analyze_scalar_evolution_1 (loop, res);
1963       goto set_and_end;
1964     }
1965 
1966   switch (gimple_code (def))
1967     {
1968     case GIMPLE_ASSIGN:
1969       res = interpret_gimple_assign (loop, def);
1970       break;
1971 
1972     case GIMPLE_PHI:
1973       if (loop_phi_node_p (def))
1974 	res = interpret_loop_phi (loop, as_a <gphi *> (def));
1975       else
1976 	res = interpret_condition_phi (loop, as_a <gphi *> (def));
1977       break;
1978 
1979     default:
1980       res = chrec_dont_know;
1981       break;
1982     }
1983 
1984  set_and_end:
1985 
1986   /* Keep the symbolic form.  */
1987   if (res == chrec_dont_know)
1988     res = var;
1989 
1990   if (loop == def_loop)
1991     set_scalar_evolution (block_before_loop (loop), var, res);
1992 
1993   return res;
1994 }
1995 
1996 /* Analyzes and returns the scalar evolution of the ssa_name VAR in
1997    LOOP.  LOOP is the loop in which the variable is used.
1998 
1999    Example of use: having a pointer VAR to a SSA_NAME node, STMT a
2000    pointer to the statement that uses this variable, in order to
2001    determine the evolution function of the variable, use the following
2002    calls:
2003 
2004    loop_p loop = loop_containing_stmt (stmt);
2005    tree chrec_with_symbols = analyze_scalar_evolution (loop, var);
2006    tree chrec_instantiated = instantiate_parameters (loop, chrec_with_symbols);
2007 */
2008 
2009 tree
analyze_scalar_evolution(class loop * loop,tree var)2010 analyze_scalar_evolution (class loop *loop, tree var)
2011 {
2012   tree res;
2013 
2014   /* ???  Fix callers.  */
2015   if (! loop)
2016     return var;
2017 
2018   if (dump_file && (dump_flags & TDF_SCEV))
2019     {
2020       fprintf (dump_file, "(analyze_scalar_evolution \n");
2021       fprintf (dump_file, "  (loop_nb = %d)\n", loop->num);
2022       fprintf (dump_file, "  (scalar = ");
2023       print_generic_expr (dump_file, var);
2024       fprintf (dump_file, ")\n");
2025     }
2026 
2027   res = get_scalar_evolution (block_before_loop (loop), var);
2028   if (res == chrec_not_analyzed_yet)
2029     {
2030       /* We'll recurse into instantiate_scev, avoid tearing down the
2031          instantiate cache repeatedly and keep it live from here.  */
2032       bool destr = false;
2033       if (!global_cache)
2034 	{
2035 	  global_cache = new instantiate_cache_type;
2036 	  destr = true;
2037 	}
2038       res = analyze_scalar_evolution_1 (loop, var);
2039       if (destr)
2040 	{
2041 	  delete global_cache;
2042 	  global_cache = NULL;
2043 	}
2044     }
2045 
2046   if (dump_file && (dump_flags & TDF_SCEV))
2047     fprintf (dump_file, ")\n");
2048 
2049   return res;
2050 }
2051 
2052 /* Analyzes and returns the scalar evolution of VAR address in LOOP.  */
2053 
2054 static tree
analyze_scalar_evolution_for_address_of(class loop * loop,tree var)2055 analyze_scalar_evolution_for_address_of (class loop *loop, tree var)
2056 {
2057   return analyze_scalar_evolution (loop, build_fold_addr_expr (var));
2058 }
2059 
2060 /* Analyze scalar evolution of use of VERSION in USE_LOOP with respect to
2061    WRTO_LOOP (which should be a superloop of USE_LOOP)
2062 
2063    FOLDED_CASTS is set to true if resolve_mixers used
2064    chrec_convert_aggressive (TODO -- not really, we are way too conservative
2065    at the moment in order to keep things simple).
2066 
2067    To illustrate the meaning of USE_LOOP and WRTO_LOOP, consider the following
2068    example:
2069 
2070    for (i = 0; i < 100; i++)			-- loop 1
2071      {
2072        for (j = 0; j < 100; j++)		-- loop 2
2073          {
2074 	   k1 = i;
2075 	   k2 = j;
2076 
2077 	   use2 (k1, k2);
2078 
2079 	   for (t = 0; t < 100; t++)		-- loop 3
2080 	     use3 (k1, k2);
2081 
2082 	 }
2083        use1 (k1, k2);
2084      }
2085 
2086    Both k1 and k2 are invariants in loop3, thus
2087      analyze_scalar_evolution_in_loop (loop3, loop3, k1) = k1
2088      analyze_scalar_evolution_in_loop (loop3, loop3, k2) = k2
2089 
2090    As they are invariant, it does not matter whether we consider their
2091    usage in loop 3 or loop 2, hence
2092      analyze_scalar_evolution_in_loop (loop2, loop3, k1) =
2093        analyze_scalar_evolution_in_loop (loop2, loop2, k1) = i
2094      analyze_scalar_evolution_in_loop (loop2, loop3, k2) =
2095        analyze_scalar_evolution_in_loop (loop2, loop2, k2) = [0,+,1]_2
2096 
2097    Similarly for their evolutions with respect to loop 1.  The values of K2
2098    in the use in loop 2 vary independently on loop 1, thus we cannot express
2099    the evolution with respect to loop 1:
2100      analyze_scalar_evolution_in_loop (loop1, loop3, k1) =
2101        analyze_scalar_evolution_in_loop (loop1, loop2, k1) = [0,+,1]_1
2102      analyze_scalar_evolution_in_loop (loop1, loop3, k2) =
2103        analyze_scalar_evolution_in_loop (loop1, loop2, k2) = dont_know
2104 
2105    The value of k2 in the use in loop 1 is known, though:
2106      analyze_scalar_evolution_in_loop (loop1, loop1, k1) = [0,+,1]_1
2107      analyze_scalar_evolution_in_loop (loop1, loop1, k2) = 100
2108    */
2109 
2110 static tree
analyze_scalar_evolution_in_loop(class loop * wrto_loop,class loop * use_loop,tree version,bool * folded_casts)2111 analyze_scalar_evolution_in_loop (class loop *wrto_loop, class loop *use_loop,
2112 				  tree version, bool *folded_casts)
2113 {
2114   bool val = false;
2115   tree ev = version, tmp;
2116 
2117   /* We cannot just do
2118 
2119      tmp = analyze_scalar_evolution (use_loop, version);
2120      ev = resolve_mixers (wrto_loop, tmp, folded_casts);
2121 
2122      as resolve_mixers would query the scalar evolution with respect to
2123      wrto_loop.  For example, in the situation described in the function
2124      comment, suppose that wrto_loop = loop1, use_loop = loop3 and
2125      version = k2.  Then
2126 
2127      analyze_scalar_evolution (use_loop, version) = k2
2128 
2129      and resolve_mixers (loop1, k2, folded_casts) finds that the value of
2130      k2 in loop 1 is 100, which is a wrong result, since we are interested
2131      in the value in loop 3.
2132 
2133      Instead, we need to proceed from use_loop to wrto_loop loop by loop,
2134      each time checking that there is no evolution in the inner loop.  */
2135 
2136   if (folded_casts)
2137     *folded_casts = false;
2138   while (1)
2139     {
2140       tmp = analyze_scalar_evolution (use_loop, ev);
2141       ev = resolve_mixers (use_loop, tmp, folded_casts);
2142 
2143       if (use_loop == wrto_loop)
2144 	return ev;
2145 
2146       /* If the value of the use changes in the inner loop, we cannot express
2147 	 its value in the outer loop (we might try to return interval chrec,
2148 	 but we do not have a user for it anyway)  */
2149       if (!no_evolution_in_loop_p (ev, use_loop->num, &val)
2150 	  || !val)
2151 	return chrec_dont_know;
2152 
2153       use_loop = loop_outer (use_loop);
2154     }
2155 }
2156 
2157 
2158 /* Computes a hash function for database element ELT.  */
2159 
2160 static inline hashval_t
hash_idx_scev_info(const void * elt_)2161 hash_idx_scev_info (const void *elt_)
2162 {
2163   unsigned idx = ((size_t) elt_) - 2;
2164   return scev_info_hasher::hash (&global_cache->entries[idx]);
2165 }
2166 
2167 /* Compares database elements E1 and E2.  */
2168 
2169 static inline int
eq_idx_scev_info(const void * e1,const void * e2)2170 eq_idx_scev_info (const void *e1, const void *e2)
2171 {
2172   unsigned idx1 = ((size_t) e1) - 2;
2173   return scev_info_hasher::equal (&global_cache->entries[idx1],
2174 				  (const scev_info_str *) e2);
2175 }
2176 
2177 /* Returns from CACHE the slot number of the cached chrec for NAME.  */
2178 
2179 static unsigned
get_instantiated_value_entry(instantiate_cache_type & cache,tree name,edge instantiate_below)2180 get_instantiated_value_entry (instantiate_cache_type &cache,
2181 			      tree name, edge instantiate_below)
2182 {
2183   if (!cache.map)
2184     {
2185       cache.map = htab_create (10, hash_idx_scev_info, eq_idx_scev_info, NULL);
2186       cache.entries.create (10);
2187     }
2188 
2189   scev_info_str e;
2190   e.name_version = SSA_NAME_VERSION (name);
2191   e.instantiated_below = instantiate_below->dest->index;
2192   void **slot = htab_find_slot_with_hash (cache.map, &e,
2193 					  scev_info_hasher::hash (&e), INSERT);
2194   if (!*slot)
2195     {
2196       e.chrec = chrec_not_analyzed_yet;
2197       *slot = (void *)(size_t)(cache.entries.length () + 2);
2198       cache.entries.safe_push (e);
2199     }
2200 
2201   return ((size_t)*slot) - 2;
2202 }
2203 
2204 
2205 /* Return the closed_loop_phi node for VAR.  If there is none, return
2206    NULL_TREE.  */
2207 
2208 static tree
loop_closed_phi_def(tree var)2209 loop_closed_phi_def (tree var)
2210 {
2211   class loop *loop;
2212   edge exit;
2213   gphi *phi;
2214   gphi_iterator psi;
2215 
2216   if (var == NULL_TREE
2217       || TREE_CODE (var) != SSA_NAME)
2218     return NULL_TREE;
2219 
2220   loop = loop_containing_stmt (SSA_NAME_DEF_STMT (var));
2221   exit = single_exit (loop);
2222   if (!exit)
2223     return NULL_TREE;
2224 
2225   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2226     {
2227       phi = psi.phi ();
2228       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == var)
2229 	return PHI_RESULT (phi);
2230     }
2231 
2232   return NULL_TREE;
2233 }
2234 
2235 static tree instantiate_scev_r (edge, class loop *, class loop *,
2236 				tree, bool *, int);
2237 
2238 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2239    and EVOLUTION_LOOP, that were left under a symbolic form.
2240 
2241    CHREC is an SSA_NAME to be instantiated.
2242 
2243    CACHE is the cache of already instantiated values.
2244 
2245    Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2246    conversions that may wrap in signed/pointer type are folded, as long
2247    as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL
2248    then we don't do such fold.
2249 
2250    SIZE_EXPR is used for computing the size of the expression to be
2251    instantiated, and to stop if it exceeds some limit.  */
2252 
2253 static tree
instantiate_scev_name(edge instantiate_below,class loop * evolution_loop,class loop * inner_loop,tree chrec,bool * fold_conversions,int size_expr)2254 instantiate_scev_name (edge instantiate_below,
2255 		       class loop *evolution_loop, class loop *inner_loop,
2256 		       tree chrec,
2257 		       bool *fold_conversions,
2258 		       int size_expr)
2259 {
2260   tree res;
2261   class loop *def_loop;
2262   basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec));
2263 
2264   /* A parameter, nothing to do.  */
2265   if (!def_bb
2266       || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest))
2267     return chrec;
2268 
2269   /* We cache the value of instantiated variable to avoid exponential
2270      time complexity due to reevaluations.  We also store the convenient
2271      value in the cache in order to prevent infinite recursion -- we do
2272      not want to instantiate the SSA_NAME if it is in a mixer
2273      structure.  This is used for avoiding the instantiation of
2274      recursively defined functions, such as:
2275 
2276      | a_2 -> {0, +, 1, +, a_2}_1  */
2277 
2278   unsigned si = get_instantiated_value_entry (*global_cache,
2279 					      chrec, instantiate_below);
2280   if (global_cache->get (si) != chrec_not_analyzed_yet)
2281     return global_cache->get (si);
2282 
2283   /* On recursion return chrec_dont_know.  */
2284   global_cache->set (si, chrec_dont_know);
2285 
2286   def_loop = find_common_loop (evolution_loop, def_bb->loop_father);
2287 
2288   if (! dominated_by_p (CDI_DOMINATORS,
2289 			def_loop->header, instantiate_below->dest))
2290     {
2291       gimple *def = SSA_NAME_DEF_STMT (chrec);
2292       if (gassign *ass = dyn_cast <gassign *> (def))
2293 	{
2294 	  switch (gimple_assign_rhs_class (ass))
2295 	    {
2296 	    case GIMPLE_UNARY_RHS:
2297 	      {
2298 		tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2299 					       inner_loop, gimple_assign_rhs1 (ass),
2300 					       fold_conversions, size_expr);
2301 		if (op0 == chrec_dont_know)
2302 		  return chrec_dont_know;
2303 		res = fold_build1 (gimple_assign_rhs_code (ass),
2304 				   TREE_TYPE (chrec), op0);
2305 		break;
2306 	      }
2307 	    case GIMPLE_BINARY_RHS:
2308 	      {
2309 		tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2310 					       inner_loop, gimple_assign_rhs1 (ass),
2311 					       fold_conversions, size_expr);
2312 		if (op0 == chrec_dont_know)
2313 		  return chrec_dont_know;
2314 		tree op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2315 					       inner_loop, gimple_assign_rhs2 (ass),
2316 					       fold_conversions, size_expr);
2317 		if (op1 == chrec_dont_know)
2318 		  return chrec_dont_know;
2319 		res = fold_build2 (gimple_assign_rhs_code (ass),
2320 				   TREE_TYPE (chrec), op0, op1);
2321 		break;
2322 	      }
2323 	    default:
2324 	      res = chrec_dont_know;
2325 	    }
2326 	}
2327       else
2328 	res = chrec_dont_know;
2329       global_cache->set (si, res);
2330       return res;
2331     }
2332 
2333   /* If the analysis yields a parametric chrec, instantiate the
2334      result again.  */
2335   res = analyze_scalar_evolution (def_loop, chrec);
2336 
2337   /* Don't instantiate default definitions.  */
2338   if (TREE_CODE (res) == SSA_NAME
2339       && SSA_NAME_IS_DEFAULT_DEF (res))
2340     ;
2341 
2342   /* Don't instantiate loop-closed-ssa phi nodes.  */
2343   else if (TREE_CODE (res) == SSA_NAME
2344 	   && loop_depth (loop_containing_stmt (SSA_NAME_DEF_STMT (res)))
2345 	   > loop_depth (def_loop))
2346     {
2347       if (res == chrec)
2348 	res = loop_closed_phi_def (chrec);
2349       else
2350 	res = chrec;
2351 
2352       /* When there is no loop_closed_phi_def, it means that the
2353 	 variable is not used after the loop: try to still compute the
2354 	 value of the variable when exiting the loop.  */
2355       if (res == NULL_TREE)
2356 	{
2357 	  loop_p loop = loop_containing_stmt (SSA_NAME_DEF_STMT (chrec));
2358 	  res = analyze_scalar_evolution (loop, chrec);
2359 	  res = compute_overall_effect_of_inner_loop (loop, res);
2360 	  res = instantiate_scev_r (instantiate_below, evolution_loop,
2361 				    inner_loop, res,
2362 				    fold_conversions, size_expr);
2363 	}
2364       else if (dominated_by_p (CDI_DOMINATORS,
2365 				gimple_bb (SSA_NAME_DEF_STMT (res)),
2366 				instantiate_below->dest))
2367 	res = chrec_dont_know;
2368     }
2369 
2370   else if (res != chrec_dont_know)
2371     {
2372       if (inner_loop
2373 	  && def_bb->loop_father != inner_loop
2374 	  && !flow_loop_nested_p (def_bb->loop_father, inner_loop))
2375 	/* ???  We could try to compute the overall effect of the loop here.  */
2376 	res = chrec_dont_know;
2377       else
2378 	res = instantiate_scev_r (instantiate_below, evolution_loop,
2379 				  inner_loop, res,
2380 				  fold_conversions, size_expr);
2381     }
2382 
2383   /* Store the correct value to the cache.  */
2384   global_cache->set (si, res);
2385   return res;
2386 }
2387 
2388 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2389    and EVOLUTION_LOOP, that were left under a symbolic form.
2390 
2391    CHREC is a polynomial chain of recurrence to be instantiated.
2392 
2393    CACHE is the cache of already instantiated values.
2394 
2395    Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2396    conversions that may wrap in signed/pointer type are folded, as long
2397    as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL
2398    then we don't do such fold.
2399 
2400    SIZE_EXPR is used for computing the size of the expression to be
2401    instantiated, and to stop if it exceeds some limit.  */
2402 
2403 static tree
instantiate_scev_poly(edge instantiate_below,class loop * evolution_loop,class loop *,tree chrec,bool * fold_conversions,int size_expr)2404 instantiate_scev_poly (edge instantiate_below,
2405 		       class loop *evolution_loop, class loop *,
2406 		       tree chrec, bool *fold_conversions, int size_expr)
2407 {
2408   tree op1;
2409   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2410 				 get_chrec_loop (chrec),
2411 				 CHREC_LEFT (chrec), fold_conversions,
2412 				 size_expr);
2413   if (op0 == chrec_dont_know)
2414     return chrec_dont_know;
2415 
2416   op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2417 			    get_chrec_loop (chrec),
2418 			    CHREC_RIGHT (chrec), fold_conversions,
2419 			    size_expr);
2420   if (op1 == chrec_dont_know)
2421     return chrec_dont_know;
2422 
2423   if (CHREC_LEFT (chrec) != op0
2424       || CHREC_RIGHT (chrec) != op1)
2425     {
2426       op1 = chrec_convert_rhs (chrec_type (op0), op1, NULL);
2427       chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec), op0, op1);
2428     }
2429 
2430   return chrec;
2431 }
2432 
2433 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2434    and EVOLUTION_LOOP, that were left under a symbolic form.
2435 
2436    "C0 CODE C1" is a binary expression of type TYPE to be instantiated.
2437 
2438    CACHE is the cache of already instantiated values.
2439 
2440    Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2441    conversions that may wrap in signed/pointer type are folded, as long
2442    as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL
2443    then we don't do such fold.
2444 
2445    SIZE_EXPR is used for computing the size of the expression to be
2446    instantiated, and to stop if it exceeds some limit.  */
2447 
2448 static tree
instantiate_scev_binary(edge instantiate_below,class loop * evolution_loop,class loop * inner_loop,tree chrec,enum tree_code code,tree type,tree c0,tree c1,bool * fold_conversions,int size_expr)2449 instantiate_scev_binary (edge instantiate_below,
2450 			 class loop *evolution_loop, class loop *inner_loop,
2451 			 tree chrec, enum tree_code code,
2452 			 tree type, tree c0, tree c1,
2453 			 bool *fold_conversions, int size_expr)
2454 {
2455   tree op1;
2456   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2457 				 c0, fold_conversions, size_expr);
2458   if (op0 == chrec_dont_know)
2459     return chrec_dont_know;
2460 
2461   /* While we eventually compute the same op1 if c0 == c1 the process
2462      of doing this is expensive so the following short-cut prevents
2463      exponential compile-time behavior.  */
2464   if (c0 != c1)
2465     {
2466       op1 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2467 				c1, fold_conversions, size_expr);
2468       if (op1 == chrec_dont_know)
2469 	return chrec_dont_know;
2470     }
2471   else
2472     op1 = op0;
2473 
2474   if (c0 != op0
2475       || c1 != op1)
2476     {
2477       op0 = chrec_convert (type, op0, NULL);
2478       op1 = chrec_convert_rhs (type, op1, NULL);
2479 
2480       switch (code)
2481 	{
2482 	case POINTER_PLUS_EXPR:
2483 	case PLUS_EXPR:
2484 	  return chrec_fold_plus (type, op0, op1);
2485 
2486 	case MINUS_EXPR:
2487 	  return chrec_fold_minus (type, op0, op1);
2488 
2489 	case MULT_EXPR:
2490 	  return chrec_fold_multiply (type, op0, op1);
2491 
2492 	default:
2493 	  gcc_unreachable ();
2494 	}
2495     }
2496 
2497   return chrec ? chrec : fold_build2 (code, type, c0, c1);
2498 }
2499 
2500 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2501    and EVOLUTION_LOOP, that were left under a symbolic form.
2502 
2503    "CHREC" that stands for a convert expression "(TYPE) OP" is to be
2504    instantiated.
2505 
2506    CACHE is the cache of already instantiated values.
2507 
2508    Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2509    conversions that may wrap in signed/pointer type are folded, as long
2510    as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL
2511    then we don't do such fold.
2512 
2513    SIZE_EXPR is used for computing the size of the expression to be
2514    instantiated, and to stop if it exceeds some limit.  */
2515 
2516 static tree
instantiate_scev_convert(edge instantiate_below,class loop * evolution_loop,class loop * inner_loop,tree chrec,tree type,tree op,bool * fold_conversions,int size_expr)2517 instantiate_scev_convert (edge instantiate_below,
2518 			  class loop *evolution_loop, class loop *inner_loop,
2519 			  tree chrec, tree type, tree op,
2520 			  bool *fold_conversions, int size_expr)
2521 {
2522   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2523 				 inner_loop, op,
2524 				 fold_conversions, size_expr);
2525 
2526   if (op0 == chrec_dont_know)
2527     return chrec_dont_know;
2528 
2529   if (fold_conversions)
2530     {
2531       tree tmp = chrec_convert_aggressive (type, op0, fold_conversions);
2532       if (tmp)
2533 	return tmp;
2534 
2535       /* If we used chrec_convert_aggressive, we can no longer assume that
2536 	 signed chrecs do not overflow, as chrec_convert does, so avoid
2537 	 calling it in that case.  */
2538       if (*fold_conversions)
2539 	{
2540 	  if (chrec && op0 == op)
2541 	    return chrec;
2542 
2543 	  return fold_convert (type, op0);
2544 	}
2545     }
2546 
2547   return chrec_convert (type, op0, NULL);
2548 }
2549 
2550 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2551    and EVOLUTION_LOOP, that were left under a symbolic form.
2552 
2553    CHREC is a BIT_NOT_EXPR or a NEGATE_EXPR expression to be instantiated.
2554    Handle ~X as -1 - X.
2555    Handle -X as -1 * X.
2556 
2557    CACHE is the cache of already instantiated values.
2558 
2559    Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2560    conversions that may wrap in signed/pointer type are folded, as long
2561    as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL
2562    then we don't do such fold.
2563 
2564    SIZE_EXPR is used for computing the size of the expression to be
2565    instantiated, and to stop if it exceeds some limit.  */
2566 
2567 static tree
instantiate_scev_not(edge instantiate_below,class loop * evolution_loop,class loop * inner_loop,tree chrec,enum tree_code code,tree type,tree op,bool * fold_conversions,int size_expr)2568 instantiate_scev_not (edge instantiate_below,
2569 		      class loop *evolution_loop, class loop *inner_loop,
2570 		      tree chrec,
2571 		      enum tree_code code, tree type, tree op,
2572 		      bool *fold_conversions, int size_expr)
2573 {
2574   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2575 				 inner_loop, op,
2576 				 fold_conversions, size_expr);
2577 
2578   if (op0 == chrec_dont_know)
2579     return chrec_dont_know;
2580 
2581   if (op != op0)
2582     {
2583       op0 = chrec_convert (type, op0, NULL);
2584 
2585       switch (code)
2586 	{
2587 	case BIT_NOT_EXPR:
2588 	  return chrec_fold_minus
2589 	    (type, fold_convert (type, integer_minus_one_node), op0);
2590 
2591 	case NEGATE_EXPR:
2592 	  return chrec_fold_multiply
2593 	    (type, fold_convert (type, integer_minus_one_node), op0);
2594 
2595 	default:
2596 	  gcc_unreachable ();
2597 	}
2598     }
2599 
2600   return chrec ? chrec : fold_build1 (code, type, op0);
2601 }
2602 
2603 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2604    and EVOLUTION_LOOP, that were left under a symbolic form.
2605 
2606    CHREC is the scalar evolution to instantiate.
2607 
2608    CACHE is the cache of already instantiated values.
2609 
2610    Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2611    conversions that may wrap in signed/pointer type are folded, as long
2612    as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL
2613    then we don't do such fold.
2614 
2615    SIZE_EXPR is used for computing the size of the expression to be
2616    instantiated, and to stop if it exceeds some limit.  */
2617 
2618 static tree
instantiate_scev_r(edge instantiate_below,class loop * evolution_loop,class loop * inner_loop,tree chrec,bool * fold_conversions,int size_expr)2619 instantiate_scev_r (edge instantiate_below,
2620 		    class loop *evolution_loop, class loop *inner_loop,
2621 		    tree chrec,
2622 		    bool *fold_conversions, int size_expr)
2623 {
2624   /* Give up if the expression is larger than the MAX that we allow.  */
2625   if (size_expr++ > param_scev_max_expr_size)
2626     return chrec_dont_know;
2627 
2628   if (chrec == NULL_TREE
2629       || automatically_generated_chrec_p (chrec)
2630       || is_gimple_min_invariant (chrec))
2631     return chrec;
2632 
2633   switch (TREE_CODE (chrec))
2634     {
2635     case SSA_NAME:
2636       return instantiate_scev_name (instantiate_below, evolution_loop,
2637 				    inner_loop, chrec,
2638 				    fold_conversions, size_expr);
2639 
2640     case POLYNOMIAL_CHREC:
2641       return instantiate_scev_poly (instantiate_below, evolution_loop,
2642 				    inner_loop, chrec,
2643 				    fold_conversions, size_expr);
2644 
2645     case POINTER_PLUS_EXPR:
2646     case PLUS_EXPR:
2647     case MINUS_EXPR:
2648     case MULT_EXPR:
2649       return instantiate_scev_binary (instantiate_below, evolution_loop,
2650 				      inner_loop, chrec,
2651 				      TREE_CODE (chrec), chrec_type (chrec),
2652 				      TREE_OPERAND (chrec, 0),
2653 				      TREE_OPERAND (chrec, 1),
2654 				      fold_conversions, size_expr);
2655 
2656     CASE_CONVERT:
2657       return instantiate_scev_convert (instantiate_below, evolution_loop,
2658 				       inner_loop, chrec,
2659 				       TREE_TYPE (chrec), TREE_OPERAND (chrec, 0),
2660 				       fold_conversions, size_expr);
2661 
2662     case NEGATE_EXPR:
2663     case BIT_NOT_EXPR:
2664       return instantiate_scev_not (instantiate_below, evolution_loop,
2665 				   inner_loop, chrec,
2666 				   TREE_CODE (chrec), TREE_TYPE (chrec),
2667 				   TREE_OPERAND (chrec, 0),
2668 				   fold_conversions, size_expr);
2669 
2670     case ADDR_EXPR:
2671       if (is_gimple_min_invariant (chrec))
2672 	return chrec;
2673       /* Fallthru.  */
2674     case SCEV_NOT_KNOWN:
2675       return chrec_dont_know;
2676 
2677     case SCEV_KNOWN:
2678       return chrec_known;
2679 
2680     default:
2681       if (CONSTANT_CLASS_P (chrec))
2682 	return chrec;
2683       return chrec_dont_know;
2684     }
2685 }
2686 
2687 /* Analyze all the parameters of the chrec that were left under a
2688    symbolic form.  INSTANTIATE_BELOW is the basic block that stops the
2689    recursive instantiation of parameters: a parameter is a variable
2690    that is defined in a basic block that dominates INSTANTIATE_BELOW or
2691    a function parameter.  */
2692 
2693 tree
instantiate_scev(edge instantiate_below,class loop * evolution_loop,tree chrec)2694 instantiate_scev (edge instantiate_below, class loop *evolution_loop,
2695 		  tree chrec)
2696 {
2697   tree res;
2698 
2699   if (dump_file && (dump_flags & TDF_SCEV))
2700     {
2701       fprintf (dump_file, "(instantiate_scev \n");
2702       fprintf (dump_file, "  (instantiate_below = %d -> %d)\n",
2703 	       instantiate_below->src->index, instantiate_below->dest->index);
2704       if (evolution_loop)
2705 	fprintf (dump_file, "  (evolution_loop = %d)\n", evolution_loop->num);
2706       fprintf (dump_file, "  (chrec = ");
2707       print_generic_expr (dump_file, chrec);
2708       fprintf (dump_file, ")\n");
2709     }
2710 
2711   bool destr = false;
2712   if (!global_cache)
2713     {
2714       global_cache = new instantiate_cache_type;
2715       destr = true;
2716     }
2717 
2718   res = instantiate_scev_r (instantiate_below, evolution_loop,
2719 			    NULL, chrec, NULL, 0);
2720 
2721   if (destr)
2722     {
2723       delete global_cache;
2724       global_cache = NULL;
2725     }
2726 
2727   if (dump_file && (dump_flags & TDF_SCEV))
2728     {
2729       fprintf (dump_file, "  (res = ");
2730       print_generic_expr (dump_file, res);
2731       fprintf (dump_file, "))\n");
2732     }
2733 
2734   return res;
2735 }
2736 
2737 /* Similar to instantiate_parameters, but does not introduce the
2738    evolutions in outer loops for LOOP invariants in CHREC, and does not
2739    care about causing overflows, as long as they do not affect value
2740    of an expression.  */
2741 
2742 tree
resolve_mixers(class loop * loop,tree chrec,bool * folded_casts)2743 resolve_mixers (class loop *loop, tree chrec, bool *folded_casts)
2744 {
2745   bool destr = false;
2746   bool fold_conversions = false;
2747   if (!global_cache)
2748     {
2749       global_cache = new instantiate_cache_type;
2750       destr = true;
2751     }
2752 
2753   tree ret = instantiate_scev_r (loop_preheader_edge (loop), loop, NULL,
2754 				 chrec, &fold_conversions, 0);
2755 
2756   if (folded_casts && !*folded_casts)
2757     *folded_casts = fold_conversions;
2758 
2759   if (destr)
2760     {
2761       delete global_cache;
2762       global_cache = NULL;
2763     }
2764 
2765   return ret;
2766 }
2767 
2768 /* Entry point for the analysis of the number of iterations pass.
2769    This function tries to safely approximate the number of iterations
2770    the loop will run.  When this property is not decidable at compile
2771    time, the result is chrec_dont_know.  Otherwise the result is a
2772    scalar or a symbolic parameter.  When the number of iterations may
2773    be equal to zero and the property cannot be determined at compile
2774    time, the result is a COND_EXPR that represents in a symbolic form
2775    the conditions under which the number of iterations is not zero.
2776 
2777    Example of analysis: suppose that the loop has an exit condition:
2778 
2779    "if (b > 49) goto end_loop;"
2780 
2781    and that in a previous analysis we have determined that the
2782    variable 'b' has an evolution function:
2783 
2784    "EF = {23, +, 5}_2".
2785 
2786    When we evaluate the function at the point 5, i.e. the value of the
2787    variable 'b' after 5 iterations in the loop, we have EF (5) = 48,
2788    and EF (6) = 53.  In this case the value of 'b' on exit is '53' and
2789    the loop body has been executed 6 times.  */
2790 
2791 tree
number_of_latch_executions(class loop * loop)2792 number_of_latch_executions (class loop *loop)
2793 {
2794   edge exit;
2795   class tree_niter_desc niter_desc;
2796   tree may_be_zero;
2797   tree res;
2798 
2799   /* Determine whether the number of iterations in loop has already
2800      been computed.  */
2801   res = loop->nb_iterations;
2802   if (res)
2803     return res;
2804 
2805   may_be_zero = NULL_TREE;
2806 
2807   if (dump_file && (dump_flags & TDF_SCEV))
2808     fprintf (dump_file, "(number_of_iterations_in_loop = \n");
2809 
2810   res = chrec_dont_know;
2811   exit = single_exit (loop);
2812 
2813   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
2814     {
2815       may_be_zero = niter_desc.may_be_zero;
2816       res = niter_desc.niter;
2817     }
2818 
2819   if (res == chrec_dont_know
2820       || !may_be_zero
2821       || integer_zerop (may_be_zero))
2822     ;
2823   else if (integer_nonzerop (may_be_zero))
2824     res = build_int_cst (TREE_TYPE (res), 0);
2825 
2826   else if (COMPARISON_CLASS_P (may_be_zero))
2827     res = fold_build3 (COND_EXPR, TREE_TYPE (res), may_be_zero,
2828 		       build_int_cst (TREE_TYPE (res), 0), res);
2829   else
2830     res = chrec_dont_know;
2831 
2832   if (dump_file && (dump_flags & TDF_SCEV))
2833     {
2834       fprintf (dump_file, "  (set_nb_iterations_in_loop = ");
2835       print_generic_expr (dump_file, res);
2836       fprintf (dump_file, "))\n");
2837     }
2838 
2839   loop->nb_iterations = res;
2840   return res;
2841 }
2842 
2843 
2844 /* Counters for the stats.  */
2845 
2846 struct chrec_stats
2847 {
2848   unsigned nb_chrecs;
2849   unsigned nb_affine;
2850   unsigned nb_affine_multivar;
2851   unsigned nb_higher_poly;
2852   unsigned nb_chrec_dont_know;
2853   unsigned nb_undetermined;
2854 };
2855 
2856 /* Reset the counters.  */
2857 
2858 static inline void
reset_chrecs_counters(struct chrec_stats * stats)2859 reset_chrecs_counters (struct chrec_stats *stats)
2860 {
2861   stats->nb_chrecs = 0;
2862   stats->nb_affine = 0;
2863   stats->nb_affine_multivar = 0;
2864   stats->nb_higher_poly = 0;
2865   stats->nb_chrec_dont_know = 0;
2866   stats->nb_undetermined = 0;
2867 }
2868 
2869 /* Dump the contents of a CHREC_STATS structure.  */
2870 
2871 static void
dump_chrecs_stats(FILE * file,struct chrec_stats * stats)2872 dump_chrecs_stats (FILE *file, struct chrec_stats *stats)
2873 {
2874   fprintf (file, "\n(\n");
2875   fprintf (file, "-----------------------------------------\n");
2876   fprintf (file, "%d\taffine univariate chrecs\n", stats->nb_affine);
2877   fprintf (file, "%d\taffine multivariate chrecs\n", stats->nb_affine_multivar);
2878   fprintf (file, "%d\tdegree greater than 2 polynomials\n",
2879 	   stats->nb_higher_poly);
2880   fprintf (file, "%d\tchrec_dont_know chrecs\n", stats->nb_chrec_dont_know);
2881   fprintf (file, "-----------------------------------------\n");
2882   fprintf (file, "%d\ttotal chrecs\n", stats->nb_chrecs);
2883   fprintf (file, "%d\twith undetermined coefficients\n",
2884 	   stats->nb_undetermined);
2885   fprintf (file, "-----------------------------------------\n");
2886   fprintf (file, "%d\tchrecs in the scev database\n",
2887 	   (int) scalar_evolution_info->elements ());
2888   fprintf (file, "%d\tsets in the scev database\n", nb_set_scev);
2889   fprintf (file, "%d\tgets in the scev database\n", nb_get_scev);
2890   fprintf (file, "-----------------------------------------\n");
2891   fprintf (file, ")\n\n");
2892 }
2893 
2894 /* Gather statistics about CHREC.  */
2895 
2896 static void
gather_chrec_stats(tree chrec,struct chrec_stats * stats)2897 gather_chrec_stats (tree chrec, struct chrec_stats *stats)
2898 {
2899   if (dump_file && (dump_flags & TDF_STATS))
2900     {
2901       fprintf (dump_file, "(classify_chrec ");
2902       print_generic_expr (dump_file, chrec);
2903       fprintf (dump_file, "\n");
2904     }
2905 
2906   stats->nb_chrecs++;
2907 
2908   if (chrec == NULL_TREE)
2909     {
2910       stats->nb_undetermined++;
2911       return;
2912     }
2913 
2914   switch (TREE_CODE (chrec))
2915     {
2916     case POLYNOMIAL_CHREC:
2917       if (evolution_function_is_affine_p (chrec))
2918 	{
2919 	  if (dump_file && (dump_flags & TDF_STATS))
2920 	    fprintf (dump_file, "  affine_univariate\n");
2921 	  stats->nb_affine++;
2922 	}
2923       else if (evolution_function_is_affine_multivariate_p (chrec, 0))
2924 	{
2925 	  if (dump_file && (dump_flags & TDF_STATS))
2926 	    fprintf (dump_file, "  affine_multivariate\n");
2927 	  stats->nb_affine_multivar++;
2928 	}
2929       else
2930 	{
2931 	  if (dump_file && (dump_flags & TDF_STATS))
2932 	    fprintf (dump_file, "  higher_degree_polynomial\n");
2933 	  stats->nb_higher_poly++;
2934 	}
2935 
2936       break;
2937 
2938     default:
2939       break;
2940     }
2941 
2942   if (chrec_contains_undetermined (chrec))
2943     {
2944       if (dump_file && (dump_flags & TDF_STATS))
2945 	fprintf (dump_file, "  undetermined\n");
2946       stats->nb_undetermined++;
2947     }
2948 
2949   if (dump_file && (dump_flags & TDF_STATS))
2950     fprintf (dump_file, ")\n");
2951 }
2952 
2953 /* Classify the chrecs of the whole database.  */
2954 
2955 void
gather_stats_on_scev_database(void)2956 gather_stats_on_scev_database (void)
2957 {
2958   struct chrec_stats stats;
2959 
2960   if (!dump_file)
2961     return;
2962 
2963   reset_chrecs_counters (&stats);
2964 
2965   hash_table<scev_info_hasher>::iterator iter;
2966   scev_info_str *elt;
2967   FOR_EACH_HASH_TABLE_ELEMENT (*scalar_evolution_info, elt, scev_info_str *,
2968 			       iter)
2969     gather_chrec_stats (elt->chrec, &stats);
2970 
2971   dump_chrecs_stats (dump_file, &stats);
2972 }
2973 
2974 
2975 /* Initialize the analysis of scalar evolutions for LOOPS.  */
2976 
2977 void
scev_initialize(void)2978 scev_initialize (void)
2979 {
2980   class loop *loop;
2981 
2982   gcc_assert (! scev_initialized_p ());
2983 
2984   scalar_evolution_info = hash_table<scev_info_hasher>::create_ggc (100);
2985 
2986   FOR_EACH_LOOP (loop, 0)
2987     {
2988       loop->nb_iterations = NULL_TREE;
2989     }
2990 }
2991 
2992 /* Return true if SCEV is initialized.  */
2993 
2994 bool
scev_initialized_p(void)2995 scev_initialized_p (void)
2996 {
2997   return scalar_evolution_info != NULL;
2998 }
2999 
3000 /* Cleans up the information cached by the scalar evolutions analysis
3001    in the hash table.  */
3002 
3003 void
scev_reset_htab(void)3004 scev_reset_htab (void)
3005 {
3006   if (!scalar_evolution_info)
3007     return;
3008 
3009   scalar_evolution_info->empty ();
3010 }
3011 
3012 /* Cleans up the information cached by the scalar evolutions analysis
3013    in the hash table and in the loop->nb_iterations.  */
3014 
3015 void
scev_reset(void)3016 scev_reset (void)
3017 {
3018   class loop *loop;
3019 
3020   scev_reset_htab ();
3021 
3022   FOR_EACH_LOOP (loop, 0)
3023     {
3024       loop->nb_iterations = NULL_TREE;
3025     }
3026 }
3027 
3028 /* Return true if the IV calculation in TYPE can overflow based on the knowledge
3029    of the upper bound on the number of iterations of LOOP, the BASE and STEP
3030    of IV.
3031 
3032    We do not use information whether TYPE can overflow so it is safe to
3033    use this test even for derived IVs not computed every iteration or
3034    hypotetical IVs to be inserted into code.  */
3035 
3036 bool
iv_can_overflow_p(class loop * loop,tree type,tree base,tree step)3037 iv_can_overflow_p (class loop *loop, tree type, tree base, tree step)
3038 {
3039   widest_int nit;
3040   wide_int base_min, base_max, step_min, step_max, type_min, type_max;
3041   signop sgn = TYPE_SIGN (type);
3042 
3043   if (integer_zerop (step))
3044     return false;
3045 
3046   if (!INTEGRAL_TYPE_P (TREE_TYPE (base))
3047       || get_range_info (base, &base_min, &base_max) != VR_RANGE)
3048     return true;
3049 
3050   if (!INTEGRAL_TYPE_P (TREE_TYPE (step))
3051       || get_range_info (step, &step_min, &step_max) != VR_RANGE)
3052     return true;
3053 
3054   if (!get_max_loop_iterations (loop, &nit))
3055     return true;
3056 
3057   type_min = wi::min_value (type);
3058   type_max = wi::max_value (type);
3059 
3060   /* Just sanity check that we don't see values out of the range of the type.
3061      In this case the arithmetics bellow would overflow.  */
3062   gcc_checking_assert (wi::ge_p (base_min, type_min, sgn)
3063 		       && wi::le_p (base_max, type_max, sgn));
3064 
3065   /* Account the possible increment in the last ieration.  */
3066   wi::overflow_type overflow = wi::OVF_NONE;
3067   nit = wi::add (nit, 1, SIGNED, &overflow);
3068   if (overflow)
3069     return true;
3070 
3071   /* NIT is typeless and can exceed the precision of the type.  In this case
3072      overflow is always possible, because we know STEP is non-zero.  */
3073   if (wi::min_precision (nit, UNSIGNED) > TYPE_PRECISION (type))
3074     return true;
3075   wide_int nit2 = wide_int::from (nit, TYPE_PRECISION (type), UNSIGNED);
3076 
3077   /* If step can be positive, check that nit*step <= type_max-base.
3078      This can be done by unsigned arithmetic and we only need to watch overflow
3079      in the multiplication. The right hand side can always be represented in
3080      the type.  */
3081   if (sgn == UNSIGNED || !wi::neg_p (step_max))
3082     {
3083       wi::overflow_type overflow = wi::OVF_NONE;
3084       if (wi::gtu_p (wi::mul (step_max, nit2, UNSIGNED, &overflow),
3085 		     type_max - base_max)
3086 	  || overflow)
3087 	return true;
3088     }
3089   /* If step can be negative, check that nit*(-step) <= base_min-type_min.  */
3090   if (sgn == SIGNED && wi::neg_p (step_min))
3091     {
3092       wi::overflow_type overflow, overflow2;
3093       overflow = overflow2 = wi::OVF_NONE;
3094       if (wi::gtu_p (wi::mul (wi::neg (step_min, &overflow2),
3095 		     nit2, UNSIGNED, &overflow),
3096 		     base_min - type_min)
3097 	  || overflow || overflow2)
3098         return true;
3099     }
3100 
3101   return false;
3102 }
3103 
3104 /* Given EV with form of "(type) {inner_base, inner_step}_loop", this
3105    function tries to derive condition under which it can be simplified
3106    into "{(type)inner_base, (type)inner_step}_loop".  The condition is
3107    the maximum number that inner iv can iterate.  */
3108 
3109 static tree
derive_simple_iv_with_niters(tree ev,tree * niters)3110 derive_simple_iv_with_niters (tree ev, tree *niters)
3111 {
3112   if (!CONVERT_EXPR_P (ev))
3113     return ev;
3114 
3115   tree inner_ev = TREE_OPERAND (ev, 0);
3116   if (TREE_CODE (inner_ev) != POLYNOMIAL_CHREC)
3117     return ev;
3118 
3119   tree init = CHREC_LEFT (inner_ev);
3120   tree step = CHREC_RIGHT (inner_ev);
3121   if (TREE_CODE (init) != INTEGER_CST
3122       || TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
3123     return ev;
3124 
3125   tree type = TREE_TYPE (ev);
3126   tree inner_type = TREE_TYPE (inner_ev);
3127   if (TYPE_PRECISION (inner_type) >= TYPE_PRECISION (type))
3128     return ev;
3129 
3130   /* Type conversion in "(type) {inner_base, inner_step}_loop" can be
3131      folded only if inner iv won't overflow.  We compute the maximum
3132      number the inner iv can iterate before overflowing and return the
3133      simplified affine iv.  */
3134   tree delta;
3135   init = fold_convert (type, init);
3136   step = fold_convert (type, step);
3137   ev = build_polynomial_chrec (CHREC_VARIABLE (inner_ev), init, step);
3138   if (tree_int_cst_sign_bit (step))
3139     {
3140       tree bound = lower_bound_in_type (inner_type, inner_type);
3141       delta = fold_build2 (MINUS_EXPR, type, init, fold_convert (type, bound));
3142       step = fold_build1 (NEGATE_EXPR, type, step);
3143     }
3144   else
3145     {
3146       tree bound = upper_bound_in_type (inner_type, inner_type);
3147       delta = fold_build2 (MINUS_EXPR, type, fold_convert (type, bound), init);
3148     }
3149   *niters = fold_build2 (FLOOR_DIV_EXPR, type, delta, step);
3150   return ev;
3151 }
3152 
3153 /* Checks whether use of OP in USE_LOOP behaves as a simple affine iv with
3154    respect to WRTO_LOOP and returns its base and step in IV if possible
3155    (see analyze_scalar_evolution_in_loop for more details on USE_LOOP
3156    and WRTO_LOOP).  If ALLOW_NONCONSTANT_STEP is true, we want step to be
3157    invariant in LOOP.  Otherwise we require it to be an integer constant.
3158 
3159    IV->no_overflow is set to true if we are sure the iv cannot overflow (e.g.
3160    because it is computed in signed arithmetics).  Consequently, adding an
3161    induction variable
3162 
3163    for (i = IV->base; ; i += IV->step)
3164 
3165    is only safe if IV->no_overflow is false, or TYPE_OVERFLOW_UNDEFINED is
3166    false for the type of the induction variable, or you can prove that i does
3167    not wrap by some other argument.  Otherwise, this might introduce undefined
3168    behavior, and
3169 
3170    i = iv->base;
3171    for (; ; i = (type) ((unsigned type) i + (unsigned type) iv->step))
3172 
3173    must be used instead.
3174 
3175    When IV_NITERS is not NULL, this function also checks case in which OP
3176    is a conversion of an inner simple iv of below form:
3177 
3178      (outer_type){inner_base, inner_step}_loop.
3179 
3180    If type of inner iv has smaller precision than outer_type, it can't be
3181    folded into {(outer_type)inner_base, (outer_type)inner_step}_loop because
3182    the inner iv could overflow/wrap.  In this case, we derive a condition
3183    under which the inner iv won't overflow/wrap and do the simplification.
3184    The derived condition normally is the maximum number the inner iv can
3185    iterate, and will be stored in IV_NITERS.  This is useful in loop niter
3186    analysis, to derive break conditions when a loop must terminate, when is
3187    infinite.  */
3188 
3189 bool
simple_iv_with_niters(class loop * wrto_loop,class loop * use_loop,tree op,affine_iv * iv,tree * iv_niters,bool allow_nonconstant_step)3190 simple_iv_with_niters (class loop *wrto_loop, class loop *use_loop,
3191 		       tree op, affine_iv *iv, tree *iv_niters,
3192 		       bool allow_nonconstant_step)
3193 {
3194   enum tree_code code;
3195   tree type, ev, base, e;
3196   wide_int extreme;
3197   bool folded_casts;
3198 
3199   iv->base = NULL_TREE;
3200   iv->step = NULL_TREE;
3201   iv->no_overflow = false;
3202 
3203   type = TREE_TYPE (op);
3204   if (!POINTER_TYPE_P (type)
3205       && !INTEGRAL_TYPE_P (type))
3206     return false;
3207 
3208   ev = analyze_scalar_evolution_in_loop (wrto_loop, use_loop, op,
3209 					 &folded_casts);
3210   if (chrec_contains_undetermined (ev)
3211       || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num))
3212     return false;
3213 
3214   if (tree_does_not_contain_chrecs (ev))
3215     {
3216       iv->base = ev;
3217       iv->step = build_int_cst (TREE_TYPE (ev), 0);
3218       iv->no_overflow = true;
3219       return true;
3220     }
3221 
3222   /* If we can derive valid scalar evolution with assumptions.  */
3223   if (iv_niters && TREE_CODE (ev) != POLYNOMIAL_CHREC)
3224     ev = derive_simple_iv_with_niters (ev, iv_niters);
3225 
3226   if (TREE_CODE (ev) != POLYNOMIAL_CHREC)
3227     return false;
3228 
3229   if (CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num)
3230     return false;
3231 
3232   iv->step = CHREC_RIGHT (ev);
3233   if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST)
3234       || tree_contains_chrecs (iv->step, NULL))
3235     return false;
3236 
3237   iv->base = CHREC_LEFT (ev);
3238   if (tree_contains_chrecs (iv->base, NULL))
3239     return false;
3240 
3241   iv->no_overflow = !folded_casts && nowrap_type_p (type);
3242 
3243   if (!iv->no_overflow
3244       && !iv_can_overflow_p (wrto_loop, type, iv->base, iv->step))
3245     iv->no_overflow = true;
3246 
3247   /* Try to simplify iv base:
3248 
3249        (signed T) ((unsigned T)base + step) ;; TREE_TYPE (base) == signed T
3250 	 == (signed T)(unsigned T)base + step
3251 	 == base + step
3252 
3253      If we can prove operation (base + step) doesn't overflow or underflow.
3254      Specifically, we try to prove below conditions are satisfied:
3255 
3256 	     base <= UPPER_BOUND (type) - step  ;;step > 0
3257 	     base >= LOWER_BOUND (type) - step  ;;step < 0
3258 
3259      This is done by proving the reverse conditions are false using loop's
3260      initial conditions.
3261 
3262      The is necessary to make loop niter, or iv overflow analysis easier
3263      for below example:
3264 
3265        int foo (int *a, signed char s, signed char l)
3266 	 {
3267 	   signed char i;
3268 	   for (i = s; i < l; i++)
3269 	     a[i] = 0;
3270 	   return 0;
3271 	  }
3272 
3273      Note variable I is firstly converted to type unsigned char, incremented,
3274      then converted back to type signed char.  */
3275 
3276   if (wrto_loop->num != use_loop->num)
3277     return true;
3278 
3279   if (!CONVERT_EXPR_P (iv->base) || TREE_CODE (iv->step) != INTEGER_CST)
3280     return true;
3281 
3282   type = TREE_TYPE (iv->base);
3283   e = TREE_OPERAND (iv->base, 0);
3284   if (TREE_CODE (e) != PLUS_EXPR
3285       || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
3286       || !tree_int_cst_equal (iv->step,
3287 			      fold_convert (type, TREE_OPERAND (e, 1))))
3288     return true;
3289   e = TREE_OPERAND (e, 0);
3290   if (!CONVERT_EXPR_P (e))
3291     return true;
3292   base = TREE_OPERAND (e, 0);
3293   if (!useless_type_conversion_p (type, TREE_TYPE (base)))
3294     return true;
3295 
3296   if (tree_int_cst_sign_bit (iv->step))
3297     {
3298       code = LT_EXPR;
3299       extreme = wi::min_value (type);
3300     }
3301   else
3302     {
3303       code = GT_EXPR;
3304       extreme = wi::max_value (type);
3305     }
3306   wi::overflow_type overflow = wi::OVF_NONE;
3307   extreme = wi::sub (extreme, wi::to_wide (iv->step),
3308 		     TYPE_SIGN (type), &overflow);
3309   if (overflow)
3310     return true;
3311   e = fold_build2 (code, boolean_type_node, base,
3312 		   wide_int_to_tree (type, extreme));
3313   e = simplify_using_initial_conditions (use_loop, e);
3314   if (!integer_zerop (e))
3315     return true;
3316 
3317   if (POINTER_TYPE_P (TREE_TYPE (base)))
3318     code = POINTER_PLUS_EXPR;
3319   else
3320     code = PLUS_EXPR;
3321 
3322   iv->base = fold_build2 (code, TREE_TYPE (base), base, iv->step);
3323   return true;
3324 }
3325 
3326 /* Like simple_iv_with_niters, but return TRUE when OP behaves as a simple
3327    affine iv unconditionally.  */
3328 
3329 bool
simple_iv(class loop * wrto_loop,class loop * use_loop,tree op,affine_iv * iv,bool allow_nonconstant_step)3330 simple_iv (class loop *wrto_loop, class loop *use_loop, tree op,
3331 	   affine_iv *iv, bool allow_nonconstant_step)
3332 {
3333   return simple_iv_with_niters (wrto_loop, use_loop, op, iv,
3334 				NULL, allow_nonconstant_step);
3335 }
3336 
3337 /* Finalize the scalar evolution analysis.  */
3338 
3339 void
scev_finalize(void)3340 scev_finalize (void)
3341 {
3342   if (!scalar_evolution_info)
3343     return;
3344   scalar_evolution_info->empty ();
3345   scalar_evolution_info = NULL;
3346   free_numbers_of_iterations_estimates (cfun);
3347 }
3348 
3349 /* Returns true if the expression EXPR is considered to be too expensive
3350    for scev_const_prop.  */
3351 
3352 static bool
expression_expensive_p(tree expr,hash_map<tree,uint64_t> & cache,uint64_t & cost)3353 expression_expensive_p (tree expr, hash_map<tree, uint64_t> &cache,
3354 			uint64_t &cost)
3355 {
3356   enum tree_code code;
3357 
3358   if (is_gimple_val (expr))
3359     return false;
3360 
3361   code = TREE_CODE (expr);
3362   if (code == TRUNC_DIV_EXPR
3363       || code == CEIL_DIV_EXPR
3364       || code == FLOOR_DIV_EXPR
3365       || code == ROUND_DIV_EXPR
3366       || code == TRUNC_MOD_EXPR
3367       || code == CEIL_MOD_EXPR
3368       || code == FLOOR_MOD_EXPR
3369       || code == ROUND_MOD_EXPR
3370       || code == EXACT_DIV_EXPR)
3371     {
3372       /* Division by power of two is usually cheap, so we allow it.
3373 	 Forbid anything else.  */
3374       if (!integer_pow2p (TREE_OPERAND (expr, 1)))
3375 	return true;
3376     }
3377 
3378   bool visited_p;
3379   uint64_t &local_cost = cache.get_or_insert (expr, &visited_p);
3380   if (visited_p)
3381     {
3382       uint64_t tem = cost + local_cost;
3383       if (tem < cost)
3384 	return true;
3385       cost = tem;
3386       return false;
3387     }
3388   local_cost = 1;
3389 
3390   uint64_t op_cost = 0;
3391   if (code == CALL_EXPR)
3392     {
3393       tree arg;
3394       call_expr_arg_iterator iter;
3395       /* Even though is_inexpensive_builtin might say true, we will get a
3396 	 library call for popcount when backend does not have an instruction
3397 	 to do so.  We consider this to be expenseive and generate
3398 	 __builtin_popcount only when backend defines it.  */
3399       combined_fn cfn = get_call_combined_fn (expr);
3400       switch (cfn)
3401 	{
3402 	CASE_CFN_POPCOUNT:
3403 	  /* Check if opcode for popcount is available in the mode required.  */
3404 	  if (optab_handler (popcount_optab,
3405 			     TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0))))
3406 	      == CODE_FOR_nothing)
3407 	    {
3408 	      machine_mode mode;
3409 	      mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0)));
3410 	      scalar_int_mode int_mode;
3411 
3412 	      /* If the mode is of 2 * UNITS_PER_WORD size, we can handle
3413 		 double-word popcount by emitting two single-word popcount
3414 		 instructions.  */
3415 	      if (is_a <scalar_int_mode> (mode, &int_mode)
3416 		  && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
3417 		  && (optab_handler (popcount_optab, word_mode)
3418 		      != CODE_FOR_nothing))
3419 		  break;
3420 	      return true;
3421 	    }
3422 	default:
3423 	  break;
3424 	}
3425 
3426       if (!is_inexpensive_builtin (get_callee_fndecl (expr)))
3427 	return true;
3428       FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
3429 	if (expression_expensive_p (arg, cache, op_cost))
3430 	  return true;
3431       *cache.get (expr) += op_cost;
3432       cost += op_cost + 1;
3433       return false;
3434     }
3435 
3436   if (code == COND_EXPR)
3437     {
3438       if (expression_expensive_p (TREE_OPERAND (expr, 0), cache, op_cost)
3439 	  || (EXPR_P (TREE_OPERAND (expr, 1))
3440 	      && EXPR_P (TREE_OPERAND (expr, 2)))
3441 	  /* If either branch has side effects or could trap.  */
3442 	  || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 1))
3443 	  || generic_expr_could_trap_p (TREE_OPERAND (expr, 1))
3444 	  || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 0))
3445 	  || generic_expr_could_trap_p (TREE_OPERAND (expr, 0))
3446 	  || expression_expensive_p (TREE_OPERAND (expr, 1),
3447 				     cache, op_cost)
3448 	  || expression_expensive_p (TREE_OPERAND (expr, 2),
3449 				     cache, op_cost))
3450 	return true;
3451       *cache.get (expr) += op_cost;
3452       cost += op_cost + 1;
3453       return false;
3454     }
3455 
3456   switch (TREE_CODE_CLASS (code))
3457     {
3458     case tcc_binary:
3459     case tcc_comparison:
3460       if (expression_expensive_p (TREE_OPERAND (expr, 1), cache, op_cost))
3461 	return true;
3462 
3463       /* Fallthru.  */
3464     case tcc_unary:
3465       if (expression_expensive_p (TREE_OPERAND (expr, 0), cache, op_cost))
3466 	return true;
3467       *cache.get (expr) += op_cost;
3468       cost += op_cost + 1;
3469       return false;
3470 
3471     default:
3472       return true;
3473     }
3474 }
3475 
3476 bool
expression_expensive_p(tree expr)3477 expression_expensive_p (tree expr)
3478 {
3479   hash_map<tree, uint64_t> cache;
3480   uint64_t expanded_size = 0;
3481   return (expression_expensive_p (expr, cache, expanded_size)
3482 	  || expanded_size > cache.elements ());
3483 }
3484 
3485 /* Do final value replacement for LOOP, return true if we did anything.  */
3486 
3487 bool
final_value_replacement_loop(class loop * loop)3488 final_value_replacement_loop (class loop *loop)
3489 {
3490   /* If we do not know exact number of iterations of the loop, we cannot
3491      replace the final value.  */
3492   edge exit = single_exit (loop);
3493   if (!exit)
3494     return false;
3495 
3496   tree niter = number_of_latch_executions (loop);
3497   if (niter == chrec_dont_know)
3498     return false;
3499 
3500   /* Ensure that it is possible to insert new statements somewhere.  */
3501   if (!single_pred_p (exit->dest))
3502     split_loop_exit_edge (exit);
3503 
3504   /* Set stmt insertion pointer.  All stmts are inserted before this point.  */
3505   gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
3506 
3507   class loop *ex_loop
3508     = superloop_at_depth (loop,
3509 			  loop_depth (exit->dest->loop_father) + 1);
3510 
3511   bool any = false;
3512   gphi_iterator psi;
3513   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
3514     {
3515       gphi *phi = psi.phi ();
3516       tree rslt = PHI_RESULT (phi);
3517       tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3518       if (virtual_operand_p (def))
3519 	{
3520 	  gsi_next (&psi);
3521 	  continue;
3522 	}
3523 
3524       if (!POINTER_TYPE_P (TREE_TYPE (def))
3525 	  && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
3526 	{
3527 	  gsi_next (&psi);
3528 	  continue;
3529 	}
3530 
3531       bool folded_casts;
3532       def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
3533 					      &folded_casts);
3534       def = compute_overall_effect_of_inner_loop (ex_loop, def);
3535       if (!tree_does_not_contain_chrecs (def)
3536 	  || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
3537 	  /* Moving the computation from the loop may prolong life range
3538 	     of some ssa names, which may cause problems if they appear
3539 	     on abnormal edges.  */
3540 	  || contains_abnormal_ssa_name_p (def)
3541 	  /* Do not emit expensive expressions.  The rationale is that
3542 	     when someone writes a code like
3543 
3544 	     while (n > 45) n -= 45;
3545 
3546 	     he probably knows that n is not large, and does not want it
3547 	     to be turned into n %= 45.  */
3548 	  || expression_expensive_p (def))
3549 	{
3550 	  if (dump_file && (dump_flags & TDF_DETAILS))
3551 	    {
3552 	      fprintf (dump_file, "not replacing:\n  ");
3553 	      print_gimple_stmt (dump_file, phi, 0);
3554 	      fprintf (dump_file, "\n");
3555 	    }
3556 	  gsi_next (&psi);
3557 	  continue;
3558 	}
3559 
3560       /* Eliminate the PHI node and replace it by a computation outside
3561 	 the loop.  */
3562       if (dump_file)
3563 	{
3564 	  fprintf (dump_file, "\nfinal value replacement:\n  ");
3565 	  print_gimple_stmt (dump_file, phi, 0);
3566 	  fprintf (dump_file, " with expr: ");
3567 	  print_generic_expr (dump_file, def);
3568 	}
3569       any = true;
3570       def = unshare_expr (def);
3571       remove_phi_node (&psi, false);
3572 
3573       /* If def's type has undefined overflow and there were folded
3574 	 casts, rewrite all stmts added for def into arithmetics
3575 	 with defined overflow behavior.  */
3576       if (folded_casts && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
3577 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
3578 	{
3579 	  gimple_seq stmts;
3580 	  gimple_stmt_iterator gsi2;
3581 	  def = force_gimple_operand (def, &stmts, true, NULL_TREE);
3582 	  gsi2 = gsi_start (stmts);
3583 	  while (!gsi_end_p (gsi2))
3584 	    {
3585 	      gimple *stmt = gsi_stmt (gsi2);
3586 	      gimple_stmt_iterator gsi3 = gsi2;
3587 	      gsi_next (&gsi2);
3588 	      gsi_remove (&gsi3, false);
3589 	      if (is_gimple_assign (stmt)
3590 		  && arith_code_with_undefined_signed_overflow
3591 		  (gimple_assign_rhs_code (stmt)))
3592 		gsi_insert_seq_before (&gsi,
3593 				       rewrite_to_defined_overflow (stmt),
3594 				       GSI_SAME_STMT);
3595 	      else
3596 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3597 	    }
3598 	}
3599       else
3600 	def = force_gimple_operand_gsi (&gsi, def, false, NULL_TREE,
3601 					true, GSI_SAME_STMT);
3602 
3603       gassign *ass = gimple_build_assign (rslt, def);
3604       gimple_set_location (ass,
3605 			   gimple_phi_arg_location (phi, exit->dest_idx));
3606       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
3607       if (dump_file)
3608 	{
3609 	  fprintf (dump_file, "\n final stmt:\n  ");
3610 	  print_gimple_stmt (dump_file, ass, 0);
3611 	  fprintf (dump_file, "\n");
3612 	}
3613     }
3614 
3615   return any;
3616 }
3617 
3618 #include "gt-tree-scalar-evolution.h"
3619