1 /* Induction variable optimizations.
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It proceeds in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note that the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only by a constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variable's
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.  */
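
/* Illustrative example (an editorial sketch, not taken from the sources
   above): for a source loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address use "a[i]" can be rewritten in terms of a pointer candidate
   and the exit test "i < n" can be eliminated in favour of a bound on that
   pointer, giving roughly

     for (p = a; p < a + n; p++)
       *p = 0;

   i.e. strength reduction of the addressing arithmetic hidden in "a[i]"
   plus elimination of the induction variable "i".  Which rewrites actually
   happen depends on the target costs computed below.  */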
68 
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111 
112 /* For lang_hooks.types.type_for_mode.  */
113 #include "langhooks.h"
114 
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116    cost of different addressing modes.  This should be moved to a TBD
117    interface between the GIMPLE and RTL worlds.  */
118 
119 /* The infinite cost.  */
120 #define INFTY 10000000
121 
122 /* Returns the expected number of loop iterations for LOOP.
123    The average trip count is computed from profile data if it
124    exists. */
125 
126 static inline HOST_WIDE_INT
127 avg_loop_niter (struct loop *loop)
128 {
129   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
130   if (niter == -1)
131     {
132       niter = likely_max_stmt_executions_int (loop);
133 
134       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
135 	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
136     }
137 
138   return niter;
139 }
140 
141 struct iv_use;
142 
143 /* Representation of the induction variable.  */
144 struct iv
145 {
146   tree base;		/* Initial value of the iv.  */
147   tree base_object;	/* A memory object to which the induction variable points.  */
148   tree step;		/* Step of the iv (constant only).  */
149   tree ssa_name;	/* The ssa name with the value.  */
150   struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if any.  */
151   bool biv_p;		/* Is it a biv?  */
152   bool no_overflow;	/* True if the iv doesn't overflow.  */
153   bool have_address_use;/* For biv, indicate if it's used in any address
154 			   type use.  */
155 };
156 
157 /* Per-ssa version information (induction variable descriptions, etc.).  */
158 struct version_info
159 {
160   tree name;		/* The ssa name.  */
161   struct iv *iv;	/* Induction variable description.  */
162   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
163 			   an expression that is not an induction variable.  */
164   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
165   unsigned inv_id;	/* Id of an invariant.  */
166 };
167 
168 /* Types of uses.  */
169 enum use_type
170 {
171   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
172   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
173 			   reference.  */
174   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
175 			   cases where the expansion of the function
176 			   will turn the argument into a normal address.  */
177   USE_COMPARE		/* Use is a compare.  */
178 };
179 
180 /* Cost of a computation.  */
181 struct comp_cost
182 {
183   comp_cost (): cost (0), complexity (0), scratch (0)
184   {}
185 
186   comp_cost (int cost, unsigned complexity, int scratch = 0)
187     : cost (cost), complexity (complexity), scratch (scratch)
188   {}
189 
190   /* Returns true if COST is infinite.  */
191   bool infinite_cost_p ();
192 
193   /* Adds costs COST1 and COST2.  */
194   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
195 
196   /* Adds COST to the comp_cost.  */
197   comp_cost operator+= (comp_cost cost);
198 
199   /* Adds constant C to this comp_cost.  */
200   comp_cost operator+= (HOST_WIDE_INT c);
201 
202   /* Subtracts constant C from this comp_cost.  */
203   comp_cost operator-= (HOST_WIDE_INT c);
204 
205   /* Divide the comp_cost by constant C.  */
206   comp_cost operator/= (HOST_WIDE_INT c);
207 
208   /* Multiply the comp_cost by constant C.  */
209   comp_cost operator*= (HOST_WIDE_INT c);
210 
211   /* Subtracts costs COST1 and COST2.  */
212   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
213 
214   /* Subtracts COST from this comp_cost.  */
215   comp_cost operator-= (comp_cost cost);
216 
217   /* Returns true if COST1 is smaller than COST2.  */
218   friend bool operator< (comp_cost cost1, comp_cost cost2);
219 
220   /* Returns true if COST1 and COST2 are equal.  */
221   friend bool operator== (comp_cost cost1, comp_cost cost2);
222 
223   /* Returns true if COST1 is smaller than or equal to COST2.  */
224   friend bool operator<= (comp_cost cost1, comp_cost cost2);
225 
226   int cost;		/* The runtime cost.  */
227   unsigned complexity;  /* The estimate of the complexity of the code for
228 			   the computation (in no concrete units --
229 			   complexity field should be larger for more
230 			   complex expressions and addressing modes).  */
231   int scratch;		/* Scratch used during cost computation.  */
232 };
233 
234 static const comp_cost no_cost;
235 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
236 
237 bool
238 comp_cost::infinite_cost_p ()
239 {
240   return cost == INFTY;
241 }
242 
243 comp_cost
244 operator+ (comp_cost cost1, comp_cost cost2)
245 {
246   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
247     return infinite_cost;
248 
249   cost1.cost += cost2.cost;
250   cost1.complexity += cost2.complexity;
251 
252   return cost1;
253 }
254 
255 comp_cost
256 operator- (comp_cost cost1, comp_cost cost2)
257 {
258   if (cost1.infinite_cost_p ())
259     return infinite_cost;
260 
261   gcc_assert (!cost2.infinite_cost_p ());
262 
263   cost1.cost -= cost2.cost;
264   cost1.complexity -= cost2.complexity;
265 
266   return cost1;
267 }
268 
269 comp_cost
270 comp_cost::operator+= (comp_cost cost)
271 {
272   *this = *this + cost;
273   return *this;
274 }
275 
276 comp_cost
277 comp_cost::operator+= (HOST_WIDE_INT c)
278 {
279   if (infinite_cost_p ())
280     return *this;
281 
282   this->cost += c;
283 
284   return *this;
285 }
286 
287 comp_cost
288 comp_cost::operator-= (HOST_WIDE_INT c)
289 {
290   if (infinite_cost_p ())
291     return *this;
292 
293   this->cost -= c;
294 
295   return *this;
296 }
297 
298 comp_cost
299 comp_cost::operator/= (HOST_WIDE_INT c)
300 {
301   if (infinite_cost_p ())
302     return *this;
303 
304   this->cost /= c;
305 
306   return *this;
307 }
308 
309 comp_cost
310 comp_cost::operator*= (HOST_WIDE_INT c)
311 {
312   if (infinite_cost_p ())
313     return *this;
314 
315   this->cost *= c;
316 
317   return *this;
318 }
319 
320 comp_cost
321 comp_cost::operator-= (comp_cost cost)
322 {
323   *this = *this - cost;
324   return *this;
325 }
326 
327 bool
328 operator< (comp_cost cost1, comp_cost cost2)
329 {
330   if (cost1.cost == cost2.cost)
331     return cost1.complexity < cost2.complexity;
332 
333   return cost1.cost < cost2.cost;
334 }
335 
336 bool
337 operator== (comp_cost cost1, comp_cost cost2)
338 {
339   return cost1.cost == cost2.cost
340     && cost1.complexity == cost2.complexity;
341 }
342 
343 bool
344 operator<= (comp_cost cost1, comp_cost cost2)
345 {
346   return cost1 < cost2 || cost1 == cost2;
347 }
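
/* A small worked example of the ordering above (illustrative only):
   comp_cost (4, 2) < comp_cost (4, 3) because the "cost" fields are equal
   and the "complexity" fields break the tie, while comp_cost (3, 9)
   < comp_cost (4, 0) because complexity is only consulted when the costs
   are equal.  infinite_cost compares larger than any finite cost.  */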
348 
349 struct iv_inv_expr_ent;
350 
351 /* The candidate - cost pair.  */
352 struct cost_pair
353 {
354   struct iv_cand *cand;	/* The candidate.  */
355   comp_cost cost;	/* The cost.  */
356   enum tree_code comp;	/* For iv elimination, the comparison.  */
357   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
358 			   preserved when representing iv_use with iv_cand.  */
359   bitmap inv_exprs;	/* The list of newly created invariant expressions
360 			   when representing iv_use with iv_cand.  */
361   tree value;		/* For final value elimination, the expression for
362 			   the final value of the iv.  For iv elimination,
363 			   the new bound to compare with.  */
364 };
365 
366 /* Use.  */
367 struct iv_use
368 {
369   unsigned id;		/* The id of the use.  */
370   unsigned group_id;	/* The group id the use belongs to.  */
371   enum use_type type;	/* Type of the use.  */
372   tree mem_type;	/* The memory type to use when testing whether an
373 			   address is legitimate, and what the address's
374 			   cost is.  */
375   struct iv *iv;	/* The induction variable it is based on.  */
376   gimple *stmt;		/* Statement in that it occurs.  */
377   tree *op_p;		/* The place where it occurs.  */
378 
379   tree addr_base;	/* Base address with const offset stripped.  */
380   poly_uint64_pod addr_offset;
381 			/* Const offset stripped from base address.  */
382 };
383 
384 /* Group of uses.  */
385 struct iv_group
386 {
387   /* The id of the group.  */
388   unsigned id;
389   /* Uses of the group are of the same type.  */
390   enum use_type type;
391   /* The set of "related" IV candidates, plus the important ones.  */
392   bitmap related_cands;
393   /* Number of IV candidates in the cost_map.  */
394   unsigned n_map_members;
395   /* The costs w.r.t. the iv candidates.  */
396   struct cost_pair *cost_map;
397   /* The selected candidate for the group.  */
398   struct iv_cand *selected;
399   /* Uses in the group.  */
400   vec<struct iv_use *> vuses;
401 };
402 
403 /* The position where the iv is computed.  */
404 enum iv_position
405 {
406   IP_NORMAL,		/* At the end, just before the exit condition.  */
407   IP_END,		/* At the end of the latch block.  */
408   IP_BEFORE_USE,	/* Immediately before a specific use.  */
409   IP_AFTER_USE,		/* Immediately after a specific use.  */
410   IP_ORIGINAL		/* The original biv.  */
411 };
412 
413 /* The induction variable candidate.  */
414 struct iv_cand
415 {
416   unsigned id;		/* The number of the candidate.  */
417   bool important;	/* Whether this is an "important" candidate, i.e. such
418 			   that it should be considered by all uses.  */
419   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
420   gimple *incremented_at;/* For original biv, the statement where it is
421 			   incremented.  */
422   tree var_before;	/* The variable used for it before increment.  */
423   tree var_after;	/* The variable used for it after increment.  */
424   struct iv *iv;	/* The value of the candidate.  NULL for
425 			   "pseudocandidate" used to indicate the possibility
426 			   to replace the final value of an iv by direct
427 			   computation of the value.  */
428   unsigned cost;	/* Cost of the candidate.  */
429   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
430   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
431 			      where it is incremented.  */
432   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
433 			   iv_cand.  */
434   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
435 			   handle it as a new invariant expression which will
436 			   be hoisted out of loop.  */
437   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
438 			   smaller type.  */
439 };
440 
441 /* Hashtable entry for common candidate derived from iv uses.  */
442 struct iv_common_cand
443 {
444   tree base;
445   tree step;
446   /* IV uses from which this common candidate is derived.  */
447   auto_vec<struct iv_use *> uses;
448   hashval_t hash;
449 };
450 
451 /* Hashtable helpers.  */
452 
453 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
454 {
455   static inline hashval_t hash (const iv_common_cand *);
456   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
457 };
458 
459 /* Hash function for possible common candidates.  */
460 
461 inline hashval_t
462 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
463 {
464   return ccand->hash;
465 }
466 
467 /* Hash table equality function for common candidates.  */
468 
469 inline bool
470 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
471 			      const iv_common_cand *ccand2)
472 {
473   return (ccand1->hash == ccand2->hash
474 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
475 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
476 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
477 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
478 }
479 
480 /* Loop invariant expression hashtable entry.  */
481 
482 struct iv_inv_expr_ent
483 {
484   /* Tree expression of the entry.  */
485   tree expr;
486   /* Unique identifier.  */
487   int id;
488   /* Hash value.  */
489   hashval_t hash;
490 };
491 
492 /* Sort iv_inv_expr_ent pair A and B by id field.  */
493 
494 static int
495 sort_iv_inv_expr_ent (const void *a, const void *b)
496 {
497   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
498   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
499 
500   unsigned id1 = (*e1)->id;
501   unsigned id2 = (*e2)->id;
502 
503   if (id1 < id2)
504     return -1;
505   else if (id1 > id2)
506     return 1;
507   else
508     return 0;
509 }
510 
511 /* Hashtable helpers.  */
512 
513 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
514 {
515   static inline hashval_t hash (const iv_inv_expr_ent *);
516   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
517 };
518 
519 /* Return true if uses of type TYPE represent some form of address.  */
520 
521 inline bool
522 address_p (use_type type)
523 {
524   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
525 }
526 
527 /* Hash function for loop invariant expressions.  */
528 
529 inline hashval_t
530 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
531 {
532   return expr->hash;
533 }
534 
535 /* Hash table equality function for expressions.  */
536 
537 inline bool
538 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
539 			   const iv_inv_expr_ent *expr2)
540 {
541   return expr1->hash == expr2->hash
542 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
543 }
544 
545 struct ivopts_data
546 {
547   /* The currently optimized loop.  */
548   struct loop *current_loop;
549   source_location loop_loc;
550 
551   /* Numbers of iterations for all exits of the current loop.  */
552   hash_map<edge, tree_niter_desc *> *niters;
553 
554   /* Number of registers used in it.  */
555   unsigned regs_used;
556 
557   /* The size of version_info array allocated.  */
558   unsigned version_info_size;
559 
560   /* The array of information for the ssa names.  */
561   struct version_info *version_info;
562 
563   /* The hashtable of loop invariant expressions created
564      by ivopt.  */
565   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
566 
567   /* The bitmap of indices in version_info whose value was changed.  */
568   bitmap relevant;
569 
570   /* The uses of induction variables.  */
571   vec<iv_group *> vgroups;
572 
573   /* The candidates.  */
574   vec<iv_cand *> vcands;
575 
576   /* A bitmap of important candidates.  */
577   bitmap important_candidates;
578 
579   /* Cache used by tree_to_aff_combination_expand.  */
580   hash_map<tree, name_expansion *> *name_expansion_cache;
581 
582   /* The hashtable of common candidates derived from iv uses.  */
583   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
584 
585   /* The common candidates.  */
586   vec<iv_common_cand *> iv_common_cands;
587 
588   /* Hash map recording base object information of tree exp.  */
589   hash_map<tree, tree> *base_object_map;
590 
591   /* The maximum invariant variable id.  */
592   unsigned max_inv_var_id;
593 
594   /* The maximum invariant expression id.  */
595   unsigned max_inv_expr_id;
596 
597   /* Number of no_overflow BIVs which are not used in memory address.  */
598   unsigned bivs_not_used_in_addr;
599 
600   /* Obstack for iv structure.  */
601   struct obstack iv_obstack;
602 
603   /* Whether to consider just related and important candidates when replacing a
604      use.  */
605   bool consider_all_candidates;
606 
607   /* Are we optimizing for speed?  */
608   bool speed;
609 
610   /* Whether the loop body includes any function calls.  */
611   bool body_includes_call;
612 
613   /* Whether the loop body can only be exited via single exit.  */
614   bool loop_single_exit_p;
615 };
616 
617 /* An assignment of iv candidates to uses.  */
618 
619 struct iv_ca
620 {
621   /* The number of uses covered by the assignment.  */
622   unsigned upto;
623 
624   /* Number of uses that cannot be expressed by the candidates in the set.  */
625   unsigned bad_groups;
626 
627   /* Candidate assigned to a use, together with the related costs.  */
628   struct cost_pair **cand_for_group;
629 
630   /* Number of times each candidate is used.  */
631   unsigned *n_cand_uses;
632 
633   /* The candidates used.  */
634   bitmap cands;
635 
636   /* The number of candidates in the set.  */
637   unsigned n_cands;
638 
639   /* The number of invariants needed, including both invariant variables and
640      invariant expressions.  */
641   unsigned n_invs;
642 
643   /* Total cost of expressing uses.  */
644   comp_cost cand_use_cost;
645 
646   /* Total cost of candidates.  */
647   unsigned cand_cost;
648 
649   /* Number of times each invariant variable is used.  */
650   unsigned *n_inv_var_uses;
651 
652   /* Number of times each invariant expression is used.  */
653   unsigned *n_inv_expr_uses;
654 
655   /* Total cost of the assignment.  */
656   comp_cost cost;
657 };
658 
659 /* Difference of two iv candidate assignments.  */
660 
661 struct iv_ca_delta
662 {
663   /* Changed group.  */
664   struct iv_group *group;
665 
666   /* An old assignment (for rollback purposes).  */
667   struct cost_pair *old_cp;
668 
669   /* A new assignment.  */
670   struct cost_pair *new_cp;
671 
672   /* Next change in the list.  */
673   struct iv_ca_delta *next;
674 };
675 
676 /* Bound on the number of candidates below which all candidates are considered.  */
677 
678 #define CONSIDER_ALL_CANDIDATES_BOUND \
679   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
680 
681 /* If there are more iv occurrences, we just give up (it is quite unlikely that
682    optimizing such a loop would help, and it would take ages).  */
683 
684 #define MAX_CONSIDERED_GROUPS \
685   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
686 
687 /* If there are at most this number of ivs in the set, always try removing
688    unnecessary ivs from the set.  */
689 
690 #define ALWAYS_PRUNE_CAND_SET_BOUND \
691   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
692 
693 /* The list of trees for which the decl_rtl field must be reset is stored
694    here.  */
695 
696 static vec<tree> decl_rtl_to_reset;
697 
698 static comp_cost force_expr_to_var_cost (tree, bool);
699 
700 /* The single loop exit if it dominates the latch, NULL otherwise.  */
701 
702 edge
703 single_dom_exit (struct loop *loop)
704 {
705   edge exit = single_exit (loop);
706 
707   if (!exit)
708     return NULL;
709 
710   if (!just_once_each_iteration_p (loop, exit->src))
711     return NULL;
712 
713   return exit;
714 }
715 
716 /* Dumps information about the induction variable IV to FILE.  Don't dump
717    variable's name if DUMP_NAME is FALSE.  The information is dumped with
718    preceding spaces indicated by INDENT_LEVEL.  */
719 
720 void
721 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
722 {
723   const char *p;
724   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
725 
726   if (indent_level > 4)
727     indent_level = 4;
728   p = spaces + 8 - (indent_level << 1);
729 
730   fprintf (file, "%sIV struct:\n", p);
731   if (iv->ssa_name && dump_name)
732     {
733       fprintf (file, "%s  SSA_NAME:\t", p);
734       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
735       fprintf (file, "\n");
736     }
737 
738   fprintf (file, "%s  Type:\t", p);
739   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
740   fprintf (file, "\n");
741 
742   fprintf (file, "%s  Base:\t", p);
743   print_generic_expr (file, iv->base, TDF_SLIM);
744   fprintf (file, "\n");
745 
746   fprintf (file, "%s  Step:\t", p);
747   print_generic_expr (file, iv->step, TDF_SLIM);
748   fprintf (file, "\n");
749 
750   if (iv->base_object)
751     {
752       fprintf (file, "%s  Object:\t", p);
753       print_generic_expr (file, iv->base_object, TDF_SLIM);
754       fprintf (file, "\n");
755     }
756 
757   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
758 
759   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
760 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
761 }
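
/* For reference, the fprintf calls above produce dump output of roughly
   this shape (the values are made up for illustration):

     IV struct:
       SSA_NAME:	i_13
       Type:	unsigned int
       Base:	0
       Step:	1
       Biv:	Y
       Overflowness wrto loop niter:	No-overflow  */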
762 
763 /* Dumps information about the USE to FILE.  */
764 
765 void
766 dump_use (FILE *file, struct iv_use *use)
767 {
768   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
769   fprintf (file, "    At stmt:\t");
770   print_gimple_stmt (file, use->stmt, 0);
771   fprintf (file, "    At pos:\t");
772   if (use->op_p)
773     print_generic_expr (file, *use->op_p, TDF_SLIM);
774   fprintf (file, "\n");
775   dump_iv (file, use->iv, false, 2);
776 }
777 
778 /* Dumps information about the uses to FILE.  */
779 
780 void
781 dump_groups (FILE *file, struct ivopts_data *data)
782 {
783   unsigned i, j;
784   struct iv_group *group;
785 
786   for (i = 0; i < data->vgroups.length (); i++)
787     {
788       group = data->vgroups[i];
789       fprintf (file, "Group %d:\n", group->id);
790       if (group->type == USE_NONLINEAR_EXPR)
791 	fprintf (file, "  Type:\tGENERIC\n");
792       else if (group->type == USE_REF_ADDRESS)
793 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
794       else if (group->type == USE_PTR_ADDRESS)
795 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
796       else
797 	{
798 	  gcc_assert (group->type == USE_COMPARE);
799 	  fprintf (file, "  Type:\tCOMPARE\n");
800 	}
801       for (j = 0; j < group->vuses.length (); j++)
802 	dump_use (file, group->vuses[j]);
803     }
804 }
805 
806 /* Dumps information about induction variable candidate CAND to FILE.  */
807 
808 void
809 dump_cand (FILE *file, struct iv_cand *cand)
810 {
811   struct iv *iv = cand->iv;
812 
813   fprintf (file, "Candidate %d:\n", cand->id);
814   if (cand->inv_vars)
815     {
816       fprintf (file, "  Depend on inv.vars: ");
817       dump_bitmap (file, cand->inv_vars);
818     }
819   if (cand->inv_exprs)
820     {
821       fprintf (file, "  Depend on inv.exprs: ");
822       dump_bitmap (file, cand->inv_exprs);
823     }
824 
825   if (cand->var_before)
826     {
827       fprintf (file, "  Var before: ");
828       print_generic_expr (file, cand->var_before, TDF_SLIM);
829       fprintf (file, "\n");
830     }
831   if (cand->var_after)
832     {
833       fprintf (file, "  Var after: ");
834       print_generic_expr (file, cand->var_after, TDF_SLIM);
835       fprintf (file, "\n");
836     }
837 
838   switch (cand->pos)
839     {
840     case IP_NORMAL:
841       fprintf (file, "  Incr POS: before exit test\n");
842       break;
843 
844     case IP_BEFORE_USE:
845       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
846       break;
847 
848     case IP_AFTER_USE:
849       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
850       break;
851 
852     case IP_END:
853       fprintf (file, "  Incr POS: at end\n");
854       break;
855 
856     case IP_ORIGINAL:
857       fprintf (file, "  Incr POS: orig biv\n");
858       break;
859     }
860 
861   dump_iv (file, iv, false, 1);
862 }
863 
864 /* Returns the info for ssa version VER.  */
865 
866 static inline struct version_info *
867 ver_info (struct ivopts_data *data, unsigned ver)
868 {
869   return data->version_info + ver;
870 }
871 
872 /* Returns the info for ssa name NAME.  */
873 
874 static inline struct version_info *
875 name_info (struct ivopts_data *data, tree name)
876 {
877   return ver_info (data, SSA_NAME_VERSION (name));
878 }
879 
880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
881    emitted in LOOP.  */
882 
883 static bool
884 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
885 {
886   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
887 
888   gcc_assert (bb);
889 
890   if (sbb == loop->latch)
891     return true;
892 
893   if (sbb != bb)
894     return false;
895 
896   return stmt == last_stmt (bb);
897 }
898 
899 /* Returns true if STMT is after the place where the original induction
900    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
901    if the positions are identical.  */
902 
903 static bool
904 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
905 {
906   basic_block cand_bb = gimple_bb (cand->incremented_at);
907   basic_block stmt_bb = gimple_bb (stmt);
908 
909   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
910     return false;
911 
912   if (stmt_bb != cand_bb)
913     return true;
914 
915   if (true_if_equal
916       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
917     return true;
918   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
919 }
920 
921 /* Returns true if STMT is after the place where the induction variable
922    CAND is incremented in LOOP.  */
923 
924 static bool
925 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
926 {
927   switch (cand->pos)
928     {
929     case IP_END:
930       return false;
931 
932     case IP_NORMAL:
933       return stmt_after_ip_normal_pos (loop, stmt);
934 
935     case IP_ORIGINAL:
936     case IP_AFTER_USE:
937       return stmt_after_inc_pos (cand, stmt, false);
938 
939     case IP_BEFORE_USE:
940       return stmt_after_inc_pos (cand, stmt, true);
941 
942     default:
943       gcc_unreachable ();
944     }
945 }
946 
947 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
948 
949 static bool
950 abnormal_ssa_name_p (tree exp)
951 {
952   if (!exp)
953     return false;
954 
955   if (TREE_CODE (exp) != SSA_NAME)
956     return false;
957 
958   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
959 }
960 
961 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
962    abnormal phi node.  Callback for for_each_index.  */
963 
964 static bool
965 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
966 				  void *data ATTRIBUTE_UNUSED)
967 {
968   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
969     {
970       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
971 	return false;
972       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
973 	return false;
974     }
975 
976   return !abnormal_ssa_name_p (*index);
977 }
978 
979 /* Returns true if EXPR contains a ssa name that occurs in an
980    abnormal phi node.  */
981 
982 bool
983 contains_abnormal_ssa_name_p (tree expr)
984 {
985   enum tree_code code;
986   enum tree_code_class codeclass;
987 
988   if (!expr)
989     return false;
990 
991   code = TREE_CODE (expr);
992   codeclass = TREE_CODE_CLASS (code);
993 
994   if (code == SSA_NAME)
995     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
996 
997   if (code == INTEGER_CST
998       || is_gimple_min_invariant (expr))
999     return false;
1000 
1001   if (code == ADDR_EXPR)
1002     return !for_each_index (&TREE_OPERAND (expr, 0),
1003 			    idx_contains_abnormal_ssa_name_p,
1004 			    NULL);
1005 
1006   if (code == COND_EXPR)
1007     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1008       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1009       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1010 
1011   switch (codeclass)
1012     {
1013     case tcc_binary:
1014     case tcc_comparison:
1015       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1016 	return true;
1017 
1018       /* Fallthru.  */
1019     case tcc_unary:
1020       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1021 	return true;
1022 
1023       break;
1024 
1025     default:
1026       gcc_unreachable ();
1027     }
1028 
1029   return false;
1030 }
1031 
1032 /*  Returns the structure describing number of iterations determined from
1033     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1034 
1035 static struct tree_niter_desc *
1036 niter_for_exit (struct ivopts_data *data, edge exit)
1037 {
1038   struct tree_niter_desc *desc;
1039   tree_niter_desc **slot;
1040 
1041   if (!data->niters)
1042     {
1043       data->niters = new hash_map<edge, tree_niter_desc *>;
1044       slot = NULL;
1045     }
1046   else
1047     slot = data->niters->get (exit);
1048 
1049   if (!slot)
1050     {
1051       /* Try to determine number of iterations.  We cannot safely work with ssa
1052 	 names that appear in phi nodes on abnormal edges, so that we do not
1053 	 create overlapping life ranges for them (PR 27283).  */
1054       desc = XNEW (struct tree_niter_desc);
1055       if (!number_of_iterations_exit (data->current_loop,
1056 				      exit, desc, true)
1057      	  || contains_abnormal_ssa_name_p (desc->niter))
1058 	{
1059 	  XDELETE (desc);
1060 	  desc = NULL;
1061 	}
1062       data->niters->put (exit, desc);
1063     }
1064   else
1065     desc = *slot;
1066 
1067   return desc;
1068 }
1069 
1070 /* Returns the structure describing number of iterations determined from
1071    single dominating exit of DATA->current_loop, or NULL if something
1072    goes wrong.  */
1073 
1074 static struct tree_niter_desc *
1075 niter_for_single_dom_exit (struct ivopts_data *data)
1076 {
1077   edge exit = single_dom_exit (data->current_loop);
1078 
1079   if (!exit)
1080     return NULL;
1081 
1082   return niter_for_exit (data, exit);
1083 }
1084 
1085 /* Initializes data structures used by the iv optimization pass, stored
1086    in DATA.  */
1087 
1088 static void
1089 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1090 {
1091   data->version_info_size = 2 * num_ssa_names;
1092   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1093   data->relevant = BITMAP_ALLOC (NULL);
1094   data->important_candidates = BITMAP_ALLOC (NULL);
1095   data->max_inv_var_id = 0;
1096   data->max_inv_expr_id = 0;
1097   data->niters = NULL;
1098   data->vgroups.create (20);
1099   data->vcands.create (20);
1100   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1101   data->name_expansion_cache = NULL;
1102   data->base_object_map = NULL;
1103   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1104   data->iv_common_cands.create (20);
1105   decl_rtl_to_reset.create (20);
1106   gcc_obstack_init (&data->iv_obstack);
1107 }
1108 
1109 /* walk_tree callback for determine_base_object.  */
1110 
1111 static tree
1112 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1113 {
1114   tree_code code = TREE_CODE (*tp);
1115   tree obj = NULL_TREE;
1116   if (code == ADDR_EXPR)
1117     {
1118       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1119       if (!base)
1120 	obj = *tp;
1121       else if (TREE_CODE (base) != MEM_REF)
1122 	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1123     }
1124   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1125 	obj = fold_convert (ptr_type_node, *tp);
1126 
1127   if (!obj)
1128     {
1129       if (!EXPR_P (*tp))
1130 	*walk_subtrees = 0;
1131 
1132       return NULL_TREE;
1133     }
1134   /* Record special node for multiple base objects and stop.  */
1135   if (*static_cast<tree *> (wdata))
1136     {
1137       *static_cast<tree *> (wdata) = integer_zero_node;
1138       return integer_zero_node;
1139     }
1140   /* Record the base object and continue looking.  */
1141   *static_cast<tree *> (wdata) = obj;
1142   return NULL_TREE;
1143 }
1144 
1145 /* Returns the memory object to which EXPR points, with caching.  Returns NULL
1146    if we are able to determine that it does not point to any such object;
1147    returns integer_zero_node if EXPR contains multiple base objects.  */
1148 
1149 static tree
1150 determine_base_object (struct ivopts_data *data, tree expr)
1151 {
1152   tree *slot, obj = NULL_TREE;
1153   if (data->base_object_map)
1154     {
1155       if ((slot = data->base_object_map->get(expr)) != NULL)
1156 	return *slot;
1157     }
1158   else
1159     data->base_object_map = new hash_map<tree, tree>;
1160 
1161   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1162   data->base_object_map->put (expr, obj);
1163   return obj;
1164 }
1165 
1166 /* Return true if address expression with non-DECL_P operand appears
1167    in EXPR.  */
1168 
1169 static bool
1170 contain_complex_addr_expr (tree expr)
1171 {
1172   bool res = false;
1173 
1174   STRIP_NOPS (expr);
1175   switch (TREE_CODE (expr))
1176     {
1177     case POINTER_PLUS_EXPR:
1178     case PLUS_EXPR:
1179     case MINUS_EXPR:
1180       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1181       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1182       break;
1183 
1184     case ADDR_EXPR:
1185       return (!DECL_P (TREE_OPERAND (expr, 0)));
1186 
1187     default:
1188       return false;
1189     }
1190 
1191   return res;
1192 }
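
/* Examples for the predicate above (illustrative): "&a[i]" and
   "&ptr->field" contain an ADDR_EXPR whose operand is not a bare
   declaration, so the function returns true; "&a", where "a" is a
   declared variable, yields false; for "p + 4" both operands are
   visited and the result is true only if one of them contains such an
   address expression.  */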
1193 
1194 /* Allocates an induction variable with given initial value BASE and step STEP
1195    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1196 
1197 static struct iv *
1198 alloc_iv (struct ivopts_data *data, tree base, tree step,
1199 	  bool no_overflow = false)
1200 {
1201   tree expr = base;
1202   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1203 					      sizeof (struct iv));
1204   gcc_assert (step != NULL_TREE);
1205 
1206   /* Lower address expression in base except ones with DECL_P as operand.
1207      By doing this:
1208        1) More accurate cost can be computed for address expressions;
1209        2) Duplicate candidates won't be created for bases in different
1210 	  forms, like &a[0] and &a.  */
1211   STRIP_NOPS (expr);
1212   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1213       || contain_complex_addr_expr (expr))
1214     {
1215       aff_tree comb;
1216       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1217       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1218     }
1219 
1220   iv->base = base;
1221   iv->base_object = determine_base_object (data, base);
1222   iv->step = step;
1223   iv->biv_p = false;
1224   iv->nonlin_use = NULL;
1225   iv->ssa_name = NULL_TREE;
1226   if (!no_overflow
1227        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1228 			      base, step))
1229     no_overflow = true;
1230   iv->no_overflow = no_overflow;
1231   iv->have_address_use = false;
1232 
1233   return iv;
1234 }
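
/* Illustrative example of the base lowering above: a base written as
   "&a[2]" (an ADDR_EXPR whose operand is not a DECL) is expanded through
   tree_to_aff_combination and rebuilt as "&a + 8" (assuming 4-byte array
   elements), so it shares candidates with a base written directly as
   "&a + 8" and exposes the constant offset to the address cost code.  */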
1235 
1236 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1237    doesn't overflow.  */
1238 
1239 static void
1240 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1241 	bool no_overflow)
1242 {
1243   struct version_info *info = name_info (data, iv);
1244 
1245   gcc_assert (!info->iv);
1246 
1247   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1248   info->iv = alloc_iv (data, base, step, no_overflow);
1249   info->iv->ssa_name = iv;
1250 }
1251 
1252 /* Finds induction variable declaration for VAR.  */
1253 
1254 static struct iv *
1255 get_iv (struct ivopts_data *data, tree var)
1256 {
1257   basic_block bb;
1258   tree type = TREE_TYPE (var);
1259 
1260   if (!POINTER_TYPE_P (type)
1261       && !INTEGRAL_TYPE_P (type))
1262     return NULL;
1263 
1264   if (!name_info (data, var)->iv)
1265     {
1266       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1267 
1268       if (!bb
1269 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1270 	set_iv (data, var, var, build_int_cst (type, 0), true);
1271     }
1272 
1273   return name_info (data, var)->iv;
1274 }
1275 
1276 /* Return the first non-invariant ssa var found in EXPR.  */
1277 
1278 static tree
1279 extract_single_var_from_expr (tree expr)
1280 {
1281   int i, n;
1282   tree tmp;
1283   enum tree_code code;
1284 
1285   if (!expr || is_gimple_min_invariant (expr))
1286     return NULL;
1287 
1288   code = TREE_CODE (expr);
1289   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1290     {
1291       n = TREE_OPERAND_LENGTH (expr);
1292       for (i = 0; i < n; i++)
1293 	{
1294 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1295 
1296 	  if (tmp)
1297 	    return tmp;
1298 	}
1299     }
1300   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1301 }
1302 
1303 /* Finds basic ivs.  */
1304 
1305 static bool
1306 find_bivs (struct ivopts_data *data)
1307 {
1308   gphi *phi;
1309   affine_iv iv;
1310   tree step, type, base, stop;
1311   bool found = false;
1312   struct loop *loop = data->current_loop;
1313   gphi_iterator psi;
1314 
1315   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1316     {
1317       phi = psi.phi ();
1318 
1319       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1320 	continue;
1321 
1322       if (virtual_operand_p (PHI_RESULT (phi)))
1323 	continue;
1324 
1325       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1326 	continue;
1327 
1328       if (integer_zerop (iv.step))
1329 	continue;
1330 
1331       step = iv.step;
1332       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1333       /* Stop expanding the iv base at the first ssa var referred to by the
1334 	 iv step.  Ideally we would stop at any ssa var, but since that is
1335 	 expensive and unlikely to matter, we just do it for the first one.
1336 
1337 	 See PR64705 for the rationale.  */
1338       stop = extract_single_var_from_expr (step);
1339       base = expand_simple_operations (base, stop);
1340       if (contains_abnormal_ssa_name_p (base)
1341 	  || contains_abnormal_ssa_name_p (step))
1342 	continue;
1343 
1344       type = TREE_TYPE (PHI_RESULT (phi));
1345       base = fold_convert (type, base);
1346       if (step)
1347 	{
1348 	  if (POINTER_TYPE_P (type))
1349 	    step = convert_to_ptrofftype (step);
1350 	  else
1351 	    step = fold_convert (type, step);
1352 	}
1353 
1354       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1355       found = true;
1356     }
1357 
1358   return found;
1359 }
1360 
1361 /* Marks basic ivs.  */
1362 
1363 static void
1364 mark_bivs (struct ivopts_data *data)
1365 {
1366   gphi *phi;
1367   gimple *def;
1368   tree var;
1369   struct iv *iv, *incr_iv;
1370   struct loop *loop = data->current_loop;
1371   basic_block incr_bb;
1372   gphi_iterator psi;
1373 
1374   data->bivs_not_used_in_addr = 0;
1375   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1376     {
1377       phi = psi.phi ();
1378 
1379       iv = get_iv (data, PHI_RESULT (phi));
1380       if (!iv)
1381 	continue;
1382 
1383       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1384       def = SSA_NAME_DEF_STMT (var);
1385       /* Don't mark iv peeled from other one as biv.  */
1386       if (def
1387 	  && gimple_code (def) == GIMPLE_PHI
1388 	  && gimple_bb (def) == loop->header)
1389 	continue;
1390 
1391       incr_iv = get_iv (data, var);
1392       if (!incr_iv)
1393 	continue;
1394 
1395       /* If the increment is in the subloop, ignore it.  */
1396       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1397       if (incr_bb->loop_father != data->current_loop
1398 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1399 	continue;
1400 
1401       iv->biv_p = true;
1402       incr_iv->biv_p = true;
1403       if (iv->no_overflow)
1404 	data->bivs_not_used_in_addr++;
1405       if (incr_iv->no_overflow)
1406 	data->bivs_not_used_in_addr++;
1407     }
1408 }
1409 
1410 /* Checks whether STMT defines a linear induction variable and stores its
1411    parameters to IV.  */
1412 
1413 static bool
1414 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1415 {
1416   tree lhs, stop;
1417   struct loop *loop = data->current_loop;
1418 
1419   iv->base = NULL_TREE;
1420   iv->step = NULL_TREE;
1421 
1422   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1423     return false;
1424 
1425   lhs = gimple_assign_lhs (stmt);
1426   if (TREE_CODE (lhs) != SSA_NAME)
1427     return false;
1428 
1429   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1430     return false;
1431 
1432   /* Stop expanding the iv base at the first ssa var referred to by the
1433      iv step.  Ideally we would stop at any ssa var, but since that is
1434      expensive and unlikely to matter, we just do it for the first one.
1435 
1436      See PR64705 for the rationale.  */
1437   stop = extract_single_var_from_expr (iv->step);
1438   iv->base = expand_simple_operations (iv->base, stop);
1439   if (contains_abnormal_ssa_name_p (iv->base)
1440       || contains_abnormal_ssa_name_p (iv->step))
1441     return false;
1442 
1443   /* If STMT could throw, then do not consider STMT as defining a GIV.
1444      While this will suppress optimizations, we cannot safely delete this
1445      GIV and associated statements, even if it appears it is not used.  */
1446   if (stmt_could_throw_p (stmt))
1447     return false;
1448 
1449   return true;
1450 }
1451 
1452 /* Finds general ivs in statement STMT.  */
1453 
1454 static void
1455 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1456 {
1457   affine_iv iv;
1458 
1459   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1460     return;
1461 
1462   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1463 }
1464 
1465 /* Finds general ivs in basic block BB.  */
1466 
1467 static void
1468 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1469 {
1470   gimple_stmt_iterator bsi;
1471 
1472   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1473     find_givs_in_stmt (data, gsi_stmt (bsi));
1474 }
1475 
1476 /* Finds general ivs.  */
1477 
1478 static void
1479 find_givs (struct ivopts_data *data)
1480 {
1481   struct loop *loop = data->current_loop;
1482   basic_block *body = get_loop_body_in_dom_order (loop);
1483   unsigned i;
1484 
1485   for (i = 0; i < loop->num_nodes; i++)
1486     find_givs_in_bb (data, body[i]);
1487   free (body);
1488 }
1489 
1490 /* For each ssa name defined in LOOP, determines whether it is an induction
1491    variable and, if so, its initial value and step.  */
1492 
1493 static bool
1494 find_induction_variables (struct ivopts_data *data)
1495 {
1496   unsigned i;
1497   bitmap_iterator bi;
1498 
1499   if (!find_bivs (data))
1500     return false;
1501 
1502   find_givs (data);
1503   mark_bivs (data);
1504 
1505   if (dump_file && (dump_flags & TDF_DETAILS))
1506     {
1507       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1508 
1509       if (niter)
1510 	{
1511 	  fprintf (dump_file, "  number of iterations ");
1512 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1513 	  if (!integer_zerop (niter->may_be_zero))
1514 	    {
1515 	      fprintf (dump_file, "; zero if ");
1516 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1517 	    }
1518 	  fprintf (dump_file, "\n");
1519 	}
1520 
1521       fprintf (dump_file, "\n<Induction Vars>:\n");
1522       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1523 	{
1524 	  struct version_info *info = ver_info (data, i);
1525 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1526 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1527 	}
1528     }
1529 
1530   return true;
1531 }
1532 
1533 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1534    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1535    is the const offset stripped from IV base and MEM_TYPE is the type
1536    of the memory being addressed.  For uses of other types, ADDR_BASE
1537    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1538 
1539 static struct iv_use *
1540 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1541 	    gimple *stmt, enum use_type type, tree mem_type,
1542 	    tree addr_base, poly_uint64 addr_offset)
1543 {
1544   struct iv_use *use = XCNEW (struct iv_use);
1545 
1546   use->id = group->vuses.length ();
1547   use->group_id = group->id;
1548   use->type = type;
1549   use->mem_type = mem_type;
1550   use->iv = iv;
1551   use->stmt = stmt;
1552   use->op_p = use_p;
1553   use->addr_base = addr_base;
1554   use->addr_offset = addr_offset;
1555 
1556   group->vuses.safe_push (use);
1557   return use;
1558 }
1559 
1560 /* Checks whether OP is a loop-level invariant and if so, records it.
1561    NONLINEAR_USE is true if the invariant is used in a way we do not
1562    handle specially.  */
1563 
1564 static void
1565 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1566 {
1567   basic_block bb;
1568   struct version_info *info;
1569 
1570   if (TREE_CODE (op) != SSA_NAME
1571       || virtual_operand_p (op))
1572     return;
1573 
1574   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1575   if (bb
1576       && flow_bb_inside_loop_p (data->current_loop, bb))
1577     return;
1578 
1579   info = name_info (data, op);
1580   info->name = op;
1581   info->has_nonlin_use |= nonlinear_use;
1582   if (!info->inv_id)
1583     info->inv_id = ++data->max_inv_var_id;
1584   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1585 }
1586 
1587 /* Record a group of TYPE.  */
1588 
1589 static struct iv_group *
1590 record_group (struct ivopts_data *data, enum use_type type)
1591 {
1592   struct iv_group *group = XCNEW (struct iv_group);
1593 
1594   group->id = data->vgroups.length ();
1595   group->type = type;
1596   group->related_cands = BITMAP_ALLOC (NULL);
1597   group->vuses.create (1);
1598 
1599   data->vgroups.safe_push (group);
1600   return group;
1601 }
1602 
1603 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1604    A new group will be created if there is no existing group for the use.
1605    MEM_TYPE is the type of memory being addressed, or NULL if this
1606    isn't an address reference.  */
1607 
1608 static struct iv_use *
1609 record_group_use (struct ivopts_data *data, tree *use_p,
1610 		  struct iv *iv, gimple *stmt, enum use_type type,
1611 		  tree mem_type)
1612 {
1613   tree addr_base = NULL;
1614   struct iv_group *group = NULL;
1615   poly_uint64 addr_offset = 0;
1616 
1617   /* Address type uses may be put into an existing group; other uses always get a new one.  */
1618   if (address_p (type))
1619     {
1620       unsigned int i;
1621 
1622       addr_base = strip_offset (iv->base, &addr_offset);
1623       for (i = 0; i < data->vgroups.length (); i++)
1624 	{
1625 	  struct iv_use *use;
1626 
1627 	  group = data->vgroups[i];
1628 	  use = group->vuses[0];
1629 	  if (!address_p (use->type))
1630 	    continue;
1631 
1632 	  /* Check if it has the same stripped base and step.  */
1633 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1634 	      && operand_equal_p (iv->step, use->iv->step, 0)
1635 	      && operand_equal_p (addr_base, use->addr_base, 0))
1636 	    break;
1637 	}
1638       if (i == data->vgroups.length ())
1639 	group = NULL;
1640     }
1641 
1642   if (!group)
1643     group = record_group (data, type);
1644 
1645   return record_use (group, use_p, iv, stmt, type, mem_type,
1646 		     addr_base, addr_offset);
1647 }
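
/* Grouping example (illustrative): the address uses "a[i]" and "a[i + 1]"
   have the same base object, the same step and the same stripped base, and
   differ only in the constant offset, so the second use is appended to the
   group created for the first one.  A use of "b[i]" has a different base
   object and therefore starts a group of its own.  */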
1648 
1649 /* Checks whether the use OP is interesting and if so, records it.  */
1650 
1651 static struct iv_use *
1652 find_interesting_uses_op (struct ivopts_data *data, tree op)
1653 {
1654   struct iv *iv;
1655   gimple *stmt;
1656   struct iv_use *use;
1657 
1658   if (TREE_CODE (op) != SSA_NAME)
1659     return NULL;
1660 
1661   iv = get_iv (data, op);
1662   if (!iv)
1663     return NULL;
1664 
1665   if (iv->nonlin_use)
1666     {
1667       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1668       return iv->nonlin_use;
1669     }
1670 
1671   if (integer_zerop (iv->step))
1672     {
1673       record_invariant (data, op, true);
1674       return NULL;
1675     }
1676 
1677   stmt = SSA_NAME_DEF_STMT (op);
1678   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1679 
1680   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1681   iv->nonlin_use = use;
1682   return use;
1683 }
1684 
1685 /* Indicate how compare type iv_use can be handled.  */
1686 enum comp_iv_rewrite
1687 {
1688   COMP_IV_NA,
1689   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1690   COMP_IV_EXPR,
1691   /* We may rewrite compare type iv_uses on both sides of comparison by
1692      expressing value of each iv_use.  */
1693   COMP_IV_EXPR_2,
1694   /* We may rewrite compare type iv_use by expressing value of the iv_use
1695      or by eliminating it with other iv_cand.  */
1696   COMP_IV_ELIM
1697 };
1698 
1699 /* Given a condition in statement STMT, checks whether it is a compare
1700    of an induction variable and an invariant.  If this is the case,
1701    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1702    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1703    induction variable descriptions, and the appropriate COMP_IV_* value
1704    is returned.  If this is not the case, CONTROL_VAR and BOUND are set
1705    to the arguments of the condition and COMP_IV_NA is returned.  */
1706 
1707 static enum comp_iv_rewrite
1708 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1709 		       tree **control_var, tree **bound,
1710 		       struct iv **iv_var, struct iv **iv_bound)
1711 {
1712   /* The objects returned when COND has constant operands.  */
1713   static struct iv const_iv;
1714   static tree zero;
1715   tree *op0 = &zero, *op1 = &zero;
1716   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1717   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1718 
1719   if (gimple_code (stmt) == GIMPLE_COND)
1720     {
1721       gcond *cond_stmt = as_a <gcond *> (stmt);
1722       op0 = gimple_cond_lhs_ptr (cond_stmt);
1723       op1 = gimple_cond_rhs_ptr (cond_stmt);
1724     }
1725   else
1726     {
1727       op0 = gimple_assign_rhs1_ptr (stmt);
1728       op1 = gimple_assign_rhs2_ptr (stmt);
1729     }
1730 
1731   zero = integer_zero_node;
1732   const_iv.step = integer_zero_node;
1733 
1734   if (TREE_CODE (*op0) == SSA_NAME)
1735     iv0 = get_iv (data, *op0);
1736   if (TREE_CODE (*op1) == SSA_NAME)
1737     iv1 = get_iv (data, *op1);
1738 
1739   /* If both sides of the comparison are IVs, we can express both of them.  */
1740   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1741     {
1742       rewrite_type = COMP_IV_EXPR_2;
1743       goto end;
1744     }
1745 
1746   /* If neither side of the comparison is an IV.  */
1747   if ((!iv0 || integer_zerop (iv0->step))
1748       && (!iv1 || integer_zerop (iv1->step)))
1749     goto end;
1750 
1751   /* Control variable may be on the other side.  */
1752   if (!iv0 || integer_zerop (iv0->step))
1753     {
1754       std::swap (op0, op1);
1755       std::swap (iv0, iv1);
1756     }
1757   /* If one side is an IV and the other side isn't loop invariant.  */
1758   if (!iv1)
1759     rewrite_type = COMP_IV_EXPR;
1760   /* If one side is an IV and the other side is loop invariant.  */
1761   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1762     rewrite_type = COMP_IV_ELIM;
1763 
1764 end:
1765   if (control_var)
1766     *control_var = op0;
1767   if (iv_var)
1768     *iv_var = iv0;
1769   if (bound)
1770     *bound = op1;
1771   if (iv_bound)
1772     *iv_bound = iv1;
1773 
1774   return rewrite_type;
1775 }
1776 
1777 /* Checks whether the condition in STMT is interesting and if so,
1778    records it.  */
1779 
1780 static void
1781 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1782 {
1783   tree *var_p, *bound_p;
1784   struct iv *var_iv, *bound_iv;
1785   enum comp_iv_rewrite ret;
1786 
1787   ret = extract_cond_operands (data, stmt,
1788 			       &var_p, &bound_p, &var_iv, &bound_iv);
1789   if (ret == COMP_IV_NA)
1790     {
1791       find_interesting_uses_op (data, *var_p);
1792       find_interesting_uses_op (data, *bound_p);
1793       return;
1794     }
1795 
1796   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1797   /* Record compare type iv_use for iv on the other side of comparison.  */
1798   if (ret == COMP_IV_EXPR_2)
1799     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1800 }
1801 
1802 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR
1803    is obviously invariant, i.e. the outermost loop such that all of
1804    EXPR's operands are defined outside of it.  Returns NULL if EXPR is
1805    not even obviously invariant in LOOP.  */
1806 
1807 struct loop *
1808 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1809 {
1810   basic_block def_bb;
1811   unsigned i, len;
1812 
1813   if (is_gimple_min_invariant (expr))
1814     return current_loops->tree_root;
1815 
1816   if (TREE_CODE (expr) == SSA_NAME)
1817     {
1818       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1819       if (def_bb)
1820 	{
1821 	  if (flow_bb_inside_loop_p (loop, def_bb))
1822 	    return NULL;
1823 	  return superloop_at_depth (loop,
1824 				     loop_depth (def_bb->loop_father) + 1);
1825 	}
1826 
1827       return current_loops->tree_root;
1828     }
1829 
1830   if (!EXPR_P (expr))
1831     return NULL;
1832 
1833   unsigned maxdepth = 0;
1834   len = TREE_OPERAND_LENGTH (expr);
1835   for (i = 0; i < len; i++)
1836     {
1837       struct loop *ivloop;
1838       if (!TREE_OPERAND (expr, i))
1839 	continue;
1840 
1841       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1842       if (!ivloop)
1843 	return NULL;
1844       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1845     }
1846 
1847   return superloop_at_depth (loop, maxdepth);
1848 }
1849 
1850 /* Returns true if expression EXPR is obviously invariant in LOOP,
1851    i.e. if all its operands are defined outside of the LOOP.  LOOP
1852    should not be the function body.  */
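/* For instance, with "for (i = 0; i < n; i++)" where n is defined before
   the loop, the expression n + 4 is invariant in the loop, whereas i * 2
   is not.  */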
1853 
1854 bool
1855 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1856 {
1857   basic_block def_bb;
1858   unsigned i, len;
1859 
1860   gcc_assert (loop_depth (loop) > 0);
1861 
1862   if (is_gimple_min_invariant (expr))
1863     return true;
1864 
1865   if (TREE_CODE (expr) == SSA_NAME)
1866     {
1867       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1868       if (def_bb
1869 	  && flow_bb_inside_loop_p (loop, def_bb))
1870 	return false;
1871 
1872       return true;
1873     }
1874 
1875   if (!EXPR_P (expr))
1876     return false;
1877 
1878   len = TREE_OPERAND_LENGTH (expr);
1879   for (i = 0; i < len; i++)
1880     if (TREE_OPERAND (expr, i)
1881 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1882       return false;
1883 
1884   return true;
1885 }
1886 
1887 /* Given expression EXPR which computes inductive values with respect
1888    to loop recorded in DATA, this function returns biv from which EXPR
1889    is derived by tracing definition chains of ssa variables in EXPR.  */
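/* For instance, if EXPR is "i_2 * 4 + 16" and i_2 is an iv defined by
   "i_2 = j_1 * 2" where j_1 is a biv, the description of j_1 is returned
   (the SSA names here are purely illustrative).  */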
1890 
1891 static struct iv*
1892 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1893 {
1894   struct iv *iv;
1895   unsigned i, n;
1896   tree e2, e1;
1897   enum tree_code code;
1898   gimple *stmt;
1899 
1900   if (expr == NULL_TREE)
1901     return NULL;
1902 
1903   if (is_gimple_min_invariant (expr))
1904     return NULL;
1905 
1906   code = TREE_CODE (expr);
1907   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1908     {
1909       n = TREE_OPERAND_LENGTH (expr);
1910       for (i = 0; i < n; i++)
1911 	{
1912 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1913 	  if (iv)
1914 	    return iv;
1915 	}
1916     }
1917 
1918   /* Stop if it's not an SSA name.  */
1919   if (code != SSA_NAME)
1920     return NULL;
1921 
1922   iv = get_iv (data, expr);
1923   if (!iv || integer_zerop (iv->step))
1924     return NULL;
1925   else if (iv->biv_p)
1926     return iv;
1927 
1928   stmt = SSA_NAME_DEF_STMT (expr);
1929   if (gphi *phi = dyn_cast <gphi *> (stmt))
1930     {
1931       ssa_op_iter iter;
1932       use_operand_p use_p;
1933       basic_block phi_bb = gimple_bb (phi);
1934 
1935       /* Skip loop header PHI that doesn't define biv.  */
1936       if (phi_bb->loop_father == data->current_loop)
1937 	return NULL;
1938 
1939       if (virtual_operand_p (gimple_phi_result (phi)))
1940 	return NULL;
1941 
1942       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1943 	{
1944 	  tree use = USE_FROM_PTR (use_p);
1945 	  iv = find_deriving_biv_for_expr (data, use);
1946 	  if (iv)
1947 	    return iv;
1948 	}
1949       return NULL;
1950     }
1951   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1952     return NULL;
1953 
1954   e1 = gimple_assign_rhs1 (stmt);
1955   code = gimple_assign_rhs_code (stmt);
1956   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1957     return find_deriving_biv_for_expr (data, e1);
1958 
1959   switch (code)
1960     {
1961     case MULT_EXPR:
1962     case PLUS_EXPR:
1963     case MINUS_EXPR:
1964     case POINTER_PLUS_EXPR:
1965       /* Increments, decrements and multiplications by a constant
1966 	 are simple.  */
1967       e2 = gimple_assign_rhs2 (stmt);
1968       iv = find_deriving_biv_for_expr (data, e2);
1969       if (iv)
1970 	return iv;
1971       gcc_fallthrough ();
1972 
1973     CASE_CONVERT:
1974       /* Casts are simple.  */
1975       return find_deriving_biv_for_expr (data, e1);
1976 
1977     default:
1978       break;
1979     }
1980 
1981   return NULL;
1982 }
1983 
1984 /* Record that BIV, as well as any biv that is its predecessor or successor
1985    (i.e. differs from it by exactly one step), is used in address type uses.  */
1986 
1987 static void
1988 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1989 {
1990   unsigned i;
1991   tree type, base_1, base_2;
1992   bitmap_iterator bi;
1993 
1994   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1995       || biv->have_address_use || !biv->no_overflow)
1996     return;
1997 
1998   type = TREE_TYPE (biv->base);
1999   if (!INTEGRAL_TYPE_P (type))
2000     return;
2001 
2002   biv->have_address_use = true;
2003   data->bivs_not_used_in_addr--;
2004   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2005   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2006     {
2007       struct iv *iv = ver_info (data, i)->iv;
2008 
2009       if (!iv || !iv->biv_p || integer_zerop (iv->step)
2010 	  || iv->have_address_use || !iv->no_overflow)
2011 	continue;
2012 
2013       if (type != TREE_TYPE (iv->base)
2014 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2015 	continue;
2016 
2017       if (!operand_equal_p (biv->step, iv->step, 0))
2018 	continue;
2019 
2020       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2021       if (operand_equal_p (base_1, iv->base, 0)
2022 	  || operand_equal_p (base_2, biv->base, 0))
2023 	{
2024 	  iv->have_address_use = true;
2025 	  data->bivs_not_used_in_addr--;
2026 	}
2027     }
2028 }
2029 
2030 /* Cumulates the steps of indices into DATA and replaces their values with the
2031    initial ones.  Returns false when the value of the index cannot be determined.
2032    Callback for for_each_index.  */
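/* For instance, for an access a[i] where i is an iv with step 1 and the
   array elements are 4 bytes wide, 4 is accumulated into the step stored
   in DATA and the index is replaced by the initial value of i.  */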
2033 
2034 struct ifs_ivopts_data
2035 {
2036   struct ivopts_data *ivopts_data;
2037   gimple *stmt;
2038   tree step;
2039 };
2040 
2041 static bool
2042 idx_find_step (tree base, tree *idx, void *data)
2043 {
2044   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2045   struct iv *iv;
2046   bool use_overflow_semantics = false;
2047   tree step, iv_base, iv_step, lbound, off;
2048   struct loop *loop = dta->ivopts_data->current_loop;
2049 
2050   /* If base is a component ref, require that the offset of the reference
2051      be invariant.  */
2052   if (TREE_CODE (base) == COMPONENT_REF)
2053     {
2054       off = component_ref_field_offset (base);
2055       return expr_invariant_in_loop_p (loop, off);
2056     }
2057 
2058   /* If base is array, first check whether we will be able to move the
2059      reference out of the loop (in order to take its address in strength
2060      reduction).  In order for this to work we need both lower bound
2061      and step to be loop invariants.  */
2062   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2063     {
2064       /* Moreover, for a range, the size needs to be invariant as well.  */
2065       if (TREE_CODE (base) == ARRAY_RANGE_REF
2066 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2067 	return false;
2068 
2069       step = array_ref_element_size (base);
2070       lbound = array_ref_low_bound (base);
2071 
2072       if (!expr_invariant_in_loop_p (loop, step)
2073 	  || !expr_invariant_in_loop_p (loop, lbound))
2074 	return false;
2075     }
2076 
2077   if (TREE_CODE (*idx) != SSA_NAME)
2078     return true;
2079 
2080   iv = get_iv (dta->ivopts_data, *idx);
2081   if (!iv)
2082     return false;
2083 
2084   /* XXX  For a base of *D42 with iv->base being &x[0] we produce
2085 	  *&x[0], which is not folded and does not trigger the
2086 	  ARRAY_REF path below.  */
2087   *idx = iv->base;
2088 
2089   if (integer_zerop (iv->step))
2090     return true;
2091 
2092   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2093     {
2094       step = array_ref_element_size (base);
2095 
2096       /* We only handle addresses whose step is an integer constant.  */
2097       if (TREE_CODE (step) != INTEGER_CST)
2098 	return false;
2099     }
2100   else
2101     /* The step for pointer arithmetic is already 1 byte.  */
2102     step = size_one_node;
2103 
2104   iv_base = iv->base;
2105   iv_step = iv->step;
2106   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2107     use_overflow_semantics = true;
2108 
2109   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2110 			    sizetype, &iv_base, &iv_step, dta->stmt,
2111 			    use_overflow_semantics))
2112     {
2113       /* The index might wrap.  */
2114       return false;
2115     }
2116 
2117   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2118   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2119 
2120   if (dta->ivopts_data->bivs_not_used_in_addr)
2121     {
2122       if (!iv->biv_p)
2123 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2124 
2125       record_biv_for_address_use (dta->ivopts_data, iv);
2126     }
2127   return true;
2128 }
2129 
2130 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2131    object is passed to it in DATA.  */
2132 
2133 static bool
2134 idx_record_use (tree base, tree *idx,
2135 		void *vdata)
2136 {
2137   struct ivopts_data *data = (struct ivopts_data *) vdata;
2138   find_interesting_uses_op (data, *idx);
2139   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2140     {
2141       find_interesting_uses_op (data, array_ref_element_size (base));
2142       find_interesting_uses_op (data, array_ref_low_bound (base));
2143     }
2144   return true;
2145 }
2146 
2147 /* If we can prove that TOP = cst * BOT for some constant cst,
2148    store cst to MUL and return true.  Otherwise return false.
2149    The returned value is always sign-extended, regardless of the
2150    signedness of TOP and BOT.  */
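/* For instance, for TOP = a * 4 + a * 8 and BOT = a the function stores 12
   to MUL; for TOP = 12 and BOT = 4 it stores 3.  */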
2151 
2152 static bool
2153 constant_multiple_of (tree top, tree bot, widest_int *mul)
2154 {
2155   tree mby;
2156   enum tree_code code;
2157   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2158   widest_int res, p0, p1;
2159 
2160   STRIP_NOPS (top);
2161   STRIP_NOPS (bot);
2162 
2163   if (operand_equal_p (top, bot, 0))
2164     {
2165       *mul = 1;
2166       return true;
2167     }
2168 
2169   code = TREE_CODE (top);
2170   switch (code)
2171     {
2172     case MULT_EXPR:
2173       mby = TREE_OPERAND (top, 1);
2174       if (TREE_CODE (mby) != INTEGER_CST)
2175 	return false;
2176 
2177       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2178 	return false;
2179 
2180       *mul = wi::sext (res * wi::to_widest (mby), precision);
2181       return true;
2182 
2183     case PLUS_EXPR:
2184     case MINUS_EXPR:
2185       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2186 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2187 	return false;
2188 
2189       if (code == MINUS_EXPR)
2190 	p1 = -p1;
2191       *mul = wi::sext (p0 + p1, precision);
2192       return true;
2193 
2194     case INTEGER_CST:
2195       if (TREE_CODE (bot) != INTEGER_CST)
2196 	return false;
2197 
2198       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2199       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2200       if (p1 == 0)
2201 	return false;
2202       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2203       return res == 0;
2204 
2205     default:
2206       if (POLY_INT_CST_P (top)
2207 	  && POLY_INT_CST_P (bot)
2208 	  && constant_multiple_p (wi::to_poly_widest (top),
2209 				  wi::to_poly_widest (bot), mul))
2210 	return true;
2211 
2212       return false;
2213     }
2214 }
2215 
2216 /* Return true if memory reference REF with step STEP may be unaligned.  */
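/* For instance, a 4-byte access whose address advances by STEP = 2 bytes
   per iteration may be misaligned in some iteration even if the initial
   address is sufficiently aligned.  */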
2217 
2218 static bool
2219 may_be_unaligned_p (tree ref, tree step)
2220 {
2221   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2222      thus they are not misaligned.  */
2223   if (TREE_CODE (ref) == TARGET_MEM_REF)
2224     return false;
2225 
2226   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2227   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2228     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2229 
2230   unsigned HOST_WIDE_INT bitpos;
2231   unsigned int ref_align;
2232   get_object_alignment_1 (ref, &ref_align, &bitpos);
2233   if (ref_align < align
2234       || (bitpos % align) != 0
2235       || (bitpos % BITS_PER_UNIT) != 0)
2236     return true;
2237 
2238   unsigned int trailing_zeros = tree_ctz (step);
2239   if (trailing_zeros < HOST_BITS_PER_INT
2240       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2241     return true;
2242 
2243   return false;
2244 }
2245 
2246 /* Return true if EXPR may be non-addressable.   */
2247 
2248 bool
2249 may_be_nonaddressable_p (tree expr)
2250 {
2251   switch (TREE_CODE (expr))
2252     {
2253     case TARGET_MEM_REF:
2254       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2255 	 target, thus they are always addressable.  */
2256       return false;
2257 
2258     case MEM_REF:
2259       /* Likewise for MEM_REFs, modulo the storage order.  */
2260       return REF_REVERSE_STORAGE_ORDER (expr);
2261 
2262     case BIT_FIELD_REF:
2263       if (REF_REVERSE_STORAGE_ORDER (expr))
2264 	return true;
2265       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2266 
2267     case COMPONENT_REF:
2268       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2269 	return true;
2270       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2271 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2272 
2273     case ARRAY_REF:
2274     case ARRAY_RANGE_REF:
2275       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2276 	return true;
2277       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2278 
2279     case VIEW_CONVERT_EXPR:
2280       /* This kind of view-conversion may wrap non-addressable objects
2281 	 and make them look addressable.  After some processing the
2282 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2283 	 of inappropriate objects to be built.  */
2284       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2285 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2286 	return true;
2287       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2288 
2289     CASE_CONVERT:
2290       return true;
2291 
2292     default:
2293       break;
2294     }
2295 
2296   return false;
2297 }
2298 
2299 /* Finds addresses in *OP_P inside STMT.  */
2300 
2301 static void
2302 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2303 			       tree *op_p)
2304 {
2305   tree base = *op_p, step = size_zero_node;
2306   struct iv *civ;
2307   struct ifs_ivopts_data ifs_ivopts_data;
2308 
2309   /* Do not play with volatile memory references.  A bit too conservative,
2310      perhaps, but safe.  */
2311   if (gimple_has_volatile_ops (stmt))
2312     goto fail;
2313 
2314   /* Ignore bitfields for now.  Not really something terribly complicated
2315      to handle.  TODO.  */
2316   if (TREE_CODE (base) == BIT_FIELD_REF)
2317     goto fail;
2318 
2319   base = unshare_expr (base);
2320 
2321   if (TREE_CODE (base) == TARGET_MEM_REF)
2322     {
2323       tree type = build_pointer_type (TREE_TYPE (base));
2324       tree astep;
2325 
2326       if (TMR_BASE (base)
2327 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2328 	{
2329 	  civ = get_iv (data, TMR_BASE (base));
2330 	  if (!civ)
2331 	    goto fail;
2332 
2333 	  TMR_BASE (base) = civ->base;
2334 	  step = civ->step;
2335 	}
2336       if (TMR_INDEX2 (base)
2337 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2338 	{
2339 	  civ = get_iv (data, TMR_INDEX2 (base));
2340 	  if (!civ)
2341 	    goto fail;
2342 
2343 	  TMR_INDEX2 (base) = civ->base;
2344 	  step = civ->step;
2345 	}
2346       if (TMR_INDEX (base)
2347 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2348 	{
2349 	  civ = get_iv (data, TMR_INDEX (base));
2350 	  if (!civ)
2351 	    goto fail;
2352 
2353 	  TMR_INDEX (base) = civ->base;
2354 	  astep = civ->step;
2355 
2356 	  if (astep)
2357 	    {
2358 	      if (TMR_STEP (base))
2359 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2360 
2361 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2362 	    }
2363 	}
2364 
2365       if (integer_zerop (step))
2366 	goto fail;
2367       base = tree_mem_ref_addr (type, base);
2368     }
2369   else
2370     {
2371       ifs_ivopts_data.ivopts_data = data;
2372       ifs_ivopts_data.stmt = stmt;
2373       ifs_ivopts_data.step = size_zero_node;
2374       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2375 	  || integer_zerop (ifs_ivopts_data.step))
2376 	goto fail;
2377       step = ifs_ivopts_data.step;
2378 
2379       /* Check that the base expression is addressable.  This needs
2380 	 to be done after substituting bases of IVs into it.  */
2381       if (may_be_nonaddressable_p (base))
2382 	goto fail;
2383 
2384       /* Moreover, on strict alignment platforms, check that it is
2385 	 sufficiently aligned.  */
2386       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2387 	goto fail;
2388 
2389       base = build_fold_addr_expr (base);
2390 
2391       /* Substituting bases of IVs into the base expression might
2392 	 have caused folding opportunities.  */
2393       if (TREE_CODE (base) == ADDR_EXPR)
2394 	{
2395 	  tree *ref = &TREE_OPERAND (base, 0);
2396 	  while (handled_component_p (*ref))
2397 	    ref = &TREE_OPERAND (*ref, 0);
2398 	  if (TREE_CODE (*ref) == MEM_REF)
2399 	    {
2400 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2401 				      TREE_OPERAND (*ref, 0),
2402 				      TREE_OPERAND (*ref, 1));
2403 	      if (tem)
2404 		*ref = tem;
2405 	    }
2406 	}
2407     }
2408 
2409   civ = alloc_iv (data, base, step);
2410   /* Fail if base object of this memory reference is unknown.  */
2411   if (civ->base_object == NULL_TREE)
2412     goto fail;
2413 
2414   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2415   return;
2416 
2417 fail:
2418   for_each_index (op_p, idx_record_use, data);
2419 }
2420 
2421 /* Finds and records invariants used in STMT.  */
2422 
2423 static void
2424 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2425 {
2426   ssa_op_iter iter;
2427   use_operand_p use_p;
2428   tree op;
2429 
2430   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2431     {
2432       op = USE_FROM_PTR (use_p);
2433       record_invariant (data, op, false);
2434     }
2435 }
2436 
2437 /* CALL calls an internal function.  If operand *OP_P will become an
2438    address when the call is expanded, return the type of the memory
2439    being addressed, otherwise return null.  */
2440 
2441 static tree
2442 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2443 {
2444   switch (gimple_call_internal_fn (call))
2445     {
2446     case IFN_MASK_LOAD:
2447       if (op_p == gimple_call_arg_ptr (call, 0))
2448 	return TREE_TYPE (gimple_call_lhs (call));
2449       return NULL_TREE;
2450 
2451     case IFN_MASK_STORE:
2452       if (op_p == gimple_call_arg_ptr (call, 0))
2453 	return TREE_TYPE (gimple_call_arg (call, 3));
2454       return NULL_TREE;
2455 
2456     default:
2457       return NULL_TREE;
2458     }
2459 }
2460 
2461 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2462    Return true if the operand will become an address when STMT
2463    is expanded and record the associated address use if so.  */
2464 
2465 static bool
2466 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2467 		       struct iv *iv)
2468 {
2469   /* Fail if base object of this memory reference is unknown.  */
2470   if (iv->base_object == NULL_TREE)
2471     return false;
2472 
2473   tree mem_type = NULL_TREE;
2474   if (gcall *call = dyn_cast <gcall *> (stmt))
2475     if (gimple_call_internal_p (call))
2476       mem_type = get_mem_type_for_internal_fn (call, op_p);
2477   if (mem_type)
2478     {
2479       iv = alloc_iv (data, iv->base, iv->step);
2480       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2481       return true;
2482     }
2483   return false;
2484 }
2485 
2486 /* Finds interesting uses of induction variables in the statement STMT.  */
2487 
2488 static void
2489 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2490 {
2491   struct iv *iv;
2492   tree op, *lhs, *rhs;
2493   ssa_op_iter iter;
2494   use_operand_p use_p;
2495   enum tree_code code;
2496 
2497   find_invariants_stmt (data, stmt);
2498 
2499   if (gimple_code (stmt) == GIMPLE_COND)
2500     {
2501       find_interesting_uses_cond (data, stmt);
2502       return;
2503     }
2504 
2505   if (is_gimple_assign (stmt))
2506     {
2507       lhs = gimple_assign_lhs_ptr (stmt);
2508       rhs = gimple_assign_rhs1_ptr (stmt);
2509 
2510       if (TREE_CODE (*lhs) == SSA_NAME)
2511 	{
2512 	  /* If the statement defines an induction variable, the uses are not
2513 	     interesting by themselves.  */
2514 
2515 	  iv = get_iv (data, *lhs);
2516 
2517 	  if (iv && !integer_zerop (iv->step))
2518 	    return;
2519 	}
2520 
2521       code = gimple_assign_rhs_code (stmt);
2522       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2523 	  && (REFERENCE_CLASS_P (*rhs)
2524 	      || is_gimple_val (*rhs)))
2525 	{
2526 	  if (REFERENCE_CLASS_P (*rhs))
2527 	    find_interesting_uses_address (data, stmt, rhs);
2528 	  else
2529 	    find_interesting_uses_op (data, *rhs);
2530 
2531 	  if (REFERENCE_CLASS_P (*lhs))
2532 	    find_interesting_uses_address (data, stmt, lhs);
2533 	  return;
2534 	}
2535       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2536 	{
2537 	  find_interesting_uses_cond (data, stmt);
2538 	  return;
2539 	}
2540 
2541       /* TODO -- we should also handle address uses of type
2542 
2543 	 memory = call (whatever);
2544 
2545 	 and
2546 
2547 	 call (memory).  */
2548     }
2549 
2550   if (gimple_code (stmt) == GIMPLE_PHI
2551       && gimple_bb (stmt) == data->current_loop->header)
2552     {
2553       iv = get_iv (data, PHI_RESULT (stmt));
2554 
2555       if (iv && !integer_zerop (iv->step))
2556 	return;
2557     }
2558 
2559   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2560     {
2561       op = USE_FROM_PTR (use_p);
2562 
2563       if (TREE_CODE (op) != SSA_NAME)
2564 	continue;
2565 
2566       iv = get_iv (data, op);
2567       if (!iv)
2568 	continue;
2569 
2570       if (!find_address_like_use (data, stmt, use_p->use, iv))
2571 	find_interesting_uses_op (data, op);
2572     }
2573 }
2574 
2575 /* Finds interesting uses of induction variables outside of loops
2576    on loop exit edge EXIT.  */
2577 
2578 static void
2579 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2580 {
2581   gphi *phi;
2582   gphi_iterator psi;
2583   tree def;
2584 
2585   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2586     {
2587       phi = psi.phi ();
2588       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2589       if (!virtual_operand_p (def))
2590 	find_interesting_uses_op (data, def);
2591     }
2592 }
2593 
2594 /* Return TRUE if OFFSET is within the range supported by the [base + offset]
2595    addressing mode for the memory reference represented by USE.  */
2596 
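/* Cache of sample addresses: one "reg + offset" RTX per (address space,
   memory mode) pair; the offset operand is overwritten for each query.  */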
2597 static GTY (()) vec<rtx, va_gc> *addr_list;
2598 
2599 static bool
2600 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2601 {
2602   rtx reg, addr;
2603   unsigned list_index;
2604   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2605   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2606 
2607   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2608   if (list_index >= vec_safe_length (addr_list))
2609     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2610 
2611   addr = (*addr_list)[list_index];
2612   if (!addr)
2613     {
2614       addr_mode = targetm.addr_space.address_mode (as);
2615       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2616       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2617       (*addr_list)[list_index] = addr;
2618     }
2619   else
2620     addr_mode = GET_MODE (addr);
2621 
2622   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2623   return (memory_address_addr_space_p (mem_mode, addr, as));
2624 }
2625 
2626 /* Comparison function to sort group in ascending order of addr_offset.  */
2627 
2628 static int
2629 group_compare_offset (const void *a, const void *b)
2630 {
2631   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2632   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2633 
2634   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2635 }
2636 
2637 /* Check if small groups should be split.  Return true if no group
2638    contains more than two uses with distinct addr_offsets.  Return
2639    false otherwise.  We want to split such groups because:
2640
2641      1) Small groups don't have much benefit and may interfere with
2642 	general candidate selection.
2643      2) A problem consisting only of small groups is usually small and
2644 	the general algorithm can handle it well.
2645
2646    TODO -- The above claim may not hold when we want to merge memory
2647    accesses with consecutive addresses.  */
2648 
2649 static bool
2650 split_small_address_groups_p (struct ivopts_data *data)
2651 {
2652   unsigned int i, j, distinct = 1;
2653   struct iv_use *pre;
2654   struct iv_group *group;
2655 
2656   for (i = 0; i < data->vgroups.length (); i++)
2657     {
2658       group = data->vgroups[i];
2659       if (group->vuses.length () == 1)
2660 	continue;
2661 
2662       gcc_assert (address_p (group->type));
2663       if (group->vuses.length () == 2)
2664 	{
2665 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2666 				      group->vuses[1]->addr_offset) > 0)
2667 	    std::swap (group->vuses[0], group->vuses[1]);
2668 	}
2669       else
2670 	group->vuses.qsort (group_compare_offset);
2671 
2672       if (distinct > 2)
2673 	continue;
2674 
2675       distinct = 1;
2676       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2677 	{
2678 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2679 	    {
2680 	      pre = group->vuses[j];
2681 	      distinct++;
2682 	    }
2683 
2684 	  if (distinct > 2)
2685 	    break;
2686 	}
2687     }
2688 
2689   return (distinct <= 2);
2690 }
2691 
2692 /* For each group of address type uses, this function further groups
2693    these uses according to the maximum offset supported by target's
2694    [base + offset] addressing mode.  */
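/* For instance, uses MEM[base] and MEM[base + 32768] end up in separate
   groups on a target whose [base + offset] addressing form cannot encode
   an offset of 32768.  */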
2695 
2696 static void
2697 split_address_groups (struct ivopts_data *data)
2698 {
2699   unsigned int i, j;
2700   /* Whether every group should be split at any nonzero offset,
	regardless of the addressing mode.  */
2701   bool split_p = split_small_address_groups_p (data);
2702 
2703   for (i = 0; i < data->vgroups.length (); i++)
2704     {
2705       struct iv_group *new_group = NULL;
2706       struct iv_group *group = data->vgroups[i];
2707       struct iv_use *use = group->vuses[0];
2708 
2709       use->id = 0;
2710       use->group_id = group->id;
2711       if (group->vuses.length () == 1)
2712 	continue;
2713 
2714       gcc_assert (address_p (use->type));
2715 
2716       for (j = 1; j < group->vuses.length ();)
2717 	{
2718 	  struct iv_use *next = group->vuses[j];
2719 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2720 
2721 	  /* Split the group if asked to, or if the offset against the first
2722 	     use can't fit in the offset part of the addressing mode.  IV
2723 	     uses having the same offset are still kept in one group.  */
2724 	  if (maybe_ne (offset, 0)
2725 	      && (split_p || !addr_offset_valid_p (use, offset)))
2726 	    {
2727 	      if (!new_group)
2728 		new_group = record_group (data, group->type);
2729 	      group->vuses.ordered_remove (j);
2730 	      new_group->vuses.safe_push (next);
2731 	      continue;
2732 	    }
2733 
2734 	  next->id = j;
2735 	  next->group_id = group->id;
2736 	  j++;
2737 	}
2738     }
2739 }
2740 
2741 /* Finds uses of the induction variables that are interesting.  */
2742 
2743 static void
2744 find_interesting_uses (struct ivopts_data *data)
2745 {
2746   basic_block bb;
2747   gimple_stmt_iterator bsi;
2748   basic_block *body = get_loop_body (data->current_loop);
2749   unsigned i;
2750   edge e;
2751 
2752   for (i = 0; i < data->current_loop->num_nodes; i++)
2753     {
2754       edge_iterator ei;
2755       bb = body[i];
2756 
2757       FOR_EACH_EDGE (e, ei, bb->succs)
2758 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2759 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2760 	  find_interesting_uses_outside (data, e);
2761 
2762       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2763 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2764       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2765 	if (!is_gimple_debug (gsi_stmt (bsi)))
2766 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2767     }
2768   free (body);
2769 
2770   split_address_groups (data);
2771 
2772   if (dump_file && (dump_flags & TDF_DETAILS))
2773     {
2774       fprintf (dump_file, "\n<IV Groups>:\n");
2775       dump_groups (dump_file, data);
2776       fprintf (dump_file, "\n");
2777     }
2778 }
2779 
2780 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2781    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2782    we are at the top-level of the processed address.  */
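/* For instance, for &a[i + 3] with 4-byte array elements the function
   returns &a[i] and stores 12 to *OFFSET.  */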
2783 
2784 static tree
2785 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2786 		poly_int64 *offset)
2787 {
2788   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2789   enum tree_code code;
2790   tree type, orig_type = TREE_TYPE (expr);
2791   poly_int64 off0, off1;
2792   HOST_WIDE_INT st;
2793   tree orig_expr = expr;
2794 
2795   STRIP_NOPS (expr);
2796 
2797   type = TREE_TYPE (expr);
2798   code = TREE_CODE (expr);
2799   *offset = 0;
2800 
2801   switch (code)
2802     {
2803     case POINTER_PLUS_EXPR:
2804     case PLUS_EXPR:
2805     case MINUS_EXPR:
2806       op0 = TREE_OPERAND (expr, 0);
2807       op1 = TREE_OPERAND (expr, 1);
2808 
2809       op0 = strip_offset_1 (op0, false, false, &off0);
2810       op1 = strip_offset_1 (op1, false, false, &off1);
2811 
2812       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2813       if (op0 == TREE_OPERAND (expr, 0)
2814 	  && op1 == TREE_OPERAND (expr, 1))
2815 	return orig_expr;
2816 
2817       if (integer_zerop (op1))
2818 	expr = op0;
2819       else if (integer_zerop (op0))
2820 	{
2821 	  if (code == MINUS_EXPR)
2822 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2823 	  else
2824 	    expr = op1;
2825 	}
2826       else
2827 	expr = fold_build2 (code, type, op0, op1);
2828 
2829       return fold_convert (orig_type, expr);
2830 
2831     case MULT_EXPR:
2832       op1 = TREE_OPERAND (expr, 1);
2833       if (!cst_and_fits_in_hwi (op1))
2834 	return orig_expr;
2835 
2836       op0 = TREE_OPERAND (expr, 0);
2837       op0 = strip_offset_1 (op0, false, false, &off0);
2838       if (op0 == TREE_OPERAND (expr, 0))
2839 	return orig_expr;
2840 
2841       *offset = off0 * int_cst_value (op1);
2842       if (integer_zerop (op0))
2843 	expr = op0;
2844       else
2845 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2846 
2847       return fold_convert (orig_type, expr);
2848 
2849     case ARRAY_REF:
2850     case ARRAY_RANGE_REF:
2851       if (!inside_addr)
2852 	return orig_expr;
2853 
2854       step = array_ref_element_size (expr);
2855       if (!cst_and_fits_in_hwi (step))
2856 	break;
2857 
2858       st = int_cst_value (step);
2859       op1 = TREE_OPERAND (expr, 1);
2860       op1 = strip_offset_1 (op1, false, false, &off1);
2861       *offset = off1 * st;
2862 
2863       if (top_compref
2864 	  && integer_zerop (op1))
2865 	{
2866 	  /* Strip the component reference completely.  */
2867 	  op0 = TREE_OPERAND (expr, 0);
2868 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2869 	  *offset += off0;
2870 	  return op0;
2871 	}
2872       break;
2873 
2874     case COMPONENT_REF:
2875       {
2876 	tree field;
2877 
2878 	if (!inside_addr)
2879 	  return orig_expr;
2880 
2881 	tmp = component_ref_field_offset (expr);
2882 	field = TREE_OPERAND (expr, 1);
2883 	if (top_compref
2884 	    && cst_and_fits_in_hwi (tmp)
2885 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2886 	  {
2887 	    HOST_WIDE_INT boffset, abs_off;
2888 
2889 	    /* Strip the component reference completely.  */
2890 	    op0 = TREE_OPERAND (expr, 0);
2891 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2892 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2893 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2894 	    if (boffset < 0)
2895 	      abs_off = -abs_off;
2896 
2897 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2898 	    return op0;
2899 	  }
2900       }
2901       break;
2902 
2903     case ADDR_EXPR:
2904       op0 = TREE_OPERAND (expr, 0);
2905       op0 = strip_offset_1 (op0, true, true, &off0);
2906       *offset += off0;
2907 
2908       if (op0 == TREE_OPERAND (expr, 0))
2909 	return orig_expr;
2910 
2911       expr = build_fold_addr_expr (op0);
2912       return fold_convert (orig_type, expr);
2913 
2914     case MEM_REF:
2915       /* ???  Offset operand?  */
2916       inside_addr = false;
2917       break;
2918 
2919     default:
2920       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2921 	return build_int_cst (orig_type, 0);
2922       return orig_expr;
2923     }
2924 
2925   /* Default handling of expressions for which we want to recurse into
2926      the first operand.  */
2927   op0 = TREE_OPERAND (expr, 0);
2928   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2929   *offset += off0;
2930 
2931   if (op0 == TREE_OPERAND (expr, 0)
2932       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2933     return orig_expr;
2934 
2935   expr = copy_node (expr);
2936   TREE_OPERAND (expr, 0) = op0;
2937   if (op1)
2938     TREE_OPERAND (expr, 1) = op1;
2939 
2940   /* Inside address, we might strip the top level component references,
2941      thus changing type of the expression.  Handling of ADDR_EXPR
2942      will fix that.  */
2943   expr = fold_convert (orig_type, expr);
2944 
2945   return expr;
2946 }
2947 
2948 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2949 
2950 tree
2951 strip_offset (tree expr, poly_uint64_pod *offset)
2952 {
2953   poly_int64 off;
2954   tree core = strip_offset_1 (expr, false, false, &off);
2955   *offset = off;
2956   return core;
2957 }
2958 
2959 /* Returns a variant of TYPE that can be used as a base for different uses.
2960    We return an unsigned type with the same precision, which avoids problems
2961    with overflows.  */
2962 
2963 static tree
2964 generic_type_for (tree type)
2965 {
2966   if (POINTER_TYPE_P (type))
2967     return unsigned_type_for (type);
2968 
2969   if (TYPE_UNSIGNED (type))
2970     return type;
2971 
2972   return unsigned_type_for (type);
2973 }
2974 
2975 /* Private data for walk_tree.  */
2976 
2977 struct walk_tree_data
2978 {
2979   bitmap *inv_vars;
2980   struct ivopts_data *idata;
2981 };
2982 
2983 /* Callback function for walk_tree, it records invariants and symbol
2984    reference in *EXPR_P.  DATA is the structure storing result info.  */
2985 
2986 static tree
2987 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2988 {
2989   tree op = *expr_p;
2990   struct version_info *info;
2991   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2992 
2993   if (TREE_CODE (op) != SSA_NAME)
2994     return NULL_TREE;
2995 
2996   info = name_info (wdata->idata, op);
2997   /* Because we expand simple operations when finding IVs, a loop invariant
2998      variable that isn't referenced in the original loop may now be used.
2999      Record such invariant variables here.  */
3000   if (!info->iv)
3001     {
3002       struct ivopts_data *idata = wdata->idata;
3003       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3004 
3005       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3006 	{
3007 	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3008 	  record_invariant (idata, op, false);
3009 	}
3010     }
3011   if (!info->inv_id || info->has_nonlin_use)
3012     return NULL_TREE;
3013 
3014   if (!*wdata->inv_vars)
3015     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3016   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3017 
3018   return NULL_TREE;
3019 }
3020 
3021 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
3022    should store them.  */
3023 
3024 static inline void
3025 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3026 {
3027   struct walk_tree_data wdata;
3028 
3029   if (!inv_vars)
3030     return;
3031 
3032   wdata.idata = data;
3033   wdata.inv_vars = inv_vars;
3034   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3035 }
3036 
3037 /* Get entry from invariant expr hash table for INV_EXPR.  A new entry
3038    will be recorded if it doesn't exist yet.  Given the two exprs below:
3039      inv_expr + cst1, inv_expr + cst2
3040    It's hard to decide whether the constant part should be stripped or
3041    not.  We choose not to strip it, based on the facts below:
3042      1) We need to count the ADD cost for the constant part if it's
3043 	stripped, which isn't always trivial where this function is called.
3044      2) Stripping the constant away may conflict with the subsequent
3045 	loop invariant hoisting pass.
3046      3) Not stripping the constant results in more invariant exprs,
3047 	which usually leads to decisions preferring lower reg pressure.  */
3048 
3049 static iv_inv_expr_ent *
3050 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3051 {
3052   STRIP_NOPS (inv_expr);
3053 
3054   if (poly_int_tree_p (inv_expr)
3055       || TREE_CODE (inv_expr) == SSA_NAME)
3056     return NULL;
3057 
3058   /* Don't strip constant part away as we used to.  */
3059 
3060   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3061   struct iv_inv_expr_ent ent;
3062   ent.expr = inv_expr;
3063   ent.hash = iterative_hash_expr (inv_expr, 0);
3064   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3065 
3066   if (!*slot)
3067     {
3068       *slot = XNEW (struct iv_inv_expr_ent);
3069       (*slot)->expr = inv_expr;
3070       (*slot)->hash = ent.hash;
3071       (*slot)->id = ++data->max_inv_expr_id;
3072     }
3073 
3074   return *slot;
3075 }
3076 
3077 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3078    position to POS.  If USE is not NULL, the candidate is set as related to
3079    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3080    replacement of the final value of the iv by a direct computation.  */
3081 
3082 static struct iv_cand *
3083 add_candidate_1 (struct ivopts_data *data,
3084 		 tree base, tree step, bool important, enum iv_position pos,
3085 		 struct iv_use *use, gimple *incremented_at,
3086 		 struct iv *orig_iv = NULL)
3087 {
3088   unsigned i;
3089   struct iv_cand *cand = NULL;
3090   tree type, orig_type;
3091 
3092   gcc_assert (base && step);
3093 
3094   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3095      live, but the ivopts code may replace a real pointer with one
3096      pointing before or after the memory block that is then adjusted
3097      into the memory block during the loop.  FIXME: It would likely be
3098      better to actually force the pointer live and still use ivopts;
3099      for example, it would be enough to write the pointer into memory
3100      and keep it there until after the loop.  */
3101   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3102     return NULL;
3103 
3104   /* For non-original variables, make sure their values are computed in a type
3105      that does not invoke undefined behavior on overflows (since in general,
3106      we cannot prove that these induction variables are non-wrapping).  */
3107   if (pos != IP_ORIGINAL)
3108     {
3109       orig_type = TREE_TYPE (base);
3110       type = generic_type_for (orig_type);
3111       if (type != orig_type)
3112 	{
3113 	  base = fold_convert (type, base);
3114 	  step = fold_convert (type, step);
3115 	}
3116     }
3117 
3118   for (i = 0; i < data->vcands.length (); i++)
3119     {
3120       cand = data->vcands[i];
3121 
3122       if (cand->pos != pos)
3123 	continue;
3124 
3125       if (cand->incremented_at != incremented_at
3126 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3127 	      && cand->ainc_use != use))
3128 	continue;
3129 
3130       if (operand_equal_p (base, cand->iv->base, 0)
3131 	  && operand_equal_p (step, cand->iv->step, 0)
3132 	  && (TYPE_PRECISION (TREE_TYPE (base))
3133 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3134 	break;
3135     }
3136 
3137   if (i == data->vcands.length ())
3138     {
3139       cand = XCNEW (struct iv_cand);
3140       cand->id = i;
3141       cand->iv = alloc_iv (data, base, step);
3142       cand->pos = pos;
3143       if (pos != IP_ORIGINAL)
3144 	{
3145 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3146 	  cand->var_after = cand->var_before;
3147 	}
3148       cand->important = important;
3149       cand->incremented_at = incremented_at;
3150       data->vcands.safe_push (cand);
3151 
3152       if (!poly_int_tree_p (step))
3153 	{
3154 	  find_inv_vars (data, &step, &cand->inv_vars);
3155 
3156 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3157 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3158 	  if (inv_expr != NULL)
3159 	    {
3160 	      cand->inv_exprs = cand->inv_vars;
3161 	      cand->inv_vars = NULL;
3162 	      if (cand->inv_exprs)
3163 		bitmap_clear (cand->inv_exprs);
3164 	      else
3165 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3166 
3167 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 	    }
3169 	}
3170 
3171       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3172 	cand->ainc_use = use;
3173       else
3174 	cand->ainc_use = NULL;
3175 
3176       cand->orig_iv = orig_iv;
3177       if (dump_file && (dump_flags & TDF_DETAILS))
3178 	dump_cand (dump_file, cand);
3179     }
3180 
3181   cand->important |= important;
3182 
3183   /* Relate candidate to the group for which it is added.  */
3184   if (use)
3185     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3186 
3187   return cand;
3188 }
3189 
3190 /* Returns true if incrementing the induction variable at the end of the LOOP
3191    is allowed.
3192 
3193    The purpose is to avoid splitting latch edge with a biv increment, thus
3194    creating a jump, possibly confusing other optimization passes and leaving
3195    less freedom to the scheduler.  So we allow IP_END only if IP_NORMAL is not
3196    available (so we do not have a better alternative), or if the latch edge
3197    is already nonempty.  */
3198 
3199 static bool
3200 allow_ip_end_pos_p (struct loop *loop)
3201 {
3202   if (!ip_normal_pos (loop))
3203     return true;
3204 
3205   if (!empty_block_p (ip_end_pos (loop)))
3206     return true;
3207 
3208   return false;
3209 }
3210 
3211 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3212    Important field is set to IMPORTANT.  */
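/* For instance, for a load from *p whose size equals the step of p's iv,
   on a target with post-increment addressing a candidate incremented right
   after the use (IP_BEFORE_USE/IP_AFTER_USE) is added, so that the
   increment can be folded into the memory access.  */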
3213 
3214 static void
3215 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3216 			bool important, struct iv_use *use)
3217 {
3218   basic_block use_bb = gimple_bb (use->stmt);
3219   machine_mode mem_mode;
3220   unsigned HOST_WIDE_INT cstepi;
3221 
3222   /* If we insert the increment in any position other than the standard
3223      ones, we must ensure that it is incremented once per iteration.
3224      It must not be in an inner nested loop, or one side of an if
3225      statement.  */
3226   if (use_bb->loop_father != data->current_loop
3227       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3228       || stmt_can_throw_internal (use->stmt)
3229       || !cst_and_fits_in_hwi (step))
3230     return;
3231 
3232   cstepi = int_cst_value (step);
3233 
3234   mem_mode = TYPE_MODE (use->mem_type);
3235   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3236 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3237        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3238       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3239 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3240 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3241     {
3242       enum tree_code code = MINUS_EXPR;
3243       tree new_base;
3244       tree new_step = step;
3245 
3246       if (POINTER_TYPE_P (TREE_TYPE (base)))
3247 	{
3248 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3249 	  code = POINTER_PLUS_EXPR;
3250 	}
3251       else
3252 	new_step = fold_convert (TREE_TYPE (base), new_step);
3253       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3254       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3255 		       use->stmt);
3256     }
3257   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3258 	|| USE_STORE_POST_INCREMENT (mem_mode))
3259        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3260       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3261 	   || USE_STORE_POST_DECREMENT (mem_mode))
3262 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3263     {
3264       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3265 		       use->stmt);
3266     }
3267 }
3268 
3269 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3270    position to POS.  If USE is not NULL, the candidate is set as related to
3271    it.  The candidate computation is scheduled before exit condition and at
3272    the end of loop.  */
3273 
3274 static void
3275 add_candidate (struct ivopts_data *data,
3276 	       tree base, tree step, bool important, struct iv_use *use,
3277 	       struct iv *orig_iv = NULL)
3278 {
3279   if (ip_normal_pos (data->current_loop))
3280     add_candidate_1 (data, base, step, important,
3281 		     IP_NORMAL, use, NULL, orig_iv);
3282   if (ip_end_pos (data->current_loop)
3283       && allow_ip_end_pos_p (data->current_loop))
3284     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3285 }
3286 
3287 /* Adds standard iv candidates.  */
3288 
3289 static void
3290 add_standard_iv_candidates (struct ivopts_data *data)
3291 {
3292   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3293 
3294   /* The same for the long type if it is wider and still fast enough.  */
3295   if (TYPE_PRECISION
3296 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3297       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3298     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3299 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3300 
3301   /* And for the long long type if it is wider still and fast enough.  */
3302   if (TYPE_PRECISION
3303 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3304       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3305     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3306 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3307 }
3308 
3309 
3310 /* Adds candidates based on the old induction variable IV.  */
3311 
3312 static void
3313 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3314 {
3315   gimple *phi;
3316   tree def;
3317   struct iv_cand *cand;
3318 
3319   /* Check if this biv is used in address type use.  */
3320   if (iv->no_overflow  && iv->have_address_use
3321       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3322       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3323     {
3324       tree base = fold_convert (sizetype, iv->base);
3325       tree step = fold_convert (sizetype, iv->step);
3326 
3327       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3328       add_candidate (data, base, step, true, NULL, iv);
3329       /* Add iv cand of the original type only if it has nonlinear use.  */
3330       if (iv->nonlin_use)
3331 	add_candidate (data, iv->base, iv->step, true, NULL);
3332     }
3333   else
3334     add_candidate (data, iv->base, iv->step, true, NULL);
3335 
3336   /* The same, but with initial value zero.  */
3337   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3338     add_candidate (data, size_int (0), iv->step, true, NULL);
3339   else
3340     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3341 		   iv->step, true, NULL);
3342 
3343   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3344   if (gimple_code (phi) == GIMPLE_PHI)
3345     {
3346       /* Additionally record the possibility of leaving the original iv
3347 	 untouched.  */
3348       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3349       /* Don't add candidate if it's from another PHI node because
3350 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3351       phi = SSA_NAME_DEF_STMT (def);
3352       if (gimple_code (phi) != GIMPLE_PHI)
3353 	{
3354 	  cand = add_candidate_1 (data,
3355 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3356 				  SSA_NAME_DEF_STMT (def));
3357 	  if (cand)
3358 	    {
3359 	      cand->var_before = iv->ssa_name;
3360 	      cand->var_after = def;
3361 	    }
3362 	}
3363       else
3364 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3365     }
3366 }
3367 
3368 /* Adds candidates based on the old induction variables.  */
3369 
3370 static void
3371 add_iv_candidate_for_bivs (struct ivopts_data *data)
3372 {
3373   unsigned i;
3374   struct iv *iv;
3375   bitmap_iterator bi;
3376 
3377   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3378     {
3379       iv = ver_info (data, i)->iv;
3380       if (iv && iv->biv_p && !integer_zerop (iv->step))
3381 	add_iv_candidate_for_biv (data, iv);
3382     }
3383 }
3384 
3385 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3386 
3387 static void
3388 record_common_cand (struct ivopts_data *data, tree base,
3389 		    tree step, struct iv_use *use)
3390 {
3391   struct iv_common_cand ent;
3392   struct iv_common_cand **slot;
3393 
3394   ent.base = base;
3395   ent.step = step;
3396   ent.hash = iterative_hash_expr (base, 0);
3397   ent.hash = iterative_hash_expr (step, ent.hash);
3398 
3399   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3400   if (*slot == NULL)
3401     {
3402       *slot = new iv_common_cand ();
3403       (*slot)->base = base;
3404       (*slot)->step = step;
3405       (*slot)->uses.create (8);
3406       (*slot)->hash = ent.hash;
3407       data->iv_common_cands.safe_push ((*slot));
3408     }
3409 
3410   gcc_assert (use != NULL);
3411   (*slot)->uses.safe_push (use);
3412   return;
3413 }
3414 
3415 /* Comparison function used to sort common candidates.  */
3416 
3417 static int
3418 common_cand_cmp (const void *p1, const void *p2)
3419 {
3420   unsigned n1, n2;
3421   const struct iv_common_cand *const *const ccand1
3422     = (const struct iv_common_cand *const *)p1;
3423   const struct iv_common_cand *const *const ccand2
3424     = (const struct iv_common_cand *const *)p2;
3425 
3426   n1 = (*ccand1)->uses.length ();
3427   n2 = (*ccand2)->uses.length ();
3428   return n2 - n1;
3429 }
3430 
3431 /* Adds IV candidates based on the common candidates recorded.  */
3432 
3433 static void
3434 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3435 {
3436   unsigned i, j;
3437   struct iv_cand *cand_1, *cand_2;
3438 
3439   data->iv_common_cands.qsort (common_cand_cmp);
3440   for (i = 0; i < data->iv_common_cands.length (); i++)
3441     {
3442       struct iv_common_cand *ptr = data->iv_common_cands[i];
3443 
3444       /* Only add IV candidate if it's derived from multiple uses.  */
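      /* The vector is sorted by decreasing number of uses, so once a
	 single-use entry is seen, every following entry is single-use
	 as well and we can stop.  */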
3445       if (ptr->uses.length () <= 1)
3446 	break;
3447 
3448       cand_1 = NULL;
3449       cand_2 = NULL;
3450       if (ip_normal_pos (data->current_loop))
3451 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3452 				  false, IP_NORMAL, NULL, NULL);
3453 
3454       if (ip_end_pos (data->current_loop)
3455 	  && allow_ip_end_pos_p (data->current_loop))
3456 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3457 				  false, IP_END, NULL, NULL);
3458 
3459       /* Bind deriving uses and the new candidates.  */
3460       for (j = 0; j < ptr->uses.length (); j++)
3461 	{
3462 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3463 	  if (cand_1)
3464 	    bitmap_set_bit (group->related_cands, cand_1->id);
3465 	  if (cand_2)
3466 	    bitmap_set_bit (group->related_cands, cand_2->id);
3467 	}
3468     }
3469 
3470   /* Release data since it is useless from this point.  */
3471   data->iv_common_cand_tab->empty ();
3472   data->iv_common_cands.truncate (0);
3473 }
3474 
3475 /* Adds candidates based on the value of USE's iv.  */
3476 
3477 static void
3478 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3479 {
3480   poly_uint64 offset;
3481   tree base;
3482   struct iv *iv = use->iv;
3483   tree basetype = TREE_TYPE (iv->base);
3484 
3485   /* Don't add a candidate for an iv_use whose base type is neither integer
3486      nor pointer, or lacks mode precision; instead, add a candidate for the
3487      corresponding scev in an unsigned type of the same precision.  See PR93674 for more info.  */
3488   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3489       || !type_has_mode_precision_p (basetype))
3490     {
3491       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3492 						 TYPE_UNSIGNED (basetype));
3493       add_candidate (data, fold_convert (basetype, iv->base),
3494 		     fold_convert (basetype, iv->step), false, NULL);
3495       return;
3496     }
3497 
3498   add_candidate (data, iv->base, iv->step, false, use);
3499 
3500   /* Record common candidate for use in case it can be shared by others.  */
3501   record_common_cand (data, iv->base, iv->step, use);
3502 
3503   /* Record common candidate with initial value zero.  */
3504   basetype = TREE_TYPE (iv->base);
3505   if (POINTER_TYPE_P (basetype))
3506     basetype = sizetype;
3507   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3508 
3509   /* Record common candidate with the constant offset stripped from the base.
3510      As for the use itself, we also add a candidate directly for it.  */
3511   base = strip_offset (iv->base, &offset);
3512   if (maybe_ne (offset, 0U) || base != iv->base)
3513     {
3514       record_common_cand (data, base, iv->step, use);
3515       add_candidate (data, base, iv->step, false, use);
3516     }
3517 
3518   /* Record common candidate with base_object removed in base.  */
3519   base = iv->base;
3520   STRIP_NOPS (base);
3521   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3522     {
3523       tree step = iv->step;
3524 
3525       STRIP_NOPS (step);
3526       base = TREE_OPERAND (base, 1);
3527       step = fold_convert (sizetype, step);
3528       record_common_cand (data, base, step, use);
3529       /* Also record common candidate with offset stripped.  */
3530       base = strip_offset (base, &offset);
3531       if (maybe_ne (offset, 0U))
3532 	record_common_cand (data, base, step, use);
3533     }
3534 
3535   /* Finally, add auto-increment candidates.  Make such variables
3536      important since other iv uses with the same base object may be based
3537      on them.  */
3538   if (use != NULL && address_p (use->type))
3539     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3540 }
3541 
3542 /* Adds candidates based on the uses.  */
3543 
3544 static void
3545 add_iv_candidate_for_groups (struct ivopts_data *data)
3546 {
3547   unsigned i;
3548 
3549   /* Only add candidate for the first use in group.  */
3550   for (i = 0; i < data->vgroups.length (); i++)
3551     {
3552       struct iv_group *group = data->vgroups[i];
3553 
3554       gcc_assert (group->vuses[0] != NULL);
3555       add_iv_candidate_for_use (data, group->vuses[0]);
3556     }
3557   add_iv_candidate_derived_from_uses (data);
3558 }
3559 
3560 /* Record important candidates and add them to related_cands bitmaps.  */
3561 
3562 static void
3563 record_important_candidates (struct ivopts_data *data)
3564 {
3565   unsigned i;
3566   struct iv_group *group;
3567 
3568   for (i = 0; i < data->vcands.length (); i++)
3569     {
3570       struct iv_cand *cand = data->vcands[i];
3571 
3572       if (cand->important)
3573 	bitmap_set_bit (data->important_candidates, i);
3574     }
3575 
3576   data->consider_all_candidates = (data->vcands.length ()
3577 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3578 
3579   /* Add important candidates to groups' related_cands bitmaps.  */
3580   for (i = 0; i < data->vgroups.length (); i++)
3581     {
3582       group = data->vgroups[i];
3583       bitmap_ior_into (group->related_cands, data->important_candidates);
3584     }
3585 }
3586 
3587 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3588    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3589    we allocate a simple list to every use.  */
3590 
3591 static void
3592 alloc_use_cost_map (struct ivopts_data *data)
3593 {
3594   unsigned i, size, s;
3595 
3596   for (i = 0; i < data->vgroups.length (); i++)
3597     {
3598       struct iv_group *group = data->vgroups[i];
3599 
3600       if (data->consider_all_candidates)
3601 	size = data->vcands.length ();
3602       else
3603 	{
3604 	  s = bitmap_count_bits (group->related_cands);
3605 
3606 	  /* Round up to a power of two, so that computing the modulo by it is fast.  */
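	  /* For example, s == 5 yields size == 8; set_group_iv_cost and
	     get_group_iv_cost then use cand->id & (size - 1) as the hash.  */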
3607 	  size = s ? (1 << ceil_log2 (s)) : 1;
3608 	}
3609 
3610       group->n_map_members = size;
3611       group->cost_map = XCNEWVEC (struct cost_pair, size);
3612     }
3613 }
3614 
3615 /* Sets the cost of the (GROUP, CAND) pair to COST, records that it depends
3616    on the invariants INV_VARS, that the value used in expressing it is VALUE,
3617    and, in case of iv elimination, that the comparison operator is COMP.  */
3618 
3619 static void
3620 set_group_iv_cost (struct ivopts_data *data,
3621 		   struct iv_group *group, struct iv_cand *cand,
3622 		   comp_cost cost, bitmap inv_vars, tree value,
3623 		   enum tree_code comp, bitmap inv_exprs)
3624 {
3625   unsigned i, s;
3626 
3627   if (cost.infinite_cost_p ())
3628     {
3629       BITMAP_FREE (inv_vars);
3630       BITMAP_FREE (inv_exprs);
3631       return;
3632     }
3633 
3634   if (data->consider_all_candidates)
3635     {
3636       group->cost_map[cand->id].cand = cand;
3637       group->cost_map[cand->id].cost = cost;
3638       group->cost_map[cand->id].inv_vars = inv_vars;
3639       group->cost_map[cand->id].inv_exprs = inv_exprs;
3640       group->cost_map[cand->id].value = value;
3641       group->cost_map[cand->id].comp = comp;
3642       return;
3643     }
3644 
3645   /* n_map_members is a power of two, so this computes modulo.  */
3646   s = cand->id & (group->n_map_members - 1);
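  /* Open addressing with linear probing: scan forward from slot S, wrapping
     around, until an empty slot is found.  */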
3647   for (i = s; i < group->n_map_members; i++)
3648     if (!group->cost_map[i].cand)
3649       goto found;
3650   for (i = 0; i < s; i++)
3651     if (!group->cost_map[i].cand)
3652       goto found;
3653 
3654   gcc_unreachable ();
3655 
3656 found:
3657   group->cost_map[i].cand = cand;
3658   group->cost_map[i].cost = cost;
3659   group->cost_map[i].inv_vars = inv_vars;
3660   group->cost_map[i].inv_exprs = inv_exprs;
3661   group->cost_map[i].value = value;
3662   group->cost_map[i].comp = comp;
3663 }
3664 
3665 /* Gets cost of (GROUP, CAND) pair.  */
3666 
3667 static struct cost_pair *
3668 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3669 		   struct iv_cand *cand)
3670 {
3671   unsigned i, s;
3672   struct cost_pair *ret;
3673 
3674   if (!cand)
3675     return NULL;
3676 
3677   if (data->consider_all_candidates)
3678     {
3679       ret = group->cost_map + cand->id;
3680       if (!ret->cand)
3681 	return NULL;
3682 
3683       return ret;
3684     }
3685 
3686   /* n_map_members is a power of two, so this computes modulo.  */
3687   s = cand->id & (group->n_map_members - 1);
3688   for (i = s; i < group->n_map_members; i++)
3689     if (group->cost_map[i].cand == cand)
3690       return group->cost_map + i;
3691     else if (group->cost_map[i].cand == NULL)
3692       return NULL;
3693   for (i = 0; i < s; i++)
3694     if (group->cost_map[i].cand == cand)
3695       return group->cost_map + i;
3696     else if (group->cost_map[i].cand == NULL)
3697       return NULL;
3698 
3699   return NULL;
3700 }
3701 
3702 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3703 static rtx
3704 produce_memory_decl_rtl (tree obj, int *regno)
3705 {
3706   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3707   machine_mode address_mode = targetm.addr_space.address_mode (as);
3708   rtx x;
3709 
3710   gcc_assert (obj);
3711   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3712     {
3713       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3714       x = gen_rtx_SYMBOL_REF (address_mode, name);
3715       SET_SYMBOL_REF_DECL (x, obj);
3716       x = gen_rtx_MEM (DECL_MODE (obj), x);
3717       set_mem_addr_space (x, as);
3718       targetm.encode_section_info (obj, x, true);
3719     }
3720   else
3721     {
3722       x = gen_raw_REG (address_mode, (*regno)++);
3723       x = gen_rtx_MEM (DECL_MODE (obj), x);
3724       set_mem_addr_space (x, as);
3725     }
3726 
3727   return x;
3728 }
3729 
3730 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3731    walk_tree.  DATA contains the actual fake register number.  */
3732 
3733 static tree
3734 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3735 {
3736   tree obj = NULL_TREE;
3737   rtx x = NULL_RTX;
3738   int *regno = (int *) data;
3739 
3740   switch (TREE_CODE (*expr_p))
3741     {
3742     case ADDR_EXPR:
3743       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3744 	   handled_component_p (*expr_p);
3745 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3746 	continue;
3747       obj = *expr_p;
3748       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3749 	x = produce_memory_decl_rtl (obj, regno);
3750       break;
3751 
3752     case SSA_NAME:
3753       *ws = 0;
3754       obj = SSA_NAME_VAR (*expr_p);
3755       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3756       if (!obj)
3757 	return NULL_TREE;
3758       if (!DECL_RTL_SET_P (obj))
3759 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3760       break;
3761 
3762     case VAR_DECL:
3763     case PARM_DECL:
3764     case RESULT_DECL:
3765       *ws = 0;
3766       obj = *expr_p;
3767 
3768       if (DECL_RTL_SET_P (obj))
3769 	break;
3770 
3771       if (DECL_MODE (obj) == BLKmode)
3772 	x = produce_memory_decl_rtl (obj, regno);
3773       else
3774 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3775 
3776       break;
3777 
3778     default:
3779       break;
3780     }
3781 
3782   if (x)
3783     {
3784       decl_rtl_to_reset.safe_push (obj);
3785       SET_DECL_RTL (obj, x);
3786     }
3787 
3788   return NULL_TREE;
3789 }
3790 
3791 /* Determines cost of the computation of EXPR.  */
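/* The expression is expanded into a throw-away RTL sequence using fake
   pseudo registers (set up by prepare_decl_rtl), and the per-insn costs are
   summed together with the cost of the resulting address or source operand.  */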
3792 
3793 static unsigned
3794 computation_cost (tree expr, bool speed)
3795 {
3796   rtx_insn *seq;
3797   rtx rslt;
3798   tree type = TREE_TYPE (expr);
3799   unsigned cost;
3800   /* Avoid using hard regs in ways which may be unsupported.  */
3801   int regno = LAST_VIRTUAL_REGISTER + 1;
3802   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3803   enum node_frequency real_frequency = node->frequency;
3804 
3805   node->frequency = NODE_FREQUENCY_NORMAL;
3806   crtl->maybe_hot_insn_p = speed;
3807   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3808   start_sequence ();
3809   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3810   seq = get_insns ();
3811   end_sequence ();
3812   default_rtl_profile ();
3813   node->frequency = real_frequency;
3814 
3815   cost = seq_cost (seq, speed);
3816   if (MEM_P (rslt))
3817     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3818 			  TYPE_ADDR_SPACE (type), speed);
3819   else if (!REG_P (rslt))
3820     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3821 
3822   return cost;
3823 }
3824 
3825 /* Returns variable containing the value of candidate CAND at statement AT.  */
3826 
3827 static tree
3828 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3829 {
3830   if (stmt_after_increment (loop, cand, stmt))
3831     return cand->var_after;
3832   else
3833     return cand->var_before;
3834 }
3835 
3836 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3837    same precision that is at least as wide as the precision of TYPE, stores
3838    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3839    type of A and B.  */
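/* For example, if A is (unsigned int) AL and B is (unsigned int) BL, where
   AL and BL both have type unsigned long, then AL is stored to A, BL to B,
   and unsigned long is returned.  */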
3840 
3841 static tree
3842 determine_common_wider_type (tree *a, tree *b)
3843 {
3844   tree wider_type = NULL;
3845   tree suba, subb;
3846   tree atype = TREE_TYPE (*a);
3847 
3848   if (CONVERT_EXPR_P (*a))
3849     {
3850       suba = TREE_OPERAND (*a, 0);
3851       wider_type = TREE_TYPE (suba);
3852       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3853 	return atype;
3854     }
3855   else
3856     return atype;
3857 
3858   if (CONVERT_EXPR_P (*b))
3859     {
3860       subb = TREE_OPERAND (*b, 0);
3861       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3862 	return atype;
3863     }
3864   else
3865     return atype;
3866 
3867   *a = suba;
3868   *b = subb;
3869   return wider_type;
3870 }
3871 
3872 /* Determines the expression by which USE is expressed from induction
3873    variable CAND at statement AT in LOOP.  The expression is stored in a
3874    decomposed form in two parts: the invariant part in AFF_INV and the
3875    variant part in AFF_VAR.  Store the ratio of USE.step over CAND.step in
3876    PRAT if it's non-null.  Returns false if USE cannot be expressed using CAND.  */
3877 
3878 static bool
3879 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3880 		       struct iv_cand *cand, struct aff_tree *aff_inv,
3881 		       struct aff_tree *aff_var, widest_int *prat = NULL)
3882 {
3883   tree ubase = use->iv->base, ustep = use->iv->step;
3884   tree cbase = cand->iv->base, cstep = cand->iv->step;
3885   tree common_type, uutype, var, cstep_common;
3886   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3887   aff_tree aff_cbase;
3888   widest_int rat;
3889 
3890   /* We must have enough precision to express the values of USE.  */
3891   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3892     return false;
3893 
3894   var = var_at_stmt (loop, cand, at);
3895   uutype = unsigned_type_for (utype);
3896 
3897   /* If the conversion is not a no-op, perform it.  */
3898   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3899     {
3900       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3901 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3902 	{
3903 	  tree inner_base, inner_step, inner_type;
3904 	  inner_base = TREE_OPERAND (cbase, 0);
3905 	  if (CONVERT_EXPR_P (cstep))
3906 	    inner_step = TREE_OPERAND (cstep, 0);
3907 	  else
3908 	    inner_step = cstep;
3909 
3910 	  inner_type = TREE_TYPE (inner_base);
3911 	  /* If candidate is added from a biv whose type is smaller than
3912 	     ctype, we know both candidate and the biv won't overflow.
3913 	     In this case, it's safe to skip the conversion in the candidate.
3914 	     As an example, (unsigned short)((unsigned long)A) equals to
3915 	     (unsigned short)A, if A has a type no larger than short.  */
3916 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3917 	    {
3918 	      cbase = inner_base;
3919 	      cstep = inner_step;
3920 	    }
3921 	}
3922       cbase = fold_convert (uutype, cbase);
3923       cstep = fold_convert (uutype, cstep);
3924       var = fold_convert (uutype, var);
3925     }
3926 
3927   /* Ratio is 1 when computing the value of biv cand by itself.
3928      We can't rely on constant_multiple_of in this case because the
3929      use is created after the original biv is selected.  The call
3930      could fail because of inconsistent fold behavior.  See PR68021
3931      for more information.  */
3932   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3933     {
3934       gcc_assert (is_gimple_assign (use->stmt));
3935       gcc_assert (use->iv->ssa_name == cand->var_after);
3936       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3937       rat = 1;
3938     }
3939   else if (!constant_multiple_of (ustep, cstep, &rat))
3940     return false;
3941 
3942   if (prat)
3943     *prat = rat;
3944 
3945   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3946      type, we achieve better folding by computing their difference in this
3947      wider type, and cast the result to UUTYPE.  We do not need to worry about
3948      overflows, as all the arithmetic will in the end be performed in UUTYPE
3949      anyway.  */
3950   common_type = determine_common_wider_type (&ubase, &cbase);
3951 
3952   /* use = ubase - ratio * cbase + ratio * var.  */
3953   tree_to_aff_combination (ubase, common_type, aff_inv);
3954   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3955   tree_to_aff_combination (var, uutype, aff_var);
3956 
3957   /* We need to shift the value if we are after the increment.  */
3958   if (stmt_after_increment (loop, cand, at))
3959     {
3960       aff_tree cstep_aff;
3961 
3962       if (common_type != uutype)
3963 	cstep_common = fold_convert (common_type, cstep);
3964       else
3965 	cstep_common = cstep;
3966 
3967       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3968       aff_combination_add (&aff_cbase, &cstep_aff);
3969     }
3970 
3971   aff_combination_scale (&aff_cbase, -rat);
3972   aff_combination_add (aff_inv, &aff_cbase);
3973   if (common_type != uutype)
3974     aff_combination_convert (aff_inv, uutype);
3975 
3976   aff_combination_scale (aff_var, rat);
3977   return true;
3978 }
3979 
3980 /* Determines the expression by which USE is expressed from induction variable
3981    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3982    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3983 
3984 static bool
3985 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3986 		     struct iv_cand *cand, struct aff_tree *aff)
3987 {
3988   aff_tree aff_var;
3989 
3990   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3991     return false;
3992 
3993   aff_combination_add (aff, &aff_var);
3994   return true;
3995 }
3996 
3997 /* Return the type of USE.  */
3998 
3999 static tree
4000 get_use_type (struct iv_use *use)
4001 {
4002   tree base_type = TREE_TYPE (use->iv->base);
4003   tree type;
4004 
4005   if (use->type == USE_REF_ADDRESS)
4006     {
4007       /* The base_type may be a void pointer.  Create a pointer type based on
4008 	 the mem_ref instead.  */
4009       type = build_pointer_type (TREE_TYPE (*use->op_p));
4010       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4011 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4012     }
4013   else
4014     type = base_type;
4015 
4016   return type;
4017 }
4018 
4019 /* Determines the expression by which USE is expressed from induction variable
4020    CAND at statement AT in LOOP.  The computation is unshared.  */
4021 
4022 static tree
4023 get_computation_at (struct loop *loop, gimple *at,
4024 		    struct iv_use *use, struct iv_cand *cand)
4025 {
4026   aff_tree aff;
4027   tree type = get_use_type (use);
4028 
4029   if (!get_computation_aff (loop, at, use, cand, &aff))
4030     return NULL_TREE;
4031   unshare_aff_combination (&aff);
4032   return fold_convert (type, aff_combination_to_tree (&aff));
4033 }
4034 
4035 /* Adjust the cost COST for being in loop setup rather than loop body.
4036    If we're optimizing for space, the loop setup overhead is constant;
4037    if we're optimizing for speed, amortize it over the per-iteration cost.
4038    If ROUND_UP_P is true, the result is rounded up rather than truncated
4039    toward zero when optimizing for speed.  */
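/* For example, a setup cost of 10 amortized over an average of 4 iterations
   is accounted as 10/4 == 2, or 3 when ROUND_UP_P is true.  */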
4040 static unsigned
4041 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4042 		   bool round_up_p = false)
4043 {
4044   if (cost == INFTY)
4045     return cost;
4046   else if (optimize_loop_for_speed_p (data->current_loop))
4047     {
4048       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4049       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4050     }
4051   else
4052     return cost;
4053 }
4054 
4055 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4056    operand of EXPR holding the multiplication that is implemented as a shift.
4057    COST0 and COST1 are the costs for calculating the operands of EXPR.
4058    Returns true if successful, and returns the cost in COST.  */
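/* For example, on a target with a cheap shift-and-add instruction, this lets
   an expression like A + B * 8 be costed as a single shift-add (m == 3)
   instead of a shift followed by an add.  */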
4059 
4060 static bool
4061 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4062 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4063 {
4064   comp_cost res;
4065   tree op1 = TREE_OPERAND (expr, 1);
4066   tree cst = TREE_OPERAND (mult, 1);
4067   tree multop = TREE_OPERAND (mult, 0);
4068   int m = exact_log2 (int_cst_value (cst));
4069   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4070   int as_cost, sa_cost;
4071   bool mult_in_op1;
4072 
4073   if (!(m >= 0 && m < maxm))
4074     return false;
4075 
4076   STRIP_NOPS (op1);
4077   mult_in_op1 = operand_equal_p (op1, mult, 0);
4078 
4079   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4080 
4081   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4082      use that in preference to a shift insn followed by an add insn.  */
4083   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4084 	     ? shiftadd_cost (speed, mode, m)
4085 	     : (mult_in_op1
4086 		? shiftsub1_cost (speed, mode, m)
4087 		: shiftsub0_cost (speed, mode, m)));
4088 
4089   res = comp_cost (MIN (as_cost, sa_cost), 0);
4090   res += (mult_in_op1 ? cost0 : cost1);
4091 
4092   STRIP_NOPS (multop);
4093   if (!is_gimple_val (multop))
4094     res += force_expr_to_var_cost (multop, speed);
4095 
4096   *cost = res;
4097   return true;
4098 }
4099 
4100 /* Estimates cost of forcing expression EXPR into a variable.  */
4101 
4102 static comp_cost
4103 force_expr_to_var_cost (tree expr, bool speed)
4104 {
4105   static bool costs_initialized = false;
4106   static unsigned integer_cost [2];
4107   static unsigned symbol_cost [2];
4108   static unsigned address_cost [2];
4109   tree op0, op1;
4110   comp_cost cost0, cost1, cost;
4111   machine_mode mode;
4112   scalar_int_mode int_mode;
4113 
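  /* On the first call, estimate the baseline costs of materializing an
     integer constant, a symbol address and a symbol-plus-offset address by
     expanding sample expressions to RTL, once for size (0) and once for
     speed (1).  */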
4114   if (!costs_initialized)
4115     {
4116       tree type = build_pointer_type (integer_type_node);
4117       tree var, addr;
4118       rtx x;
4119       int i;
4120 
4121       var = create_tmp_var_raw (integer_type_node, "test_var");
4122       TREE_STATIC (var) = 1;
4123       x = produce_memory_decl_rtl (var, NULL);
4124       SET_DECL_RTL (var, x);
4125 
4126       addr = build1 (ADDR_EXPR, type, var);
4127 
4128 
4129       for (i = 0; i < 2; i++)
4130 	{
4131 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4132 							     2000), i);
4133 
4134 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4135 
4136 	  address_cost[i]
4137 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4138 	  if (dump_file && (dump_flags & TDF_DETAILS))
4139 	    {
4140 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4141 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4142 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4143 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4144 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4145 	      fprintf (dump_file, "\n");
4146 	    }
4147 	}
4148 
4149       costs_initialized = true;
4150     }
4151 
4152   STRIP_NOPS (expr);
4153 
4154   if (SSA_VAR_P (expr))
4155     return no_cost;
4156 
4157   if (is_gimple_min_invariant (expr))
4158     {
4159       if (poly_int_tree_p (expr))
4160 	return comp_cost (integer_cost [speed], 0);
4161 
4162       if (TREE_CODE (expr) == ADDR_EXPR)
4163 	{
4164 	  tree obj = TREE_OPERAND (expr, 0);
4165 
4166 	  if (VAR_P (obj)
4167 	      || TREE_CODE (obj) == PARM_DECL
4168 	      || TREE_CODE (obj) == RESULT_DECL)
4169 	    return comp_cost (symbol_cost [speed], 0);
4170 	}
4171 
4172       return comp_cost (address_cost [speed], 0);
4173     }
4174 
4175   switch (TREE_CODE (expr))
4176     {
4177     case POINTER_PLUS_EXPR:
4178     case PLUS_EXPR:
4179     case MINUS_EXPR:
4180     case MULT_EXPR:
4181     case TRUNC_DIV_EXPR:
4182     case BIT_AND_EXPR:
4183     case BIT_IOR_EXPR:
4184     case LSHIFT_EXPR:
4185     case RSHIFT_EXPR:
4186       op0 = TREE_OPERAND (expr, 0);
4187       op1 = TREE_OPERAND (expr, 1);
4188       STRIP_NOPS (op0);
4189       STRIP_NOPS (op1);
4190       break;
4191 
4192     CASE_CONVERT:
4193     case NEGATE_EXPR:
4194     case BIT_NOT_EXPR:
4195       op0 = TREE_OPERAND (expr, 0);
4196       STRIP_NOPS (op0);
4197       op1 = NULL_TREE;
4198       break;
4199 
4200     default:
4201       /* Just an arbitrary value, FIXME.  */
4202       return comp_cost (target_spill_cost[speed], 0);
4203     }
4204 
4205   if (op0 == NULL_TREE
4206       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4207     cost0 = no_cost;
4208   else
4209     cost0 = force_expr_to_var_cost (op0, speed);
4210 
4211   if (op1 == NULL_TREE
4212       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4213     cost1 = no_cost;
4214   else
4215     cost1 = force_expr_to_var_cost (op1, speed);
4216 
4217   mode = TYPE_MODE (TREE_TYPE (expr));
4218   switch (TREE_CODE (expr))
4219     {
4220     case POINTER_PLUS_EXPR:
4221     case PLUS_EXPR:
4222     case MINUS_EXPR:
4223     case NEGATE_EXPR:
4224       cost = comp_cost (add_cost (speed, mode), 0);
4225       if (TREE_CODE (expr) != NEGATE_EXPR)
4226 	{
4227 	  tree mult = NULL_TREE;
4228 	  comp_cost sa_cost;
4229 	  if (TREE_CODE (op1) == MULT_EXPR)
4230 	    mult = op1;
4231 	  else if (TREE_CODE (op0) == MULT_EXPR)
4232 	    mult = op0;
4233 
4234 	  if (mult != NULL_TREE
4235 	      && is_a <scalar_int_mode> (mode, &int_mode)
4236 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4237 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4238 				    speed, &sa_cost))
4239 	    return sa_cost;
4240 	}
4241       break;
4242 
4243     CASE_CONVERT:
4244       {
4245 	tree inner_mode, outer_mode;
4246 	outer_mode = TREE_TYPE (expr);
4247 	inner_mode = TREE_TYPE (op0);
4248 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4249 				       TYPE_MODE (inner_mode), speed), 0);
4250       }
4251       break;
4252 
4253     case MULT_EXPR:
4254       if (cst_and_fits_in_hwi (op0))
4255 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4256 					     mode, speed), 0);
4257       else if (cst_and_fits_in_hwi (op1))
4258 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4259 					     mode, speed), 0);
4260       else
4261 	return comp_cost (target_spill_cost [speed], 0);
4262       break;
4263 
4264     case TRUNC_DIV_EXPR:
4265       /* Division by power of two is usually cheap, so we allow it.  Forbid
4266 	 anything else.  */
4267       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4268 	cost = comp_cost (add_cost (speed, mode), 0);
4269       else
4270 	cost = comp_cost (target_spill_cost[speed], 0);
4271       break;
4272 
4273     case BIT_AND_EXPR:
4274     case BIT_IOR_EXPR:
4275     case BIT_NOT_EXPR:
4276     case LSHIFT_EXPR:
4277     case RSHIFT_EXPR:
4278       cost = comp_cost (add_cost (speed, mode), 0);
4279       break;
4280 
4281     default:
4282       gcc_unreachable ();
4283     }
4284 
4285   cost += cost0;
4286   cost += cost1;
4287   return cost;
4288 }
4289 
4290 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4291    invariants the computation depends on.  */
4292 
4293 static comp_cost
4294 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4295 {
4296   if (!expr)
4297     return no_cost;
4298 
4299   find_inv_vars (data, &expr, inv_vars);
4300   return force_expr_to_var_cost (expr, data->speed);
4301 }
4302 
4303 /* Returns cost of auto-modifying address expression in shape base + offset.
4304    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4305    address expression.  The address expression has ADDR_MODE in addr space
4306    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4307    speed or size.  */
4308 
4309 enum ainc_type
4310 {
4311   AINC_PRE_INC,		/* Pre increment.  */
4312   AINC_PRE_DEC,		/* Pre decrement.  */
4313   AINC_POST_INC,	/* Post increment.  */
4314   AINC_POST_DEC,	/* Post decrement.  */
4315   AINC_NONE		/* Also the number of auto increment types.  */
4316 };
4317 
4318 struct ainc_cost_data
4319 {
4320   unsigned costs[AINC_NONE];
4321 };
4322 
4323 static comp_cost
4324 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4325 		       machine_mode addr_mode, machine_mode mem_mode,
4326 		       addr_space_t as, bool speed)
4327 {
4328   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4329       && !USE_STORE_PRE_DECREMENT (mem_mode)
4330       && !USE_LOAD_POST_DECREMENT (mem_mode)
4331       && !USE_STORE_POST_DECREMENT (mem_mode)
4332       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4333       && !USE_STORE_PRE_INCREMENT (mem_mode)
4334       && !USE_LOAD_POST_INCREMENT (mem_mode)
4335       && !USE_STORE_POST_INCREMENT (mem_mode))
4336     return infinite_cost;
4337 
4338   static vec<ainc_cost_data *> ainc_cost_data_list;
4339   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4340   if (idx >= ainc_cost_data_list.length ())
4341     {
4342       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4343 
4344       gcc_assert (nsize > idx);
4345       ainc_cost_data_list.safe_grow_cleared (nsize);
4346     }
4347 
4348   ainc_cost_data *data = ainc_cost_data_list[idx];
4349   if (data == NULL)
4350     {
4351       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4352 
4353       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4354       data->costs[AINC_PRE_DEC] = INFTY;
4355       data->costs[AINC_POST_DEC] = INFTY;
4356       data->costs[AINC_PRE_INC] = INFTY;
4357       data->costs[AINC_POST_INC] = INFTY;
4358       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4359 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4360 	{
4361 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4362 
4363 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4364 	    data->costs[AINC_PRE_DEC]
4365 	      = address_cost (addr, mem_mode, as, speed);
4366 	}
4367       if (USE_LOAD_POST_DECREMENT (mem_mode)
4368 	  || USE_STORE_POST_DECREMENT (mem_mode))
4369 	{
4370 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4371 
4372 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4373 	    data->costs[AINC_POST_DEC]
4374 	      = address_cost (addr, mem_mode, as, speed);
4375 	}
4376       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4377 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4378 	{
4379 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4380 
4381 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4382 	    data->costs[AINC_PRE_INC]
4383 	      = address_cost (addr, mem_mode, as, speed);
4384 	}
4385       if (USE_LOAD_POST_INCREMENT (mem_mode)
4386 	  || USE_STORE_POST_INCREMENT (mem_mode))
4387 	{
4388 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4389 
4390 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4391 	    data->costs[AINC_POST_INC]
4392 	      = address_cost (addr, mem_mode, as, speed);
4393 	}
4394       ainc_cost_data_list[idx] = data;
4395     }
4396 
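  /* Match the access against the four auto-modify forms: a post-modify has
     zero offset, while a pre-modify is offset by the access size in the
     direction of the step.  */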
4397   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4398   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4399     return comp_cost (data->costs[AINC_POST_INC], 0);
4400   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4401     return comp_cost (data->costs[AINC_POST_DEC], 0);
4402   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4403     return comp_cost (data->costs[AINC_PRE_INC], 0);
4404   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4405     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4406 
4407   return infinite_cost;
4408 }
4409 
4410 /* Return cost of computing USE's address expression by using CAND.
4411    AFF_INV and AFF_VAR represent invariant and variant parts of the
4412    address expression, respectively.  If AFF_INV is simple, store the loop
4413    invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
4414    handle it as a new invariant expression and record it in INV_EXPR.
4415    RATIO is the ratio between the steps of USE and CAND.  If CAN_AUTOINC
4416    is non-NULL, store in it a boolean value indicating whether this is an
4417    auto-increment address.  */
4418 
4419 static comp_cost
4420 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4421 		  struct iv_cand *cand, aff_tree *aff_inv,
4422 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4423 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4424 		  bool *can_autoinc, bool speed)
4425 {
4426   rtx addr;
4427   bool simple_inv = true;
4428   tree comp_inv = NULL_TREE, type = aff_var->type;
4429   comp_cost var_cost = no_cost, cost = no_cost;
4430   struct mem_address parts = {NULL_TREE, integer_one_node,
4431 			      NULL_TREE, NULL_TREE, NULL_TREE};
4432   machine_mode addr_mode = TYPE_MODE (type);
4433   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4434   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4435   /* Only true if ratio != 1.  */
4436   bool ok_with_ratio_p = false;
4437   bool ok_without_ratio_p = false;
4438 
4439   if (!aff_combination_const_p (aff_inv))
4440     {
4441       parts.index = integer_one_node;
4442       /* Addressing mode "base + index".  */
4443       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4444       if (ratio != 1)
4445 	{
4446 	  parts.step = wide_int_to_tree (type, ratio);
4447 	  /* Addressing mode "base + index << scale".  */
4448 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4449 	  if (!ok_with_ratio_p)
4450 	    parts.step = NULL_TREE;
4451 	}
4452       if (ok_with_ratio_p || ok_without_ratio_p)
4453 	{
4454 	  if (maybe_ne (aff_inv->offset, 0))
4455 	    {
4456 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4457 	      /* Addressing mode "base + index [<< scale] + offset".  */
4458 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4459 		parts.offset = NULL_TREE;
4460 	      else
4461 		aff_inv->offset = 0;
4462 	    }
4463 
4464 	  move_fixed_address_to_symbol (&parts, aff_inv);
4465 	  /* Base is fixed address and is moved to symbol part.  */
4466 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4467 	    parts.base = NULL_TREE;
4468 
4469 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4470 	  if (parts.symbol != NULL_TREE
4471 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4472 	    {
4473 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4474 	      parts.symbol = NULL_TREE;
4475 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4476 		 outside of address expression in this case.  */
4477 	      simple_inv = false;
4478 	      /* The symbol part is moved back to the base part; it can't be NULL.  */
4479 	      parts.base = integer_one_node;
4480 	    }
4481 	}
4482       else
4483 	parts.index = NULL_TREE;
4484     }
4485   else
4486     {
4487       poly_int64 ainc_step;
4488       if (can_autoinc
4489 	  && ratio == 1
4490 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4491 	{
4492 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4493 
4494 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4495 	    ainc_offset += ainc_step;
4496 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4497 					addr_mode, mem_mode, as, speed);
4498 	  if (!cost.infinite_cost_p ())
4499 	    {
4500 	      *can_autoinc = true;
4501 	      return cost;
4502 	    }
4503 	  cost = no_cost;
4504 	}
4505       if (!aff_combination_zero_p (aff_inv))
4506 	{
4507 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4508 	  /* Addressing mode "base + offset".  */
4509 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4510 	    parts.offset = NULL_TREE;
4511 	  else
4512 	    aff_inv->offset = 0;
4513 	}
4514     }
4515 
4516   if (simple_inv)
4517     simple_inv = (aff_inv == NULL
4518 		  || aff_combination_const_p (aff_inv)
4519 		  || aff_combination_singleton_var_p (aff_inv));
4520   if (!aff_combination_zero_p (aff_inv))
4521     comp_inv = aff_combination_to_tree (aff_inv);
4522   if (comp_inv != NULL_TREE)
4523     cost = force_var_cost (data, comp_inv, inv_vars);
4524   if (ratio != 1 && parts.step == NULL_TREE)
4525     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4526   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4527     var_cost += add_cost (speed, addr_mode);
4528 
4529   if (comp_inv && inv_expr && !simple_inv)
4530     {
4531       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4532       /* Clear depends on.  */
4533       if (*inv_expr != NULL && inv_vars && *inv_vars)
4534 	bitmap_clear (*inv_vars);
4535 
4536       /* The cost of a small invariant expression adjusted against loop
4537 	 niters is usually zero, which makes it difficult to distinguish
4538 	 from a candidate based on loop invariant variables.  Secondly, the
4539 	 generated invariant expression may not be hoisted out of the loop
4540 	 by a following pass.  We penalize the cost by rounding up in order
4541 	 to neutralize such effects.  */
4542       cost.cost = adjust_setup_cost (data, cost.cost, true);
4543       cost.scratch = cost.cost;
4544     }
4545 
4546   cost += var_cost;
4547   addr = addr_for_mem_ref (&parts, as, false);
4548   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4549   cost += address_cost (addr, mem_mode, as, speed);
4550 
4551   if (parts.symbol != NULL_TREE)
4552     cost.complexity += 1;
4553   /* Don't increase the complexity of adding a scaled index if it's
4554      the only kind of index that the target allows.  */
4555   if (parts.step != NULL_TREE && ok_without_ratio_p)
4556     cost.complexity += 1;
4557   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4558     cost.complexity += 1;
4559   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4560     cost.complexity += 1;
4561 
4562   return cost;
4563 }
4564 
4565 /* Scale (multiply) the computed COST (except the scratch part, which should
4566    be hoisted out of the loop) by AT->frequency / header->frequency, which
4567    makes the expected cost more accurate.  */
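/* For example, if the loop header has frequency 10 and the block containing
   AT has frequency 5, the non-scratch part of COST is halved.  */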
4568 
4569 static comp_cost
4570 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4571 {
4572    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4573    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4574    if (loop_freq != 0)
4575      {
4576        gcc_assert (cost.scratch <= cost.cost);
4577        int scaled_cost
4578 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4579 
4580        if (dump_file && (dump_flags & TDF_DETAILS))
4581 	 fprintf (dump_file, "Scaling cost based on bb prob "
4582 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4583 		  1.0f * bb_freq / loop_freq, cost.cost,
4584 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4585 
4586        cost.cost = scaled_cost;
4587      }
4588 
4589   return cost;
4590 }
4591 
4592 /* Determines the cost of the computation by that USE is expressed
4593    from induction variable CAND.  If ADDRESS_P is true, we just need
4594    to create an address from it, otherwise we want to get it into
4595    register.  A set of invariants we depend on is stored in INV_VARS.
4596    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4597    addressing is likely.  If INV_EXPR is nonnull, record invariant
4598    expr entry in it.  */
4599 
4600 static comp_cost
4601 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4602 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4603 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4604 {
4605   gimple *at = use->stmt;
4606   tree ubase = use->iv->base, cbase = cand->iv->base;
4607   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4608   tree comp_inv = NULL_TREE;
4609   HOST_WIDE_INT ratio, aratio;
4610   comp_cost cost;
4611   widest_int rat;
4612   aff_tree aff_inv, aff_var;
4613   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4614 
4615   if (inv_vars)
4616     *inv_vars = NULL;
4617   if (can_autoinc)
4618     *can_autoinc = false;
4619   if (inv_expr)
4620     *inv_expr = NULL;
4621 
4622   /* Check if we have enough precision to express the values of use.  */
4623   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4624     return infinite_cost;
4625 
4626   if (address_p
4627       || (use->iv->base_object
4628 	  && cand->iv->base_object
4629 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4630 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4631     {
4632       /* Do not try to express the address of an object with a computation
4633 	 based on the address of a different object.  This may cause problems
4634 	 in rtl-level alias analysis (which does not expect this to happen,
4635 	 as it is invalid in C), and it would be unlikely to be useful
4636 	 anyway.  */
4637       if (use->iv->base_object
4638 	  && cand->iv->base_object
4639 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4640 	return infinite_cost;
4641     }
4642 
4643   if (!get_computation_aff_1 (data->current_loop, at, use,
4644 			      cand, &aff_inv, &aff_var, &rat)
4645       || !wi::fits_shwi_p (rat))
4646     return infinite_cost;
4647 
4648   ratio = rat.to_shwi ();
4649   if (address_p)
4650     {
4651       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4652 			       inv_vars, inv_expr, can_autoinc, speed);
4653       return get_scaled_computation_cost_at (data, at, cost);
4654     }
4655 
4656   bool simple_inv = (aff_combination_const_p (&aff_inv)
4657 		     || aff_combination_singleton_var_p (&aff_inv));
4658   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4659   aff_combination_convert (&aff_inv, signed_type);
4660   if (!aff_combination_zero_p (&aff_inv))
4661     comp_inv = aff_combination_to_tree (&aff_inv);
4662 
4663   cost = force_var_cost (data, comp_inv, inv_vars);
4664   if (comp_inv && inv_expr && !simple_inv)
4665     {
4666       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4667       /* Clear depends on.  */
4668       if (*inv_expr != NULL && inv_vars && *inv_vars)
4669 	bitmap_clear (*inv_vars);
4670 
4671       cost.cost = adjust_setup_cost (data, cost.cost);
4672       /* Record setup cost in scratch field.  */
4673       cost.scratch = cost.cost;
4674     }
4675   /* Cost of constant integer can be covered when adding invariant part to
4676      variant part.  */
4677   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4678     cost = no_cost;
4679 
4680   /* Need type narrowing to represent use with cand.  */
4681   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4682     {
4683       machine_mode outer_mode = TYPE_MODE (utype);
4684       machine_mode inner_mode = TYPE_MODE (ctype);
4685       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4686     }
4687 
4688   /* Turn a + i * (-c) into a - i * c.  */
4689   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4690     aratio = -ratio;
4691   else
4692     aratio = ratio;
4693 
4694   if (ratio != 1)
4695     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4696 
4697   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4698      instruction.  */
4699   /* Need to add up the invariant and variant parts.  */
4700   if (comp_inv && !integer_zerop (comp_inv))
4701     cost += add_cost (speed, TYPE_MODE (utype));
4702 
4703   return get_scaled_computation_cost_at (data, at, cost);
4704 }
4705 
4706 /* Determines cost of computing the use in GROUP with CAND in a generic
4707    expression.  */
4708 
4709 static bool
4710 determine_group_iv_cost_generic (struct ivopts_data *data,
4711 				 struct iv_group *group, struct iv_cand *cand)
4712 {
4713   comp_cost cost;
4714   iv_inv_expr_ent *inv_expr = NULL;
4715   bitmap inv_vars = NULL, inv_exprs = NULL;
4716   struct iv_use *use = group->vuses[0];
4717 
4718   /* The simple case first -- if we need to express value of the preserved
4719      original biv, the cost is 0.  This also prevents us from counting the
4720      cost of increment twice -- once at this use and once in the cost of
4721      the candidate.  */
4722   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4723     cost = no_cost;
4724   else
4725     cost = get_computation_cost (data, use, cand, false,
4726 				 &inv_vars, NULL, &inv_expr);
4727 
4728   if (inv_expr)
4729     {
4730       inv_exprs = BITMAP_ALLOC (NULL);
4731       bitmap_set_bit (inv_exprs, inv_expr->id);
4732     }
4733   set_group_iv_cost (data, group, cand, cost, inv_vars,
4734 		     NULL_TREE, ERROR_MARK, inv_exprs);
4735   return !cost.infinite_cost_p ();
4736 }
4737 
4738 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4739 
4740 static bool
4741 determine_group_iv_cost_address (struct ivopts_data *data,
4742 				 struct iv_group *group, struct iv_cand *cand)
4743 {
4744   unsigned i;
4745   bitmap inv_vars = NULL, inv_exprs = NULL;
4746   bool can_autoinc;
4747   iv_inv_expr_ent *inv_expr = NULL;
4748   struct iv_use *use = group->vuses[0];
4749   comp_cost sum_cost = no_cost, cost;
4750 
4751   cost = get_computation_cost (data, use, cand, true,
4752 			       &inv_vars, &can_autoinc, &inv_expr);
4753 
4754   if (inv_expr)
4755     {
4756       inv_exprs = BITMAP_ALLOC (NULL);
4757       bitmap_set_bit (inv_exprs, inv_expr->id);
4758     }
4759   sum_cost = cost;
4760   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4761     {
4762       if (can_autoinc)
4763 	sum_cost -= cand->cost_step;
4764       /* If we generated the candidate solely for exploiting autoincrement
4765 	 opportunities, and it turns out it can't be used, set the cost to
4766 	 infinity to make sure we ignore it.  */
4767       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4768 	sum_cost = infinite_cost;
4769     }
4770 
4771   /* Uses in a group can share setup code, so only add setup cost once.  */
4772   cost -= cost.scratch;
4773   /* Compute and add costs for the rest of the uses in this group.  */
4774   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4775     {
4776       struct iv_use *next = group->vuses[i];
4777 
4778       /* TODO: We could skip computing cost for sub iv_use when it has the
4779 	 same cost as the first iv_use, but the cost really depends on the
4780 	 offset and where the iv_use is.  */
4781       cost = get_computation_cost (data, next, cand, true,
4782 				   NULL, &can_autoinc, &inv_expr);
4783       if (inv_expr)
4784 	{
4785 	  if (!inv_exprs)
4786 	    inv_exprs = BITMAP_ALLOC (NULL);
4787 
4788 	  bitmap_set_bit (inv_exprs, inv_expr->id);
4789 	}
4790       sum_cost += cost;
4791     }
4792   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4793 		     NULL_TREE, ERROR_MARK, inv_exprs);
4794 
4795   return !sum_cost.infinite_cost_p ();
4796 }
4797 
4798 /* Computes value of candidate CAND at position AT in iteration NITER, and
4799    stores it to VAL.  */
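/* That is, VAL = BASE + NITER * STEP (plus one extra STEP if AT is after the
   candidate's increment), computed in an unsigned (or sizetype) type so that
   any wrap-around is well defined.  */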
4800 
4801 static void
4802 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4803 	       aff_tree *val)
4804 {
4805   aff_tree step, delta, nit;
4806   struct iv *iv = cand->iv;
4807   tree type = TREE_TYPE (iv->base);
4808   tree steptype;
4809   if (POINTER_TYPE_P (type))
4810     steptype = sizetype;
4811   else
4812     steptype = unsigned_type_for (type);
4813 
4814   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4815   aff_combination_convert (&step, steptype);
4816   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4817   aff_combination_convert (&nit, steptype);
4818   aff_combination_mult (&nit, &step, &delta);
4819   if (stmt_after_increment (loop, cand, at))
4820     aff_combination_add (&delta, &step);
4821 
4822   tree_to_aff_combination (iv->base, type, val);
4823   if (!POINTER_TYPE_P (type))
4824     aff_combination_convert (val, steptype);
4825   aff_combination_add (val, &delta);
4826 }
4827 
4828 /* Returns period of induction variable iv.  */
4829 
4830 static tree
4831 iv_period (struct iv *iv)
4832 {
4833   tree step = iv->step, period, type;
4834   tree pow2div;
4835 
4836   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4837 
4838   type = unsigned_type_for (TREE_TYPE (step));
4839   /* The period of the iv is lcm (step, type_range) / step - 1,
4840      i.e., N * type_range / step - 1.  Since the type range is a power
4841      of two, N == step >> num_of_ending_zeros_binary (step), so the
4842      final result is
4843 
4844        (type_range >> num_of_ending_zeros_binary (step)) - 1.
4845 
4846   */
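  /* For example, a 32-bit unsigned iv with step 4 (two trailing zero bits)
     has period (1 << 30) - 1.  */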
4847   pow2div = num_ending_zeros (step);
4848 
4849   period = build_low_bits_mask (type,
4850 				(TYPE_PRECISION (type)
4851 				 - tree_to_uhwi (pow2div)));
4852 
4853   return period;
4854 }
4855 
4856 /* Returns the comparison operator used when eliminating the iv USE.  */
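/* If the loop is exited when the condition is true, the replacement test is
   VAR == BOUND; otherwise the loop keeps running while VAR != BOUND.  */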
4857 
4858 static enum tree_code
4859 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4860 {
4861   struct loop *loop = data->current_loop;
4862   basic_block ex_bb;
4863   edge exit;
4864 
4865   ex_bb = gimple_bb (use->stmt);
4866   exit = EDGE_SUCC (ex_bb, 0);
4867   if (flow_bb_inside_loop_p (loop, exit->dest))
4868     exit = EDGE_SUCC (ex_bb, 1);
4869 
4870   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4871 }
4872 
4873 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4874    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4875    calculation is performed in non-wrapping type.
4876 
4877    TODO: More generally, we could test for the situation that
4878 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4879 	 This would require knowing the sign of OFFSET.  */
4880 
4881 static bool
4882 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4883 {
4884   enum tree_code code;
4885   tree e1, e2;
4886   aff_tree aff_e1, aff_e2, aff_offset;
4887 
4888   if (!nowrap_type_p (TREE_TYPE (base)))
4889     return false;
4890 
4891   base = expand_simple_operations (base);
4892 
4893   if (TREE_CODE (base) == SSA_NAME)
4894     {
4895       gimple *stmt = SSA_NAME_DEF_STMT (base);
4896 
4897       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4898 	return false;
4899 
4900       code = gimple_assign_rhs_code (stmt);
4901       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4902 	return false;
4903 
4904       e1 = gimple_assign_rhs1 (stmt);
4905       e2 = gimple_assign_rhs2 (stmt);
4906     }
4907   else
4908     {
4909       code = TREE_CODE (base);
4910       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4911 	return false;
4912       e1 = TREE_OPERAND (base, 0);
4913       e2 = TREE_OPERAND (base, 1);
4914     }
4915 
4916   /* Use affine expansion as deeper inspection to prove the equality.  */
4917   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4918 				  &aff_e2, &data->name_expansion_cache);
4919   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4920 				  &aff_offset, &data->name_expansion_cache);
4921   aff_combination_scale (&aff_offset, -1);
4922   switch (code)
4923     {
4924     case PLUS_EXPR:
4925       aff_combination_add (&aff_e2, &aff_offset);
4926       if (aff_combination_zero_p (&aff_e2))
4927 	return true;
4928 
4929       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4930 				      &aff_e1, &data->name_expansion_cache);
4931       aff_combination_add (&aff_e1, &aff_offset);
4932       return aff_combination_zero_p (&aff_e1);
4933 
4934     case POINTER_PLUS_EXPR:
4935       aff_combination_add (&aff_e2, &aff_offset);
4936       return aff_combination_zero_p (&aff_e2);
4937 
4938     default:
4939       return false;
4940     }
4941 }
4942 
4943 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4944    comparison with CAND.  NITER describes the number of iterations of
4945    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4946 
4947    We aim to handle the following situation:
4948 
4949    sometype *base, *p;
4950    int a, b, i;
4951 
4952    i = a;
4953    p = p_0 = base + a;
4954 
4955    do
4956      {
4957        bla (*p);
4958        p++;
4959        i++;
4960      }
4961    while (i < b);
4962 
4963    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4964    We aim to optimize this to
4965 
4966    p = p_0 = base + a;
4967    do
4968      {
4969        bla (*p);
4970        p++;
4971      }
4972    while (p < p_0 - a + b);
4973 
4974    This preserves the correctness, since the pointer arithmetics does not
4975    overflow.  More precisely:
4976 
4977    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4978       overflow in computing it or the values of p.
4979    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4980       overflow.  To prove this, we use the fact that p_0 = base + a.  */
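/* A worked instance of the checks below (hypothetical values, for
   illustration only): with a = 2 and b = 7 the niter analysis gives
   MAY_BE_ZERO = (a + 1 > b) and NITER = b - a - 1 = 4.  The affine test
   computes B - A - NITER = 7 - 2 - 4 = 1, which is the required constant;
   any other value makes us give up.  */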
4981 
4982 static bool
4983 iv_elimination_compare_lt (struct ivopts_data *data,
4984 			   struct iv_cand *cand, enum tree_code *comp_p,
4985 			   struct tree_niter_desc *niter)
4986 {
4987   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4988   struct aff_tree nit, tmpa, tmpb;
4989   enum tree_code comp;
4990   HOST_WIDE_INT step;
4991 
4992   /* We need to know that the candidate induction variable does not overflow.
4993      While more complex analysis may be used to prove this, for now just
4994      check that the variable appears in the original program and that it
4995      is computed in a type that guarantees no overflows.  */
4996   cand_type = TREE_TYPE (cand->iv->base);
4997   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4998     return false;
4999 
5000   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5001      the calculation of the BOUND could overflow, making the comparison
5002      invalid.  */
5003   if (!data->loop_single_exit_p)
5004     return false;
5005 
5006   /* We need to be able to decide whether the candidate is increasing or
5007      decreasing in order to choose the right comparison operator.  */
5008   if (!cst_and_fits_in_hwi (cand->iv->step))
5009     return false;
5010   step = int_cst_value (cand->iv->step);
5011 
5012   /* Check that the number of iterations matches the expected pattern:
5013      a + 1 > b ? 0 : b - a - 1.  */
5014   mbz = niter->may_be_zero;
5015   if (TREE_CODE (mbz) == GT_EXPR)
5016     {
5017       /* Handle a + 1 > b.  */
5018       tree op0 = TREE_OPERAND (mbz, 0);
5019       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5020 	{
5021 	  a = TREE_OPERAND (op0, 0);
5022 	  b = TREE_OPERAND (mbz, 1);
5023 	}
5024       else
5025 	return false;
5026     }
5027   else if (TREE_CODE (mbz) == LT_EXPR)
5028     {
5029       tree op1 = TREE_OPERAND (mbz, 1);
5030 
5031       /* Handle b < a + 1.  */
5032       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5033 	{
5034 	  a = TREE_OPERAND (op1, 0);
5035 	  b = TREE_OPERAND (mbz, 0);
5036 	}
5037       else
5038 	return false;
5039     }
5040   else
5041     return false;
5042 
5043   /* Expected number of iterations is B - A - 1.  Check that it matches
5044      the actual number, i.e., that B - A - NITER = 1.  */
5045   tree_to_aff_combination (niter->niter, nit_type, &nit);
5046   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5047   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5048   aff_combination_scale (&nit, -1);
5049   aff_combination_scale (&tmpa, -1);
5050   aff_combination_add (&tmpb, &tmpa);
5051   aff_combination_add (&tmpb, &nit);
5052   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5053     return false;
5054 
5055   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5056      overflow.  */
5057   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5058 			cand->iv->step,
5059 			fold_convert (TREE_TYPE (cand->iv->step), a));
5060   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5061     return false;
5062 
5063   /* Determine the new comparison operator.  */
5064   comp = step < 0 ? GT_EXPR : LT_EXPR;
5065   if (*comp_p == NE_EXPR)
5066     *comp_p = comp;
5067   else if (*comp_p == EQ_EXPR)
5068     *comp_p = invert_tree_comparison (comp, false);
5069   else
5070     gcc_unreachable ();
5071 
5072   return true;
5073 }
5074 
5075 /* Check whether it is possible to express the condition in USE by comparison
5076    of candidate CAND.  If so, store the value to compare against in BOUND, and
5077    the comparison operator in COMP.  */
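/* For illustration (a hypothetical loop, not part of the code below):
   with the exit condition

     if (i_1 != n_2) goto <body>; else goto <exit>;

   and a candidate p with base p_0 and step 4, the same exit can be tested
   as p != p_0 + 4 * n_2; BOUND would then be p_0 + 4 * n_2 and COMP would be
   NE_EXPR (possibly refined to LT_EXPR/GT_EXPR by iv_elimination_compare_lt).
   This is only valid if the candidate does not wrap around before the bound
   is reached, which is what the period check below verifies.  */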
5078 
5079 static bool
5080 may_eliminate_iv (struct ivopts_data *data,
5081 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5082 		  enum tree_code *comp)
5083 {
5084   basic_block ex_bb;
5085   edge exit;
5086   tree period;
5087   struct loop *loop = data->current_loop;
5088   aff_tree bnd;
5089   struct tree_niter_desc *desc = NULL;
5090 
5091   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5092     return false;
5093 
5094   /* For now this works only for exits that dominate the loop latch.
5095      TODO: extend to other conditions inside loop body.  */
5096   ex_bb = gimple_bb (use->stmt);
5097   if (use->stmt != last_stmt (ex_bb)
5098       || gimple_code (use->stmt) != GIMPLE_COND
5099       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5100     return false;
5101 
5102   exit = EDGE_SUCC (ex_bb, 0);
5103   if (flow_bb_inside_loop_p (loop, exit->dest))
5104     exit = EDGE_SUCC (ex_bb, 1);
5105   if (flow_bb_inside_loop_p (loop, exit->dest))
5106     return false;
5107 
5108   desc = niter_for_exit (data, exit);
5109   if (!desc)
5110     return false;
5111 
5112   /* Determine whether we can use the variable to test the exit condition.
5113      This is the case iff the period of the induction variable is greater
5114      than the number of iterations for which the exit condition is true.  */
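  /* For example (hypothetical numbers): a candidate of 8-bit unsigned type
     with step 1 has a period of roughly 255 iterations.  If the loop may
     iterate 300 times before the exit is taken, the candidate wraps before
     its final value is reached and cannot be used, so we give up below.  */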
5115   period = iv_period (cand->iv);
5116 
5117   /* If the number of iterations is constant, compare against it directly.  */
5118   if (TREE_CODE (desc->niter) == INTEGER_CST)
5119     {
5120       /* See cand_value_at.  */
5121       if (stmt_after_increment (loop, cand, use->stmt))
5122 	{
5123 	  if (!tree_int_cst_lt (desc->niter, period))
5124 	    return false;
5125 	}
5126       else
5127 	{
5128 	  if (tree_int_cst_lt (period, desc->niter))
5129 	    return false;
5130 	}
5131     }
5132 
5133   /* If not, and if this is the only possible exit of the loop, see whether
5134      we can get a conservative estimate on the number of iterations of the
5135      entire loop and compare against that instead.  */
5136   else
5137     {
5138       widest_int period_value, max_niter;
5139 
5140       max_niter = desc->max;
5141       if (stmt_after_increment (loop, cand, use->stmt))
5142 	max_niter += 1;
5143       period_value = wi::to_widest (period);
5144       if (wi::gtu_p (max_niter, period_value))
5145 	{
5146 	  /* See if we can take advantage of inferred loop bound
5147 	     information.  */
5148 	  if (data->loop_single_exit_p)
5149 	    {
5150 	      if (!max_loop_iterations (loop, &max_niter))
5151 		return false;
5152 	      /* The loop bound is already adjusted by adding 1.  */
5153 	      if (wi::gtu_p (max_niter, period_value))
5154 		return false;
5155 	    }
5156 	  else
5157 	    return false;
5158 	}
5159     }
5160 
5161   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5162 
5163   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5164 			 aff_combination_to_tree (&bnd));
5165   *comp = iv_elimination_compare (data, use);
5166 
5167   /* It is unlikely that computing the number of iterations using division
5168      would be more profitable than keeping the original induction variable.  */
5169   if (expression_expensive_p (*bound))
5170     return false;
5171 
5172   /* Sometimes it is possible to handle the situation that the number of
5173      iterations may be zero unless additional assumptions hold, by using <
5174      instead of != in the exit condition.
5175 
5176      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5177 	   base the exit condition on it.  However, that is often too
5178 	   expensive.  */
5179   if (!integer_zerop (desc->may_be_zero))
5180     return iv_elimination_compare_lt (data, cand, comp, desc);
5181 
5182   return true;
5183 }
5184 
5185  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5186     be copied if it is used in the loop body and DATA->body_includes_call is set.  */
5187 
5188 static int
5189 parm_decl_cost (struct ivopts_data *data, tree bound)
5190 {
5191   tree sbound = bound;
5192   STRIP_NOPS (sbound);
5193 
5194   if (TREE_CODE (sbound) == SSA_NAME
5195       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5196       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5197       && data->body_includes_call)
5198     return COSTS_N_INSNS (1);
5199 
5200   return 0;
5201 }
5202 
5203 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5204 
5205 static bool
5206 determine_group_iv_cost_cond (struct ivopts_data *data,
5207 			      struct iv_group *group, struct iv_cand *cand)
5208 {
5209   tree bound = NULL_TREE;
5210   struct iv *cmp_iv;
5211   bitmap inv_exprs = NULL;
5212   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5213   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5214   enum comp_iv_rewrite rewrite_type;
5215   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5216   tree *control_var, *bound_cst;
5217   enum tree_code comp = ERROR_MARK;
5218   struct iv_use *use = group->vuses[0];
5219 
5220   /* Extract condition operands.  */
5221   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5222 					&bound_cst, NULL, &cmp_iv);
5223   gcc_assert (rewrite_type != COMP_IV_NA);
5224 
5225   /* Try iv elimination.  */
5226   if (rewrite_type == COMP_IV_ELIM
5227       && may_eliminate_iv (data, use, cand, &bound, &comp))
5228     {
5229       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5230       if (elim_cost.cost == 0)
5231 	elim_cost.cost = parm_decl_cost (data, bound);
5232       else if (TREE_CODE (bound) == INTEGER_CST)
5233 	elim_cost.cost = 0;
5234       /* If we replace a loop condition 'i < n' with 'p < base + n',
5235 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5236 	 'base' and 'n' will be live during the loop.	 More likely,
5237 	 'base + n' will be loop invariant, resulting in only one live value
5238 	 during the loop.  So in that case we clear inv_vars_elim and set
5239 	 inv_expr_elim instead.  */
5240       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5241 	{
5242 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5243 	  bitmap_clear (inv_vars_elim);
5244 	}
5245       /* The bound is a loop invariant, so it will be only computed
5246 	 once.  */
5247       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5248     }
5249 
5250   /* When the condition is a comparison of the candidate IV against
5251      zero, prefer this IV.
5252 
5253      TODO: The constant that we're subtracting from the cost should
5254      be target-dependent.  This information should be added to the
5255      target costs for each backend.  */
5256   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5257       && integer_zerop (*bound_cst)
5258       && (operand_equal_p (*control_var, cand->var_after, 0)
5259 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5260     elim_cost -= 1;
5261 
5262   express_cost = get_computation_cost (data, use, cand, false,
5263 				       &inv_vars_express, NULL,
5264 				       &inv_expr_express);
5265   if (cmp_iv != NULL)
5266     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5267 
5268   /* Count the cost of the original bound as well.  */
5269   bound_cost = force_var_cost (data, *bound_cst, NULL);
5270   if (bound_cost.cost == 0)
5271     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5272   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5273     bound_cost.cost = 0;
5274   express_cost += bound_cost;
5275 
5276   /* Choose the better approach, preferring the eliminated IV. */
5277   if (elim_cost <= express_cost)
5278     {
5279       cost = elim_cost;
5280       inv_vars = inv_vars_elim;
5281       inv_vars_elim = NULL;
5282       inv_expr = inv_expr_elim;
5283     }
5284   else
5285     {
5286       cost = express_cost;
5287       inv_vars = inv_vars_express;
5288       inv_vars_express = NULL;
5289       bound = NULL_TREE;
5290       comp = ERROR_MARK;
5291       inv_expr = inv_expr_express;
5292     }
5293 
5294   if (inv_expr)
5295     {
5296       inv_exprs = BITMAP_ALLOC (NULL);
5297       bitmap_set_bit (inv_exprs, inv_expr->id);
5298     }
5299   set_group_iv_cost (data, group, cand, cost,
5300 		     inv_vars, bound, comp, inv_exprs);
5301 
5302   if (inv_vars_elim)
5303     BITMAP_FREE (inv_vars_elim);
5304   if (inv_vars_express)
5305     BITMAP_FREE (inv_vars_express);
5306 
5307   return !cost.infinite_cost_p ();
5308 }
5309 
5310 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5311    if the uses cannot be represented with CAND.  */
5312 
5313 static bool
5314 determine_group_iv_cost (struct ivopts_data *data,
5315 			 struct iv_group *group, struct iv_cand *cand)
5316 {
5317   switch (group->type)
5318     {
5319     case USE_NONLINEAR_EXPR:
5320       return determine_group_iv_cost_generic (data, group, cand);
5321 
5322     case USE_REF_ADDRESS:
5323     case USE_PTR_ADDRESS:
5324       return determine_group_iv_cost_address (data, group, cand);
5325 
5326     case USE_COMPARE:
5327       return determine_group_iv_cost_cond (data, group, cand);
5328 
5329     default:
5330       gcc_unreachable ();
5331     }
5332 }
5333 
5334 /* Return true if get_computation_cost indicates that autoincrement is
5335    a possibility for the pair of USE and CAND, false otherwise.  */
5336 
5337 static bool
5338 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5339 			   struct iv_cand *cand)
5340 {
5341   if (!address_p (use->type))
5342     return false;
5343 
5344   bool can_autoinc = false;
5345   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5346   return can_autoinc;
5347 }
5348 
5349 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5350    use that allows autoincrement, and set their AINC_USE if possible.  */
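/* For illustration (hypothetical code, assuming a target that supports
   post-increment addressing):

     ... = *p;		<-- address use in the same basic block
     p = p + 4;		<-- increment of the IP_ORIGINAL candidate

   Here the load is the use closest before the increment, so it is recorded
   in the candidate's AINC_USE; the cost model can then treat the pair as a
   single auto-increment access.  */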
5351 
5352 static void
5353 set_autoinc_for_original_candidates (struct ivopts_data *data)
5354 {
5355   unsigned i, j;
5356 
5357   for (i = 0; i < data->vcands.length (); i++)
5358     {
5359       struct iv_cand *cand = data->vcands[i];
5360       struct iv_use *closest_before = NULL;
5361       struct iv_use *closest_after = NULL;
5362       if (cand->pos != IP_ORIGINAL)
5363 	continue;
5364 
5365       for (j = 0; j < data->vgroups.length (); j++)
5366 	{
5367 	  struct iv_group *group = data->vgroups[j];
5368 	  struct iv_use *use = group->vuses[0];
5369 	  unsigned uid = gimple_uid (use->stmt);
5370 
5371 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5372 	    continue;
5373 
5374 	  if (uid < gimple_uid (cand->incremented_at)
5375 	      && (closest_before == NULL
5376 		  || uid > gimple_uid (closest_before->stmt)))
5377 	    closest_before = use;
5378 
5379 	  if (uid > gimple_uid (cand->incremented_at)
5380 	      && (closest_after == NULL
5381 		  || uid < gimple_uid (closest_after->stmt)))
5382 	    closest_after = use;
5383 	}
5384 
5385       if (closest_before != NULL
5386 	  && autoinc_possible_for_pair (data, closest_before, cand))
5387 	cand->ainc_use = closest_before;
5388       else if (closest_after != NULL
5389 	       && autoinc_possible_for_pair (data, closest_after, cand))
5390 	cand->ainc_use = closest_after;
5391     }
5392 }
5393 
5394 /* Relate compare use with all candidates.  */
5395 
5396 static void
5397 relate_compare_use_with_all_cands (struct ivopts_data *data)
5398 {
5399   unsigned i, count = data->vcands.length ();
5400   for (i = 0; i < data->vgroups.length (); i++)
5401     {
5402       struct iv_group *group = data->vgroups[i];
5403 
5404       if (group->type == USE_COMPARE)
5405 	bitmap_set_range (group->related_cands, 0, count);
5406     }
5407 }
5408 
5409 /* Finds the candidates for the induction variables.  */
5410 
5411 static void
5412 find_iv_candidates (struct ivopts_data *data)
5413 {
5414   /* Add commonly used ivs.  */
5415   add_standard_iv_candidates (data);
5416 
5417   /* Add old induction variables.  */
5418   add_iv_candidate_for_bivs (data);
5419 
5420   /* Add induction variables derived from uses.  */
5421   add_iv_candidate_for_groups (data);
5422 
5423   set_autoinc_for_original_candidates (data);
5424 
5425   /* Record the important candidates.  */
5426   record_important_candidates (data);
5427 
5428   /* Relate compare iv_use with all candidates.  */
5429   if (!data->consider_all_candidates)
5430     relate_compare_use_with_all_cands (data);
5431 
5432   if (dump_file && (dump_flags & TDF_DETAILS))
5433     {
5434       unsigned i;
5435 
5436       fprintf (dump_file, "\n<Important Candidates>:\t");
5437       for (i = 0; i < data->vcands.length (); i++)
5438 	if (data->vcands[i]->important)
5439 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5440       fprintf (dump_file, "\n");
5441 
5442       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5443       for (i = 0; i < data->vgroups.length (); i++)
5444 	{
5445 	  struct iv_group *group = data->vgroups[i];
5446 
5447 	  if (group->related_cands)
5448 	    {
5449 	      fprintf (dump_file, "  Group %d:\t", group->id);
5450 	      dump_bitmap (dump_file, group->related_cands);
5451 	    }
5452 	}
5453       fprintf (dump_file, "\n");
5454     }
5455 }
5456 
5457 /* Determines the costs of computing each group of uses with each iv candidate.  */
5458 
5459 static void
5460 determine_group_iv_costs (struct ivopts_data *data)
5461 {
5462   unsigned i, j;
5463   struct iv_cand *cand;
5464   struct iv_group *group;
5465   bitmap to_clear = BITMAP_ALLOC (NULL);
5466 
5467   alloc_use_cost_map (data);
5468 
5469   for (i = 0; i < data->vgroups.length (); i++)
5470     {
5471       group = data->vgroups[i];
5472 
5473       if (data->consider_all_candidates)
5474 	{
5475 	  for (j = 0; j < data->vcands.length (); j++)
5476 	    {
5477 	      cand = data->vcands[j];
5478 	      determine_group_iv_cost (data, group, cand);
5479 	    }
5480 	}
5481       else
5482 	{
5483 	  bitmap_iterator bi;
5484 
5485 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5486 	    {
5487 	      cand = data->vcands[j];
5488 	      if (!determine_group_iv_cost (data, group, cand))
5489 		bitmap_set_bit (to_clear, j);
5490 	    }
5491 
5492 	  /* Remove the candidates for which the cost is infinite from
5493 	     the list of related candidates.  */
5494 	  bitmap_and_compl_into (group->related_cands, to_clear);
5495 	  bitmap_clear (to_clear);
5496 	}
5497     }
5498 
5499   BITMAP_FREE (to_clear);
5500 
5501   if (dump_file && (dump_flags & TDF_DETAILS))
5502     {
5503       bitmap_iterator bi;
5504 
5505       /* Dump invariant variables.  */
5506       fprintf (dump_file, "\n<Invariant Vars>:\n");
5507       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5508 	{
5509 	  struct version_info *info = ver_info (data, i);
5510 	  if (info->inv_id)
5511 	    {
5512 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5513 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5514 	      fprintf (dump_file, "%s\n",
5515 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5516 	    }
5517 	}
5518 
5519       /* Dump invariant expressions.  */
5520       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5521       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5522 
5523       for (hash_table<iv_inv_expr_hasher>::iterator it
5524 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5525 	   ++it)
5526 	list.safe_push (*it);
5527 
5528       list.qsort (sort_iv_inv_expr_ent);
5529 
5530       for (i = 0; i < list.length (); ++i)
5531 	{
5532 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5533 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5534 	  fprintf (dump_file, "\n");
5535 	}
5536 
5537       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5538 
5539       for (i = 0; i < data->vgroups.length (); i++)
5540 	{
5541 	  group = data->vgroups[i];
5542 
5543 	  fprintf (dump_file, "Group %d:\n", i);
5544 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5545 	  for (j = 0; j < group->n_map_members; j++)
5546 	    {
5547 	      if (!group->cost_map[j].cand
5548 		  || group->cost_map[j].cost.infinite_cost_p ())
5549 		continue;
5550 
5551 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5552 		       group->cost_map[j].cand->id,
5553 		       group->cost_map[j].cost.cost,
5554 		       group->cost_map[j].cost.complexity);
5555 	      if (!group->cost_map[j].inv_exprs
5556 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5557 		fprintf (dump_file, "NIL;\t");
5558 	      else
5559 		bitmap_print (dump_file,
5560 			      group->cost_map[j].inv_exprs, "", ";\t");
5561 	      if (!group->cost_map[j].inv_vars
5562 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5563 		fprintf (dump_file, "NIL;\n");
5564 	      else
5565 		bitmap_print (dump_file,
5566 			      group->cost_map[j].inv_vars, "", "\n");
5567 	    }
5568 
5569 	  fprintf (dump_file, "\n");
5570 	}
5571       fprintf (dump_file, "\n");
5572     }
5573 }
5574 
5575 /* Determines cost of the candidate CAND.  */
5576 
5577 static void
5578 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5579 {
5580   comp_cost cost_base;
5581   unsigned cost, cost_step;
5582   tree base;
5583 
5584   gcc_assert (cand->iv != NULL);
5585 
5586   /* There are two costs associated with the candidate -- its increment
5587      and its initialization.  The second is almost negligible for any loop
5588      that rolls enough, so we give it only very little weight.  */
5589 
5590   base = cand->iv->base;
5591   cost_base = force_var_cost (data, base, NULL);
5592   /* It will be exceptional that the iv register happens to be initialized with
5593      the proper value at no cost.  In general, there will at least be a regcopy
5594      or a const set.  */
5595   if (cost_base.cost == 0)
5596     cost_base.cost = COSTS_N_INSNS (1);
5597   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5598 
5599   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5600 
5601   /* Prefer the original ivs unless we may gain something by replacing them.
5602      The reason is to make debugging simpler, so this is not relevant for
5603      artificial ivs created by other optimization passes.  */
5604   if (cand->pos != IP_ORIGINAL
5605       || !SSA_NAME_VAR (cand->var_before)
5606       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5607     cost++;
5608 
5609   /* Prefer not to insert statements into latch unless there are some
5610      already (so that we do not create unnecessary jumps).  */
5611   if (cand->pos == IP_END
5612       && empty_block_p (ip_end_pos (data->current_loop)))
5613     cost++;
5614 
5615   cand->cost = cost;
5616   cand->cost_step = cost_step;
5617 }
5618 
5619 /* Determines costs of computation of the candidates.  */
5620 
5621 static void
5622 determine_iv_costs (struct ivopts_data *data)
5623 {
5624   unsigned i;
5625 
5626   if (dump_file && (dump_flags & TDF_DETAILS))
5627     {
5628       fprintf (dump_file, "<Candidate Costs>:\n");
5629       fprintf (dump_file, "  cand\tcost\n");
5630     }
5631 
5632   for (i = 0; i < data->vcands.length (); i++)
5633     {
5634       struct iv_cand *cand = data->vcands[i];
5635 
5636       determine_iv_cost (data, cand);
5637 
5638       if (dump_file && (dump_flags & TDF_DETAILS))
5639 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5640     }
5641 
5642   if (dump_file && (dump_flags & TDF_DETAILS))
5643     fprintf (dump_file, "\n");
5644 }
5645 
5646 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5647    induction variables.  Note N_INVS includes both invariant variables and
5648    invariant expressions.  */
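/* A hypothetical example of the estimate below: suppose target_avail_regs
   is 16, the loop body contains no call, the loop already uses
   data->regs_used = 4 registers, and we are asked about n_invs = 2 and
   n_cands = 3.  Then regs_needed = 9; with target_res_regs = 3 we have
   9 + 3 < 16, so the cost is simply n_new = 5 and the function returns
   5 + 3 = 8 after adding the n_cands term.  The later branches apply as
   regs_needed approaches or exceeds the available registers.  */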
5649 
5650 static unsigned
5651 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5652 			      unsigned n_cands)
5653 {
5654   unsigned cost;
5655   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5656   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5657   bool speed = data->speed;
5658 
5659   /* If there is a call in the loop body, the call-clobbered registers
5660      are not available for loop invariants.  */
5661   if (data->body_includes_call)
5662     available_regs = available_regs - target_clobbered_regs;
5663 
5664   /* If we have enough registers.  */
5665   if (regs_needed + target_res_regs < available_regs)
5666     cost = n_new;
5667   /* If close to running out of registers, try to preserve them.  */
5668   else if (regs_needed <= available_regs)
5669     cost = target_reg_cost [speed] * regs_needed;
5670   /* If the registers needed exceed those available but the number of
5671      candidates still fits, we penalize the extra registers using target_spill_cost.  */
5672   else if (n_cands <= available_regs)
5673     cost = target_reg_cost [speed] * available_regs
5674 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5675   /* If even the number of candidates exceeds the available registers, we
5676      penalize the extra candidate registers using target_spill_cost * 2, because
5677      it is more expensive to spill an induction variable than an invariant.  */
5678   else
5679     cost = target_reg_cost [speed] * available_regs
5680 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5681 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5682 
5683   /* Finally, add the number of candidates, so that we prefer eliminating
5684      induction variables if possible.  */
5685   return cost + n_cands;
5686 }
5687 
5688 /* For each size of the induction variable set determine the penalty.  */
5689 
5690 static void
5691 determine_set_costs (struct ivopts_data *data)
5692 {
5693   unsigned j, n;
5694   gphi *phi;
5695   gphi_iterator psi;
5696   tree op;
5697   struct loop *loop = data->current_loop;
5698   bitmap_iterator bi;
5699 
5700   if (dump_file && (dump_flags & TDF_DETAILS))
5701     {
5702       fprintf (dump_file, "<Global Costs>:\n");
5703       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5704       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5705       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5706       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5707     }
5708 
5709   n = 0;
5710   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5711     {
5712       phi = psi.phi ();
5713       op = PHI_RESULT (phi);
5714 
5715       if (virtual_operand_p (op))
5716 	continue;
5717 
5718       if (get_iv (data, op))
5719 	continue;
5720 
5721       if (!POINTER_TYPE_P (TREE_TYPE (op))
5722 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5723 	continue;
5724 
5725       n++;
5726     }
5727 
5728   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5729     {
5730       struct version_info *info = ver_info (data, j);
5731 
5732       if (info->inv_id && info->has_nonlin_use)
5733 	n++;
5734     }
5735 
5736   data->regs_used = n;
5737   if (dump_file && (dump_flags & TDF_DETAILS))
5738     fprintf (dump_file, "  regs_used %d\n", n);
5739 
5740   if (dump_file && (dump_flags & TDF_DETAILS))
5741     {
5742       fprintf (dump_file, "  cost for size:\n");
5743       fprintf (dump_file, "  ivs\tcost\n");
5744       for (j = 0; j <= 2 * target_avail_regs; j++)
5745 	fprintf (dump_file, "  %d\t%d\n", j,
5746 		 ivopts_estimate_reg_pressure (data, 0, j));
5747       fprintf (dump_file, "\n");
5748     }
5749 }
5750 
5751 /* Returns true if A is a cheaper cost pair than B.  */
5752 
5753 static bool
5754 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5755 {
5756   if (!a)
5757     return false;
5758 
5759   if (!b)
5760     return true;
5761 
5762   if (a->cost < b->cost)
5763     return true;
5764 
5765   if (b->cost < a->cost)
5766     return false;
5767 
5768   /* In case the costs are the same, prefer the cheaper candidate.  */
5769   if (a->cand->cost < b->cand->cost)
5770     return true;
5771 
5772   return false;
5773 }
5774 
5775 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5776    for more expensive, equal and cheaper respectively.  */
5777 
5778 static int
5779 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5780 {
5781   if (cheaper_cost_pair (a, b))
5782     return -1;
5783   if (cheaper_cost_pair (b, a))
5784     return 1;
5785 
5786   return 0;
5787 }
5788 
5789 /* Returns the cost pair by which GROUP is expressed in IVS.  */
5790 
5791 static struct cost_pair *
5792 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5793 {
5794   return ivs->cand_for_group[group->id];
5795 }
5796 
5797 /* Computes the cost field of IVS structure.  */
5798 
5799 static void
5800 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5801 {
5802   comp_cost cost = ivs->cand_use_cost;
5803 
5804   cost += ivs->cand_cost;
5805   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5806   ivs->cost = cost;
5807 }
5808 
5809 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5810    and IVS.  */
5811 
5812 static void
5813 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5814 {
5815   bitmap_iterator bi;
5816   unsigned iid;
5817 
5818   if (!invs)
5819     return;
5820 
5821   gcc_assert (n_inv_uses != NULL);
5822   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5823     {
5824       n_inv_uses[iid]--;
5825       if (n_inv_uses[iid] == 0)
5826 	ivs->n_invs--;
5827     }
5828 }
5829 
5830 /* Set GROUP not to be expressed by any candidate in IVS.  */
5831 
5832 static void
5833 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5834 		 struct iv_group *group)
5835 {
5836   unsigned gid = group->id, cid;
5837   struct cost_pair *cp;
5838 
5839   cp = ivs->cand_for_group[gid];
5840   if (!cp)
5841     return;
5842   cid = cp->cand->id;
5843 
5844   ivs->bad_groups++;
5845   ivs->cand_for_group[gid] = NULL;
5846   ivs->n_cand_uses[cid]--;
5847 
5848   if (ivs->n_cand_uses[cid] == 0)
5849     {
5850       bitmap_clear_bit (ivs->cands, cid);
5851       ivs->n_cands--;
5852       ivs->cand_cost -= cp->cand->cost;
5853       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5854       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5855     }
5856 
5857   ivs->cand_use_cost -= cp->cost;
5858   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5859   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5860   iv_ca_recount_cost (data, ivs);
5861 }
5862 
5863 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5864    IVS.  */
5865 
5866 static void
5867 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5868 {
5869   bitmap_iterator bi;
5870   unsigned iid;
5871 
5872   if (!invs)
5873     return;
5874 
5875   gcc_assert (n_inv_uses != NULL);
5876   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5877     {
5878       n_inv_uses[iid]++;
5879       if (n_inv_uses[iid] == 1)
5880 	ivs->n_invs++;
5881     }
5882 }
5883 
5884 /* Set cost pair for GROUP in set IVS to CP.  */
5885 
5886 static void
5887 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5888 	      struct iv_group *group, struct cost_pair *cp)
5889 {
5890   unsigned gid = group->id, cid;
5891 
5892   if (ivs->cand_for_group[gid] == cp)
5893     return;
5894 
5895   if (ivs->cand_for_group[gid])
5896     iv_ca_set_no_cp (data, ivs, group);
5897 
5898   if (cp)
5899     {
5900       cid = cp->cand->id;
5901 
5902       ivs->bad_groups--;
5903       ivs->cand_for_group[gid] = cp;
5904       ivs->n_cand_uses[cid]++;
5905       if (ivs->n_cand_uses[cid] == 1)
5906 	{
5907 	  bitmap_set_bit (ivs->cands, cid);
5908 	  ivs->n_cands++;
5909 	  ivs->cand_cost += cp->cand->cost;
5910 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5911 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5912 	}
5913 
5914       ivs->cand_use_cost += cp->cost;
5915       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5916       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5917       iv_ca_recount_cost (data, ivs);
5918     }
5919 }
5920 
5921 /* Extend set IVS by expressing GROUP by some of the candidates in it
5922    if possible.  Consider all important candidates if candidates in
5923    set IVS don't give any result.  */
5924 
5925 static void
5926 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5927 	       struct iv_group *group)
5928 {
5929   struct cost_pair *best_cp = NULL, *cp;
5930   bitmap_iterator bi;
5931   unsigned i;
5932   struct iv_cand *cand;
5933 
5934   gcc_assert (ivs->upto >= group->id);
5935   ivs->upto++;
5936   ivs->bad_groups++;
5937 
5938   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5939     {
5940       cand = data->vcands[i];
5941       cp = get_group_iv_cost (data, group, cand);
5942       if (cheaper_cost_pair (cp, best_cp))
5943 	best_cp = cp;
5944     }
5945 
5946   if (best_cp == NULL)
5947     {
5948       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5949 	{
5950 	  cand = data->vcands[i];
5951 	  cp = get_group_iv_cost (data, group, cand);
5952 	  if (cheaper_cost_pair (cp, best_cp))
5953 	    best_cp = cp;
5954 	}
5955     }
5956 
5957   iv_ca_set_cp (data, ivs, group, best_cp);
5958 }
5959 
5960 /* Get cost for assignment IVS.  */
5961 
5962 static comp_cost
5963 iv_ca_cost (struct iv_ca *ivs)
5964 {
5965   /* This was a conditional expression but it triggered a bug in
5966      Sun C 5.5.  */
5967   if (ivs->bad_groups)
5968     return infinite_cost;
5969   else
5970     return ivs->cost;
5971 }
5972 
5973 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5974    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5975    respectively.  */
5976 
5977 static int
5978 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5979 		    struct iv_group *group, struct cost_pair *old_cp,
5980 		    struct cost_pair *new_cp)
5981 {
5982   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5983   unsigned old_n_invs = ivs->n_invs;
5984   iv_ca_set_cp (data, ivs, group, new_cp);
5985   unsigned new_n_invs = ivs->n_invs;
5986   iv_ca_set_cp (data, ivs, group, old_cp);
5987 
5988   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5989 }
5990 
5991 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5992    it before NEXT.  */
5993 
5994 static struct iv_ca_delta *
5995 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5996 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
5997 {
5998   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5999 
6000   change->group = group;
6001   change->old_cp = old_cp;
6002   change->new_cp = new_cp;
6003   change->next = next;
6004 
6005   return change;
6006 }
6007 
6008 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6009    are rewritten.  */
6010 
6011 static struct iv_ca_delta *
6012 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6013 {
6014   struct iv_ca_delta *last;
6015 
6016   if (!l2)
6017     return l1;
6018 
6019   if (!l1)
6020     return l2;
6021 
6022   for (last = l1; last->next; last = last->next)
6023     continue;
6024   last->next = l2;
6025 
6026   return l1;
6027 }
6028 
6029 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6030 
6031 static struct iv_ca_delta *
6032 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6033 {
6034   struct iv_ca_delta *act, *next, *prev = NULL;
6035 
6036   for (act = delta; act; act = next)
6037     {
6038       next = act->next;
6039       act->next = prev;
6040       prev = act;
6041 
6042       std::swap (act->old_cp, act->new_cp);
6043     }
6044 
6045   return prev;
6046 }
6047 
6048 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6049    reverted instead.  */
6050 
6051 static void
6052 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6053 		    struct iv_ca_delta *delta, bool forward)
6054 {
6055   struct cost_pair *from, *to;
6056   struct iv_ca_delta *act;
6057 
6058   if (!forward)
6059     delta = iv_ca_delta_reverse (delta);
6060 
6061   for (act = delta; act; act = act->next)
6062     {
6063       from = act->old_cp;
6064       to = act->new_cp;
6065       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6066       iv_ca_set_cp (data, ivs, act->group, to);
6067     }
6068 
6069   if (!forward)
6070     iv_ca_delta_reverse (delta);
6071 }
6072 
6073 /* Returns true if CAND is used in IVS.  */
6074 
6075 static bool
6076 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6077 {
6078   return ivs->n_cand_uses[cand->id] > 0;
6079 }
6080 
6081 /* Returns number of induction variable candidates in the set IVS.  */
6082 
6083 static unsigned
6084 iv_ca_n_cands (struct iv_ca *ivs)
6085 {
6086   return ivs->n_cands;
6087 }
6088 
6089 /* Free the list of changes DELTA.  */
6090 
6091 static void
6092 iv_ca_delta_free (struct iv_ca_delta **delta)
6093 {
6094   struct iv_ca_delta *act, *next;
6095 
6096   for (act = *delta; act; act = next)
6097     {
6098       next = act->next;
6099       free (act);
6100     }
6101 
6102   *delta = NULL;
6103 }
6104 
6105 /* Allocates a new iv candidate assignment.  */
6106 
6107 static struct iv_ca *
6108 iv_ca_new (struct ivopts_data *data)
6109 {
6110   struct iv_ca *nw = XNEW (struct iv_ca);
6111 
6112   nw->upto = 0;
6113   nw->bad_groups = 0;
6114   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6115 				 data->vgroups.length ());
6116   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6117   nw->cands = BITMAP_ALLOC (NULL);
6118   nw->n_cands = 0;
6119   nw->n_invs = 0;
6120   nw->cand_use_cost = no_cost;
6121   nw->cand_cost = 0;
6122   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6123   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6124   nw->cost = no_cost;
6125 
6126   return nw;
6127 }
6128 
6129 /* Free memory occupied by the set IVS.  */
6130 
6131 static void
6132 iv_ca_free (struct iv_ca **ivs)
6133 {
6134   free ((*ivs)->cand_for_group);
6135   free ((*ivs)->n_cand_uses);
6136   BITMAP_FREE ((*ivs)->cands);
6137   free ((*ivs)->n_inv_var_uses);
6138   free ((*ivs)->n_inv_expr_uses);
6139   free (*ivs);
6140   *ivs = NULL;
6141 }
6142 
6143 /* Dumps IVS to FILE.  */
6144 
6145 static void
6146 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6147 {
6148   unsigned i;
6149   comp_cost cost = iv_ca_cost (ivs);
6150 
6151   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6152 	   cost.complexity);
6153   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6154 	   ivs->cand_cost, ivs->cand_use_cost.cost,
6155 	   ivs->cand_use_cost.complexity);
6156   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6157 
6158   for (i = 0; i < ivs->upto; i++)
6159     {
6160       struct iv_group *group = data->vgroups[i];
6161       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6162       if (cp)
6163         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6164 		 group->id, cp->cand->id, cp->cost.cost,
6165 		 cp->cost.complexity);
6166       else
6167 	fprintf (file, "   group:%d --> ??\n", group->id);
6168     }
6169 
6170   const char *pref = "";
6171   fprintf (file, "  invariant variables: ");
6172   for (i = 1; i <= data->max_inv_var_id; i++)
6173     if (ivs->n_inv_var_uses[i])
6174       {
6175 	fprintf (file, "%s%d", pref, i);
6176 	pref = ", ";
6177       }
6178 
6179   pref = "";
6180   fprintf (file, "\n  invariant expressions: ");
6181   for (i = 1; i <= data->max_inv_expr_id; i++)
6182     if (ivs->n_inv_expr_uses[i])
6183       {
6184 	fprintf (file, "%s%d", pref, i);
6185 	pref = ", ";
6186       }
6187 
6188   fprintf (file, "\n\n");
6189 }
6190 
6191 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6192    new set, and store differences in DELTA.  Number of induction variables
6193    in the new set is stored in N_IVS.  MIN_NCAND is a flag; when it is true,
6194    the function will try to find a solution with minimal iv candidates.  */
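/* A small worked illustration (hypothetical numbers): suppose the set
   currently expresses group G by candidate C2 at cost 8, and the cost of G
   with CAND is 6.  The delta then records the change G: C2 -> CAND; after
   committing all such changes the recomputed set cost is returned, and the
   delta is immediately reverted so the caller can decide whether to keep it.  */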
6195 
6196 static comp_cost
6197 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6198 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6199 	      unsigned *n_ivs, bool min_ncand)
6200 {
6201   unsigned i;
6202   comp_cost cost;
6203   struct iv_group *group;
6204   struct cost_pair *old_cp, *new_cp;
6205 
6206   *delta = NULL;
6207   for (i = 0; i < ivs->upto; i++)
6208     {
6209       group = data->vgroups[i];
6210       old_cp = iv_ca_cand_for_group (ivs, group);
6211 
6212       if (old_cp
6213 	  && old_cp->cand == cand)
6214 	continue;
6215 
6216       new_cp = get_group_iv_cost (data, group, cand);
6217       if (!new_cp)
6218 	continue;
6219 
6220       if (!min_ncand)
6221 	{
6222 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6223 	  /* Skip if new_cp depends on more invariants.  */
6224 	  if (cmp_invs > 0)
6225 	    continue;
6226 
6227 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6228 	  /* Skip if new_cp is not cheaper.  */
6229 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6230 	    continue;
6231 	}
6232 
6233       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6234     }
6235 
6236   iv_ca_delta_commit (data, ivs, *delta, true);
6237   cost = iv_ca_cost (ivs);
6238   if (n_ivs)
6239     *n_ivs = iv_ca_n_cands (ivs);
6240   iv_ca_delta_commit (data, ivs, *delta, false);
6241 
6242   return cost;
6243 }
6244 
6245 /* Try narrowing set IVS by removing CAND.  Return the cost of
6246    the new set and store the differences in DELTA.  START is
6247    the candidate with which we start narrowing.  */
6248 
6249 static comp_cost
6250 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6251 	      struct iv_cand *cand, struct iv_cand *start,
6252 	      struct iv_ca_delta **delta)
6253 {
6254   unsigned i, ci;
6255   struct iv_group *group;
6256   struct cost_pair *old_cp, *new_cp, *cp;
6257   bitmap_iterator bi;
6258   struct iv_cand *cnd;
6259   comp_cost cost, best_cost, acost;
6260 
6261   *delta = NULL;
6262   for (i = 0; i < data->vgroups.length (); i++)
6263     {
6264       group = data->vgroups[i];
6265 
6266       old_cp = iv_ca_cand_for_group (ivs, group);
6267       if (old_cp->cand != cand)
6268 	continue;
6269 
6270       best_cost = iv_ca_cost (ivs);
6271       /* Start narrowing with START.  */
6272       new_cp = get_group_iv_cost (data, group, start);
6273 
6274       if (data->consider_all_candidates)
6275 	{
6276 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6277 	    {
6278 	      if (ci == cand->id || (start && ci == start->id))
6279 		continue;
6280 
6281 	      cnd = data->vcands[ci];
6282 
6283 	      cp = get_group_iv_cost (data, group, cnd);
6284 	      if (!cp)
6285 		continue;
6286 
6287 	      iv_ca_set_cp (data, ivs, group, cp);
6288 	      acost = iv_ca_cost (ivs);
6289 
6290 	      if (acost < best_cost)
6291 		{
6292 		  best_cost = acost;
6293 		  new_cp = cp;
6294 		}
6295 	    }
6296 	}
6297       else
6298 	{
6299 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6300 	    {
6301 	      if (ci == cand->id || (start && ci == start->id))
6302 		continue;
6303 
6304 	      cnd = data->vcands[ci];
6305 
6306 	      cp = get_group_iv_cost (data, group, cnd);
6307 	      if (!cp)
6308 		continue;
6309 
6310 	      iv_ca_set_cp (data, ivs, group, cp);
6311 	      acost = iv_ca_cost (ivs);
6312 
6313 	      if (acost < best_cost)
6314 		{
6315 		  best_cost = acost;
6316 		  new_cp = cp;
6317 		}
6318 	    }
6319 	}
6320       /* Restore to old cp for use.  */
6321       iv_ca_set_cp (data, ivs, group, old_cp);
6322 
6323       if (!new_cp)
6324 	{
6325 	  iv_ca_delta_free (delta);
6326 	  return infinite_cost;
6327 	}
6328 
6329       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6330     }
6331 
6332   iv_ca_delta_commit (data, ivs, *delta, true);
6333   cost = iv_ca_cost (ivs);
6334   iv_ca_delta_commit (data, ivs, *delta, false);
6335 
6336   return cost;
6337 }
6338 
6339 /* Try optimizing the set of candidates IVS by removing candidates other
6340    than EXCEPT_CAND from it.  Return cost of the new set, and store
6341    differences in DELTA.  */
6342 
6343 static comp_cost
6344 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6345 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6346 {
6347   bitmap_iterator bi;
6348   struct iv_ca_delta *act_delta, *best_delta;
6349   unsigned i;
6350   comp_cost best_cost, acost;
6351   struct iv_cand *cand;
6352 
6353   best_delta = NULL;
6354   best_cost = iv_ca_cost (ivs);
6355 
6356   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6357     {
6358       cand = data->vcands[i];
6359 
6360       if (cand == except_cand)
6361 	continue;
6362 
6363       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6364 
6365       if (acost < best_cost)
6366 	{
6367 	  best_cost = acost;
6368 	  iv_ca_delta_free (&best_delta);
6369 	  best_delta = act_delta;
6370 	}
6371       else
6372 	iv_ca_delta_free (&act_delta);
6373     }
6374 
6375   if (!best_delta)
6376     {
6377       *delta = NULL;
6378       return best_cost;
6379     }
6380 
6381   /* Recurse to possibly remove other unnecessary ivs.  */
6382   iv_ca_delta_commit (data, ivs, best_delta, true);
6383   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6384   iv_ca_delta_commit (data, ivs, best_delta, false);
6385   *delta = iv_ca_delta_join (best_delta, *delta);
6386   return best_cost;
6387 }
6388 
6389 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6390    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6391    the corresponding cost_pair, otherwise just return BEST_CP.  */
6392 
6393 static struct cost_pair*
6394 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6395 			unsigned int cand_idx, struct iv_cand *old_cand,
6396 			struct cost_pair *best_cp)
6397 {
6398   struct iv_cand *cand;
6399   struct cost_pair *cp;
6400 
6401   gcc_assert (old_cand != NULL && best_cp != NULL);
6402   if (cand_idx == old_cand->id)
6403     return best_cp;
6404 
6405   cand = data->vcands[cand_idx];
6406   cp = get_group_iv_cost (data, group, cand);
6407   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6408     return cp;
6409 
6410   return best_cp;
6411 }
6412 
6413 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6414    that are used by more than one iv use.  For each of those candidates,
6415    this function tries to represent the iv uses under that candidate using
6416    other candidates with lower local cost, then tries to prune the new set.
6417    If the new set has lower cost, it returns the new cost after recording the
6418    candidate replacement in list DELTA.  */
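/* For illustration (a hypothetical situation): suppose candidate C currently
   expresses groups G1 and G2, while G1 would be locally cheaper with C1 and
   G2 with C2.  The plain extend/prune steps may never try that change,
   because it temporarily needs an extra candidate; this function performs
   the replacement speculatively, prunes the result, and keeps it only if
   the total cost actually decreases.  */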
6419 
6420 static comp_cost
6421 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6422 	       struct iv_ca_delta **delta)
6423 {
6424   bitmap_iterator bi, bj;
6425   unsigned int i, j, k;
6426   struct iv_cand *cand;
6427   comp_cost orig_cost, acost;
6428   struct iv_ca_delta *act_delta, *tmp_delta;
6429   struct cost_pair *old_cp, *best_cp = NULL;
6430 
6431   *delta = NULL;
6432   orig_cost = iv_ca_cost (ivs);
6433 
6434   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6435     {
6436       if (ivs->n_cand_uses[i] == 1
6437 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6438 	continue;
6439 
6440       cand = data->vcands[i];
6441 
6442       act_delta = NULL;
6443       /*  Represent uses under current candidate using other ones with
6444 	  lower local cost.  */
6445       for (j = 0; j < ivs->upto; j++)
6446 	{
6447 	  struct iv_group *group = data->vgroups[j];
6448 	  old_cp = iv_ca_cand_for_group (ivs, group);
6449 
6450 	  if (old_cp->cand != cand)
6451 	    continue;
6452 
6453 	  best_cp = old_cp;
6454 	  if (data->consider_all_candidates)
6455 	    for (k = 0; k < data->vcands.length (); k++)
6456 	      best_cp = cheaper_cost_with_cand (data, group, k,
6457 						old_cp->cand, best_cp);
6458 	  else
6459 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6460 	      best_cp = cheaper_cost_with_cand (data, group, k,
6461 						old_cp->cand, best_cp);
6462 
6463 	  if (best_cp == old_cp)
6464 	    continue;
6465 
6466 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6467 	}
6468       /* No need for further pruning.  */
6469       if (!act_delta)
6470 	continue;
6471 
6472       /* Prune the new candidate set.  */
6473       iv_ca_delta_commit (data, ivs, act_delta, true);
6474       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6475       iv_ca_delta_commit (data, ivs, act_delta, false);
6476       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6477 
6478       if (acost < orig_cost)
6479 	{
6480 	  *delta = act_delta;
6481 	  return acost;
6482 	}
6483       else
6484 	iv_ca_delta_free (&act_delta);
6485     }
6486 
6487   return orig_cost;
6488 }
6489 
6490 /* Tries to extend the set IVS in the best possible way in order to
6491    express GROUP.  If ORIGINALP is true, prefer candidates from
6492    the original set of IVs, otherwise favor important candidates not
6493    based on any memory object.  */
6494 
6495 static bool
6496 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6497 		  struct iv_group *group, bool originalp)
6498 {
6499   comp_cost best_cost, act_cost;
6500   unsigned i;
6501   bitmap_iterator bi;
6502   struct iv_cand *cand;
6503   struct iv_ca_delta *best_delta = NULL, *act_delta;
6504   struct cost_pair *cp;
6505 
6506   iv_ca_add_group (data, ivs, group);
6507   best_cost = iv_ca_cost (ivs);
6508   cp = iv_ca_cand_for_group (ivs, group);
6509   if (cp)
6510     {
6511       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6512       iv_ca_set_no_cp (data, ivs, group);
6513     }
6514 
6515   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6516      first try important candidates not based on any memory object.  Only if
6517      this fails, try the specific ones.  Rationale -- in loops with many
6518      variables the best choice often is to use just one generic biv.  If we
6519      added here many ivs specific to the uses, the optimization algorithm later
6520      would be likely to get stuck in a local minimum, thus causing us to create
6521      too many ivs.  The approach from few ivs to more seems more likely to be
6522      successful -- starting from few ivs, replacing an expensive use by a
6523      specific iv should always be a win.  */
6524   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6525     {
6526       cand = data->vcands[i];
6527 
6528       if (originalp && cand->pos != IP_ORIGINAL)
6529 	continue;
6530 
6531       if (!originalp && cand->iv->base_object != NULL_TREE)
6532 	continue;
6533 
6534       if (iv_ca_cand_used_p (ivs, cand))
6535 	continue;
6536 
6537       cp = get_group_iv_cost (data, group, cand);
6538       if (!cp)
6539 	continue;
6540 
6541       iv_ca_set_cp (data, ivs, group, cp);
6542       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6543 			       true);
6544       iv_ca_set_no_cp (data, ivs, group);
6545       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6546 
6547       if (act_cost < best_cost)
6548 	{
6549 	  best_cost = act_cost;
6550 
6551 	  iv_ca_delta_free (&best_delta);
6552 	  best_delta = act_delta;
6553 	}
6554       else
6555 	iv_ca_delta_free (&act_delta);
6556     }
6557 
6558   if (best_cost.infinite_cost_p ())
6559     {
6560       for (i = 0; i < group->n_map_members; i++)
6561 	{
6562 	  cp = group->cost_map + i;
6563 	  cand = cp->cand;
6564 	  if (!cand)
6565 	    continue;
6566 
6567 	  /* Already tried this.  */
6568 	  if (cand->important)
6569 	    {
6570 	      if (originalp && cand->pos == IP_ORIGINAL)
6571 		continue;
6572 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6573 		continue;
6574 	    }
6575 
6576 	  if (iv_ca_cand_used_p (ivs, cand))
6577 	    continue;
6578 
6579 	  act_delta = NULL;
6580 	  iv_ca_set_cp (data, ivs, group, cp);
6581 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6582 	  iv_ca_set_no_cp (data, ivs, group);
6583 	  act_delta = iv_ca_delta_add (group,
6584 				       iv_ca_cand_for_group (ivs, group),
6585 				       cp, act_delta);
6586 
6587 	  if (act_cost < best_cost)
6588 	    {
6589 	      best_cost = act_cost;
6590 
6591 	      if (best_delta)
6592 		iv_ca_delta_free (&best_delta);
6593 	      best_delta = act_delta;
6594 	    }
6595 	  else
6596 	    iv_ca_delta_free (&act_delta);
6597 	}
6598     }
6599 
6600   iv_ca_delta_commit (data, ivs, best_delta, true);
6601   iv_ca_delta_free (&best_delta);
6602 
6603   return !best_cost.infinite_cost_p ();
6604 }
6605 
6606 /* Finds an initial assignment of candidates to uses.  */
6607 
6608 static struct iv_ca *
6609 get_initial_solution (struct ivopts_data *data, bool originalp)
6610 {
6611   unsigned i;
6612   struct iv_ca *ivs = iv_ca_new (data);
6613 
6614   for (i = 0; i < data->vgroups.length (); i++)
6615     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6616       {
6617 	iv_ca_free (&ivs);
6618 	return NULL;
6619       }
6620 
6621   return ivs;
6622 }
6623 
6624 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6625    points to a bool variable; if it is true, this function tries to break
6626    out of a locally optimal fixed point by replacing candidates in IVS.  */
6627 
6628 static bool
6629 try_improve_iv_set (struct ivopts_data *data,
6630 		    struct iv_ca *ivs, bool *try_replace_p)
6631 {
6632   unsigned i, n_ivs;
6633   comp_cost acost, best_cost = iv_ca_cost (ivs);
6634   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6635   struct iv_cand *cand;
6636 
6637   /* Try extending the set of induction variables by one.  */
6638   for (i = 0; i < data->vcands.length (); i++)
6639     {
6640       cand = data->vcands[i];
6641 
6642       if (iv_ca_cand_used_p (ivs, cand))
6643 	continue;
6644 
6645       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6646       if (!act_delta)
6647 	continue;
6648 
6649       /* If we successfully added the candidate and the set is small enough,
6650 	 try optimizing it by removing other candidates.  */
6651       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6652       	{
6653 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6654 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6655 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6656 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6657 	}
6658 
6659       if (acost < best_cost)
6660 	{
6661 	  best_cost = acost;
6662 	  iv_ca_delta_free (&best_delta);
6663 	  best_delta = act_delta;
6664 	}
6665       else
6666 	iv_ca_delta_free (&act_delta);
6667     }
6668 
6669   if (!best_delta)
6670     {
6671       /* Try removing the candidates from the set instead.  */
6672       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6673 
6674       if (!best_delta && *try_replace_p)
6675 	{
6676 	  *try_replace_p = false;
6677 	  /* So far the candidate-selection algorithm tends to choose fewer
6678 	     IVs, so that it can handle loops with many variables for which
6679 	     the best choice is often to use only one general biv.  One
6680 	     weakness is that it cannot handle the opposite case, in which
6681 	     different candidates should be chosen with respect to each use.
6682 	     To solve this, we replace candidates in the manner described in
6683 	     the comments of iv_ca_replace, thus giving the general algorithm
6684 	     a chance to break out of the locally optimal fixed point.  */
6685 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6686 	}
6687 
6688       if (!best_delta)
6689 	return false;
6690     }
6691 
6692   iv_ca_delta_commit (data, ivs, best_delta, true);
6693   gcc_assert (best_cost == iv_ca_cost (ivs));
6694   iv_ca_delta_free (&best_delta);
6695   return true;
6696 }
6697 
6698 /* Attempts to find the optimal set of induction variables.  We use a simple
6699    greedy heuristic: we try to replace at most one candidate in the selected
6700    solution and remove the unused ivs as long as this improves the cost.  */
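/* An illustrative sketch of the search performed below (simplified; the real
   control flow is in get_initial_solution and try_improve_iv_set):

     set = assign some candidate to every group;
     while (try_improve_iv_set (set))
       ;   -- each step extends, prunes or replaces candidates, keeping
	      the change only if the total cost decreases

   The iteration stops at the first locally optimal set that is found.  */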
6701 
6702 static struct iv_ca *
6703 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6704 {
6705   struct iv_ca *set;
6706   bool try_replace_p = true;
6707 
6708   /* Get the initial solution.  */
6709   set = get_initial_solution (data, originalp);
6710   if (!set)
6711     {
6712       if (dump_file && (dump_flags & TDF_DETAILS))
6713 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6714       return NULL;
6715     }
6716 
6717   if (dump_file && (dump_flags & TDF_DETAILS))
6718     {
6719       fprintf (dump_file, "Initial set of candidates:\n");
6720       iv_ca_dump (data, dump_file, set);
6721     }
6722 
6723   while (try_improve_iv_set (data, set, &try_replace_p))
6724     {
6725       if (dump_file && (dump_flags & TDF_DETAILS))
6726 	{
6727 	  fprintf (dump_file, "Improved to:\n");
6728 	  iv_ca_dump (data, dump_file, set);
6729 	}
6730     }
6731 
6732   return set;
6733 }
6734 
6735 static struct iv_ca *
6736 find_optimal_iv_set (struct ivopts_data *data)
6737 {
6738   unsigned i;
6739   comp_cost cost, origcost;
6740   struct iv_ca *set, *origset;
6741 
6742   /* Determine the cost using a strategy that starts with the original IVs,
6743      and then try again with a strategy that prefers candidates not based
6744      on any IVs.  */
6745   origset = find_optimal_iv_set_1 (data, true);
6746   set = find_optimal_iv_set_1 (data, false);
6747 
6748   if (!origset && !set)
6749     return NULL;
6750 
6751   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6752   cost = set ? iv_ca_cost (set) : infinite_cost;
6753 
6754   if (dump_file && (dump_flags & TDF_DETAILS))
6755     {
6756       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6757 	       origcost.cost, origcost.complexity);
6758       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6759 	       cost.cost, cost.complexity);
6760     }
6761 
6762   /* Choose the one with the best cost.  */
6763   if (origcost <= cost)
6764     {
6765       if (set)
6766 	iv_ca_free (&set);
6767       set = origset;
6768     }
6769   else if (origset)
6770     iv_ca_free (&origset);
6771 
6772   for (i = 0; i < data->vgroups.length (); i++)
6773     {
6774       struct iv_group *group = data->vgroups[i];
6775       group->selected = iv_ca_cand_for_group (set, group)->cand;
6776     }
6777 
6778   return set;
6779 }
6780 
6781 /* Creates a new induction variable corresponding to CAND.  */
6782 
6783 static void
6784 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6785 {
6786   gimple_stmt_iterator incr_pos;
6787   tree base;
6788   struct iv_use *use;
6789   struct iv_group *group;
6790   bool after = false;
6791 
6792   gcc_assert (cand->iv != NULL);
6793 
6794   switch (cand->pos)
6795     {
6796     case IP_NORMAL:
6797       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6798       break;
6799 
6800     case IP_END:
6801       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6802       after = true;
6803       break;
6804 
6805     case IP_AFTER_USE:
6806       after = true;
6807       /* fall through */
6808     case IP_BEFORE_USE:
6809       incr_pos = gsi_for_stmt (cand->incremented_at);
6810       break;
6811 
6812     case IP_ORIGINAL:
6813       /* Mark that the iv is preserved.  */
6814       name_info (data, cand->var_before)->preserve_biv = true;
6815       name_info (data, cand->var_after)->preserve_biv = true;
6816 
6817       /* Rewrite the increment so that it uses var_before directly.  */
6818       use = find_interesting_uses_op (data, cand->var_after);
6819       group = data->vgroups[use->group_id];
6820       group->selected = cand;
6821       return;
6822     }
6823 
6824   gimple_add_tmp_var (cand->var_before);
6825 
6826   base = unshare_expr (cand->iv->base);
6827 
6828   create_iv (base, unshare_expr (cand->iv->step),
6829 	     cand->var_before, data->current_loop,
6830 	     &incr_pos, after, &cand->var_before, &cand->var_after);
6831 }
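
/* Illustration only -- a conceptual sketch (not the exact GIMPLE emitted) of
   what create_iv produces above for a candidate with base B and step S at
   IP_NORMAL:

     # var_before = PHI <B (preheader), var_after (latch)>
     ...
     var_after = var_before + S;

   with the increment statement inserted at the position selected from
   CAND->pos.  */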
6832 
6833 /* Creates new induction variables described in SET.  */
6834 
6835 static void
6836 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6837 {
6838   unsigned i;
6839   struct iv_cand *cand;
6840   bitmap_iterator bi;
6841 
6842   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6843     {
6844       cand = data->vcands[i];
6845       create_new_iv (data, cand);
6846     }
6847 
6848   if (dump_file && (dump_flags & TDF_DETAILS))
6849     {
6850       fprintf (dump_file, "Selected IV set for loop %d",
6851 	       data->current_loop->num);
6852       if (data->loop_loc != UNKNOWN_LOCATION)
6853 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6854 		 LOCATION_LINE (data->loop_loc));
6855       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6856 	       avg_loop_niter (data->current_loop));
6857       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6858       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6859 	{
6860 	  cand = data->vcands[i];
6861 	  dump_cand (dump_file, cand);
6862 	}
6863       fprintf (dump_file, "\n");
6864     }
6865 }
6866 
6867 /* Rewrites USE (definition of iv used in a nonlinear expression)
6868    using candidate CAND.  */
6869 
6870 static void
6871 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6872 			    struct iv_use *use, struct iv_cand *cand)
6873 {
6874   gassign *ass;
6875   gimple_stmt_iterator bsi;
6876   tree comp, type = get_use_type (use), tgt;
6877 
6878   /* An important special case -- if we are asked to express the value of
6879      the original iv by itself, just exit; there is no need to
6880      introduce a new computation (which might also require casting the
6881      variable to unsigned and back).  */
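  /* E.g. (hypothetical): if the use statement is the original increment
     "i_after = i_before + 1" and CAND is that original iv, the statement
     can usually be left untouched.  */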
6882   if (cand->pos == IP_ORIGINAL
6883       && cand->incremented_at == use->stmt)
6884     {
6885       tree op = NULL_TREE;
6886       enum tree_code stmt_code;
6887 
6888       gcc_assert (is_gimple_assign (use->stmt));
6889       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6890 
6891       /* Check whether we may leave the computation unchanged.
6892 	 This is the case only if it does not rely on other
6893 	 computations in the loop -- otherwise, the computation
6894 	 we rely upon may be removed in remove_unused_ivs,
6895 	 thus leading to ICE.  */
6896       stmt_code = gimple_assign_rhs_code (use->stmt);
6897       if (stmt_code == PLUS_EXPR
6898 	  || stmt_code == MINUS_EXPR
6899 	  || stmt_code == POINTER_PLUS_EXPR)
6900 	{
6901 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6902 	    op = gimple_assign_rhs2 (use->stmt);
6903 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6904 	    op = gimple_assign_rhs1 (use->stmt);
6905 	}
6906 
6907       if (op != NULL_TREE)
6908 	{
6909 	  if (expr_invariant_in_loop_p (data->current_loop, op))
6910 	    return;
6911 	  if (TREE_CODE (op) == SSA_NAME)
6912 	    {
6913 	      struct iv *iv = get_iv (data, op);
6914 	      if (iv != NULL && integer_zerop (iv->step))
6915 		return;
6916 	    }
6917 	}
6918     }
6919 
6920   switch (gimple_code (use->stmt))
6921     {
6922     case GIMPLE_PHI:
6923       tgt = PHI_RESULT (use->stmt);
6924 
6925       /* If we should keep the biv, do not replace it.  */
6926       if (name_info (data, tgt)->preserve_biv)
6927 	return;
6928 
6929       bsi = gsi_after_labels (gimple_bb (use->stmt));
6930       break;
6931 
6932     case GIMPLE_ASSIGN:
6933       tgt = gimple_assign_lhs (use->stmt);
6934       bsi = gsi_for_stmt (use->stmt);
6935       break;
6936 
6937     default:
6938       gcc_unreachable ();
6939     }
6940 
6941   aff_tree aff_inv, aff_var;
6942   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6943 			      use, cand, &aff_inv, &aff_var))
6944     gcc_unreachable ();
6945 
6946   unshare_aff_combination (&aff_inv);
6947   unshare_aff_combination (&aff_var);
6948   /* Prefer a CSE opportunity over a loop invariant by adding the offset
6949      last, so that iv_uses with different offsets can be CSEed.  */
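  /* For example (hypothetical uses, for illustration only): given the uses
     *(p + i*4 + 16) and *(p + i*4 + 32), stripping the constant offsets
     here lets both uses share the computation "p + i*4"; the offsets 16
     and 32 are added back below.  */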
6950   poly_widest_int offset = aff_inv.offset;
6951   aff_inv.offset = 0;
6952 
6953   gimple_seq stmt_list = NULL, seq = NULL;
6954   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6955   tree comp_op2 = aff_combination_to_tree (&aff_var);
6956   gcc_assert (comp_op1 && comp_op2);
6957 
6958   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6959   gimple_seq_add_seq (&stmt_list, seq);
6960   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6961   gimple_seq_add_seq (&stmt_list, seq);
6962 
6963   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6964     std::swap (comp_op1, comp_op2);
6965 
6966   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6967     {
6968       comp = fold_build_pointer_plus (comp_op1,
6969 				      fold_convert (sizetype, comp_op2));
6970       comp = fold_build_pointer_plus (comp,
6971 				      wide_int_to_tree (sizetype, offset));
6972     }
6973   else
6974     {
6975       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6976 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
6977       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6978 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6979     }
6980 
6981   comp = fold_convert (type, comp);
6982   if (!valid_gimple_rhs_p (comp)
6983       || (gimple_code (use->stmt) != GIMPLE_PHI
6984 	  /* We can't allow re-allocating the stmt as it might be pointed
6985 	     to still.  */
6986 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6987 	      >= gimple_num_ops (gsi_stmt (bsi)))))
6988     {
6989       comp = force_gimple_operand (comp, &seq, true, NULL);
6990       gimple_seq_add_seq (&stmt_list, seq);
6991       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6992 	{
6993 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6994 	  /* As this isn't a plain copy we have to reset alignment
6995 	     information.  */
6996 	  if (SSA_NAME_PTR_INFO (comp))
6997 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6998 	}
6999     }
7000 
7001   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7002   if (gimple_code (use->stmt) == GIMPLE_PHI)
7003     {
7004       ass = gimple_build_assign (tgt, comp);
7005       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7006 
7007       bsi = gsi_for_stmt (use->stmt);
7008       remove_phi_node (&bsi, false);
7009     }
7010   else
7011     {
7012       gimple_assign_set_rhs_from_tree (&bsi, comp);
7013       use->stmt = gsi_stmt (bsi);
7014     }
7015 }
7016 
7017 /* Performs a peephole optimization to reorder the iv update statement with
7018    a mem ref to enable instruction combining in later phases. The mem ref uses
7019    the iv value before the update, so the reordering transformation requires
7020    adjustment of the offset. CAND is the selected IV_CAND.
7021 
7022    Example:
7023 
7024    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7025    iv2 = iv1 + 1;
7026 
7027    if (t < val)      (1)
7028      goto L;
7029    goto Head;
7030 
7031 
7032    Directly propagating t over to (1) would introduce an overlapping live
7033    range and thus increase register pressure.  This peephole transforms it into:
7034 
7035 
7036    iv2 = iv1 + 1;
7037    t = MEM_REF (base, iv2, 8, 8);
7038    if (t < val)
7039      goto L;
7040    goto Head;
7041 */
7042 
7043 static void
7044 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7045 {
7046   tree var_after;
7047   gimple *iv_update, *stmt;
7048   basic_block bb;
7049   gimple_stmt_iterator gsi, gsi_iv;
7050 
7051   if (cand->pos != IP_NORMAL)
7052     return;
7053 
7054   var_after = cand->var_after;
7055   iv_update = SSA_NAME_DEF_STMT (var_after);
7056 
7057   bb = gimple_bb (iv_update);
7058   gsi = gsi_last_nondebug_bb (bb);
7059   stmt = gsi_stmt (gsi);
7060 
7061   /* Only handle conditional statements for now.  */
7062   if (gimple_code (stmt) != GIMPLE_COND)
7063     return;
7064 
7065   gsi_prev_nondebug (&gsi);
7066   stmt = gsi_stmt (gsi);
7067   if (stmt != iv_update)
7068     return;
7069 
7070   gsi_prev_nondebug (&gsi);
7071   if (gsi_end_p (gsi))
7072     return;
7073 
7074   stmt = gsi_stmt (gsi);
7075   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7076     return;
7077 
7078   if (stmt != use->stmt)
7079     return;
7080 
7081   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7082     return;
7083 
7084   if (dump_file && (dump_flags & TDF_DETAILS))
7085     {
7086       fprintf (dump_file, "Reordering \n");
7087       print_gimple_stmt (dump_file, iv_update, 0);
7088       print_gimple_stmt (dump_file, use->stmt, 0);
7089       fprintf (dump_file, "\n");
7090     }
7091 
7092   gsi = gsi_for_stmt (use->stmt);
7093   gsi_iv = gsi_for_stmt (iv_update);
7094   gsi_move_before (&gsi_iv, &gsi);
7095 
7096   cand->pos = IP_BEFORE_USE;
7097   cand->incremented_at = use->stmt;
7098 }
7099 
7100 /* Return the alias pointer type that should be used for a MEM_REF
7101    associated with USE, which has type USE_PTR_ADDRESS.  */
7102 
7103 static tree
7104 get_alias_ptr_type_for_ptr_address (iv_use *use)
7105 {
7106   gcall *call = as_a <gcall *> (use->stmt);
7107   switch (gimple_call_internal_fn (call))
7108     {
7109     case IFN_MASK_LOAD:
7110     case IFN_MASK_STORE:
7111       /* The second argument contains the correct alias type.  */
7112       gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7113       return TREE_TYPE (gimple_call_arg (call, 1));
7114 
7115     default:
7116       gcc_unreachable ();
7117     }
7118 }
7119 
7120 
7121 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7122 
7123 static void
7124 rewrite_use_address (struct ivopts_data *data,
7125 		     struct iv_use *use, struct iv_cand *cand)
7126 {
7127   aff_tree aff;
7128   bool ok;
7129 
7130   adjust_iv_update_pos (cand, use);
7131   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7132   gcc_assert (ok);
7133   unshare_aff_combination (&aff);
7134 
7135   /* To avoid undefined overflow problems, all IV candidates use unsigned
7136      integer types.  The drawback is that this makes it impossible for
7137      create_mem_ref to distinguish an IV that is based on a memory object
7138      from one that represents simply an offset.
7139 
7140      To work around this problem, we pass a hint to create_mem_ref that
7141      indicates which variable (if any) in aff is an IV based on a memory
7142      object.  Note that we only consider the candidate.  If this is not
7143      based on an object, the base of the reference is in some subexpression
7144      of the use -- but these will use pointer types, so they are recognized
7145      by the create_mem_ref heuristics anyway.  */
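  /* Hypothetical illustration: for a use "p[i]" rewritten with a candidate
     whose iv is "&p[0] + i * 4", the candidate has a base object ("p"), so
     its value at the use is passed as BASE_HINT below; for a candidate that
     is simply "i * 4", no hint is passed.  */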
7146   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7147   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7148   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7149   tree type = use->mem_type;
7150   tree alias_ptr_type;
7151   if (use->type == USE_PTR_ADDRESS)
7152     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7153   else
7154     {
7155       gcc_assert (type == TREE_TYPE (*use->op_p));
7156       unsigned int align = get_object_alignment (*use->op_p);
7157       if (align != TYPE_ALIGN (type))
7158 	type = build_aligned_type (type, align);
7159       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7160     }
7161   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7162 			     iv, base_hint, data->speed);
7163 
7164   if (use->type == USE_PTR_ADDRESS)
7165     {
7166       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7167       ref = fold_convert (get_use_type (use), ref);
7168       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7169 				      true, GSI_SAME_STMT);
7170     }
7171   else
7172     copy_ref_info (ref, *use->op_p);
7173 
7174   *use->op_p = ref;
7175 }
7176 
7177 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7178    candidate CAND.  */
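/* Illustration only (hypothetical GIMPLE): when a cheap exit bound exists,
   an exit test such as

     if (i_5 < n_4)

   is rewritten below into a test against the selected candidate, e.g.

     if (ivtmp_7 != bound_9)

   where bound_9 is computed once on the preheader edge.  Otherwise the
   original value is simply re-expressed in terms of the candidate.  */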
7179 
7180 static void
7181 rewrite_use_compare (struct ivopts_data *data,
7182 		     struct iv_use *use, struct iv_cand *cand)
7183 {
7184   tree comp, op, bound;
7185   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7186   enum tree_code compare;
7187   struct iv_group *group = data->vgroups[use->group_id];
7188   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7189 
7190   bound = cp->value;
7191   if (bound)
7192     {
7193       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7194       tree var_type = TREE_TYPE (var);
7195       gimple_seq stmts;
7196 
7197       if (dump_file && (dump_flags & TDF_DETAILS))
7198 	{
7199 	  fprintf (dump_file, "Replacing exit test: ");
7200 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7201 	}
7202       compare = cp->comp;
7203       bound = unshare_expr (fold_convert (var_type, bound));
7204       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7205       if (stmts)
7206 	gsi_insert_seq_on_edge_immediate (
7207 		loop_preheader_edge (data->current_loop),
7208 		stmts);
7209 
7210       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7211       gimple_cond_set_lhs (cond_stmt, var);
7212       gimple_cond_set_code (cond_stmt, compare);
7213       gimple_cond_set_rhs (cond_stmt, op);
7214       return;
7215     }
7216 
7217   /* The induction variable elimination failed; just express the original
7218      giv.  */
7219   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7220   gcc_assert (comp != NULL_TREE);
7221   gcc_assert (use->op_p != NULL);
7222   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7223 					 SSA_NAME_VAR (*use->op_p),
7224 					 true, GSI_SAME_STMT);
7225 }
7226 
7227 /* Rewrite the groups using the selected induction variables.  */
7228 
7229 static void
7230 rewrite_groups (struct ivopts_data *data)
7231 {
7232   unsigned i, j;
7233 
7234   for (i = 0; i < data->vgroups.length (); i++)
7235     {
7236       struct iv_group *group = data->vgroups[i];
7237       struct iv_cand *cand = group->selected;
7238 
7239       gcc_assert (cand);
7240 
7241       if (group->type == USE_NONLINEAR_EXPR)
7242 	{
7243 	  for (j = 0; j < group->vuses.length (); j++)
7244 	    {
7245 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7246 	      update_stmt (group->vuses[j]->stmt);
7247 	    }
7248 	}
7249       else if (address_p (group->type))
7250 	{
7251 	  for (j = 0; j < group->vuses.length (); j++)
7252 	    {
7253 	      rewrite_use_address (data, group->vuses[j], cand);
7254 	      update_stmt (group->vuses[j]->stmt);
7255 	    }
7256 	}
7257       else
7258 	{
7259 	  gcc_assert (group->type == USE_COMPARE);
7260 
7261 	  for (j = 0; j < group->vuses.length (); j++)
7262 	    {
7263 	      rewrite_use_compare (data, group->vuses[j], cand);
7264 	      update_stmt (group->vuses[j]->stmt);
7265 	    }
7266 	}
7267     }
7268 }
7269 
7270 /* Removes the ivs that are not used after rewriting.  */
7271 
7272 static void
7273 remove_unused_ivs (struct ivopts_data *data)
7274 {
7275   unsigned j;
7276   bitmap_iterator bi;
7277   bitmap toremove = BITMAP_ALLOC (NULL);
7278 
7279   /* Figure out an order in which to release SSA DEFs so that we don't
7280      release something that we'd have to propagate into a debug stmt
7281      afterwards.  */
7282   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7283     {
7284       struct version_info *info;
7285 
7286       info = ver_info (data, j);
7287       if (info->iv
7288 	  && !integer_zerop (info->iv->step)
7289 	  && !info->inv_id
7290 	  && !info->iv->nonlin_use
7291 	  && !info->preserve_biv)
7292 	{
7293 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7294 
7295 	  tree def = info->iv->ssa_name;
7296 
7297 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7298 	    {
7299 	      imm_use_iterator imm_iter;
7300 	      use_operand_p use_p;
7301 	      gimple *stmt;
7302 	      int count = 0;
7303 
7304 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7305 		{
7306 		  if (!gimple_debug_bind_p (stmt))
7307 		    continue;
7308 
7309 		  /* We just want to determine whether to do nothing
7310 		     (count == 0), to substitute the computed
7311 		     expression into a single use of the SSA DEF by
7312 		     itself (count == 1), or to use a debug temp
7313 		     because the SSA DEF is used multiple times or as
7314 		     part of a larger expression (count > 1). */
7315 		  count++;
7316 		  if (gimple_debug_bind_get_value (stmt) != def)
7317 		    count++;
7318 
7319 		  if (count > 1)
7320 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7321 		}
7322 
7323 	      if (!count)
7324 		continue;
7325 
7326 	      struct iv_use dummy_use;
7327 	      struct iv_cand *best_cand = NULL, *cand;
7328 	      unsigned i, best_pref = 0, cand_pref;
7329 
7330 	      memset (&dummy_use, 0, sizeof (dummy_use));
7331 	      dummy_use.iv = info->iv;
7332 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7333 		{
7334 		  cand = data->vgroups[i]->selected;
7335 		  if (cand == best_cand)
7336 		    continue;
7337 		  cand_pref = operand_equal_p (cand->iv->step,
7338 					       info->iv->step, 0)
7339 		    ? 4 : 0;
7340 		  cand_pref
7341 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7342 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7343 		    ? 2 : 0;
7344 		  cand_pref
7345 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7346 		    ? 1 : 0;
7347 		  if (best_cand == NULL || best_pref < cand_pref)
7348 		    {
7349 		      best_cand = cand;
7350 		      best_pref = cand_pref;
7351 		    }
7352 		}
7353 
7354 	      if (!best_cand)
7355 		continue;
7356 
7357 	      tree comp = get_computation_at (data->current_loop,
7358 					      SSA_NAME_DEF_STMT (def),
7359 					      &dummy_use, best_cand);
7360 	      if (!comp)
7361 		continue;
7362 
7363 	      if (count > 1)
7364 		{
7365 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7366 		  DECL_ARTIFICIAL (vexpr) = 1;
7367 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7368 		  if (SSA_NAME_VAR (def))
7369 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7370 		  else
7371 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7372 		  gdebug *def_temp
7373 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7374 		  gimple_stmt_iterator gsi;
7375 
7376 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7377 		    gsi = gsi_after_labels (gimple_bb
7378 					    (SSA_NAME_DEF_STMT (def)));
7379 		  else
7380 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7381 
7382 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7383 		  comp = vexpr;
7384 		}
7385 
7386 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7387 		{
7388 		  if (!gimple_debug_bind_p (stmt))
7389 		    continue;
7390 
7391 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7392 		    SET_USE (use_p, comp);
7393 
7394 		  update_stmt (stmt);
7395 		}
7396 	    }
7397 	}
7398     }
7399 
7400   release_defs_bitset (toremove);
7401 
7402   BITMAP_FREE (toremove);
7403 }
7404 
7405 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7406    for hash_map::traverse.  */
7407 
7408 bool
7409 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7410 {
7411   free (value);
7412   return true;
7413 }
7414 
7415 /* Frees data allocated by the optimization of a single loop.  */
7416 
7417 static void
7418 free_loop_data (struct ivopts_data *data)
7419 {
7420   unsigned i, j;
7421   bitmap_iterator bi;
7422   tree obj;
7423 
7424   if (data->niters)
7425     {
7426       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7427       delete data->niters;
7428       data->niters = NULL;
7429     }
7430 
7431   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7432     {
7433       struct version_info *info;
7434 
7435       info = ver_info (data, i);
7436       info->iv = NULL;
7437       info->has_nonlin_use = false;
7438       info->preserve_biv = false;
7439       info->inv_id = 0;
7440     }
7441   bitmap_clear (data->relevant);
7442   bitmap_clear (data->important_candidates);
7443 
7444   for (i = 0; i < data->vgroups.length (); i++)
7445     {
7446       struct iv_group *group = data->vgroups[i];
7447 
7448       for (j = 0; j < group->vuses.length (); j++)
7449 	free (group->vuses[j]);
7450       group->vuses.release ();
7451 
7452       BITMAP_FREE (group->related_cands);
7453       for (j = 0; j < group->n_map_members; j++)
7454 	{
7455 	  if (group->cost_map[j].inv_vars)
7456 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7457 	  if (group->cost_map[j].inv_exprs)
7458 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7459 	}
7460 
7461       free (group->cost_map);
7462       free (group);
7463     }
7464   data->vgroups.truncate (0);
7465 
7466   for (i = 0; i < data->vcands.length (); i++)
7467     {
7468       struct iv_cand *cand = data->vcands[i];
7469 
7470       if (cand->inv_vars)
7471 	BITMAP_FREE (cand->inv_vars);
7472       if (cand->inv_exprs)
7473 	BITMAP_FREE (cand->inv_exprs);
7474       free (cand);
7475     }
7476   data->vcands.truncate (0);
7477 
7478   if (data->version_info_size < num_ssa_names)
7479     {
7480       data->version_info_size = 2 * num_ssa_names;
7481       free (data->version_info);
7482       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7483     }
7484 
7485   data->max_inv_var_id = 0;
7486   data->max_inv_expr_id = 0;
7487 
7488   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7489     SET_DECL_RTL (obj, NULL_RTX);
7490 
7491   decl_rtl_to_reset.truncate (0);
7492 
7493   data->inv_expr_tab->empty ();
7494 
7495   data->iv_common_cand_tab->empty ();
7496   data->iv_common_cands.truncate (0);
7497 }
7498 
7499 /* Finalizes data structures used by the iv optimization pass.  */
7501 
7502 static void
7503 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7504 {
7505   free_loop_data (data);
7506   free (data->version_info);
7507   BITMAP_FREE (data->relevant);
7508   BITMAP_FREE (data->important_candidates);
7509 
7510   decl_rtl_to_reset.release ();
7511   data->vgroups.release ();
7512   data->vcands.release ();
7513   delete data->inv_expr_tab;
7514   data->inv_expr_tab = NULL;
7515   free_affine_expand_cache (&data->name_expansion_cache);
7516   if (data->base_object_map)
7517     delete data->base_object_map;
7518   delete data->iv_common_cand_tab;
7519   data->iv_common_cand_tab = NULL;
7520   data->iv_common_cands.release ();
7521   obstack_free (&data->iv_obstack, NULL);
7522 }
7523 
7524 /* Returns true if the loop body BODY includes any function calls.  */
7525 
7526 static bool
7527 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7528 {
7529   gimple_stmt_iterator gsi;
7530   unsigned i;
7531 
7532   for (i = 0; i < num_nodes; i++)
7533     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7534       {
7535 	gimple *stmt = gsi_stmt (gsi);
7536 	if (is_gimple_call (stmt)
7537 	    && !gimple_call_internal_p (stmt)
7538 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7539 	  return true;
7540       }
7541   return false;
7542 }
7543 
7544 /* Optimizes the LOOP.  Returns true if anything changed.  */
7545 
7546 static bool
7547 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7548 {
7549   bool changed = false;
7550   struct iv_ca *iv_ca;
7551   edge exit = single_dom_exit (loop);
7552   basic_block *body;
7553 
7554   gcc_assert (!data->niters);
7555   data->current_loop = loop;
7556   data->loop_loc = find_loop_location (loop);
7557   data->speed = optimize_loop_for_speed_p (loop);
7558 
7559   if (dump_file && (dump_flags & TDF_DETAILS))
7560     {
7561       fprintf (dump_file, "Processing loop %d", loop->num);
7562       if (data->loop_loc != UNKNOWN_LOCATION)
7563 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7564 		 LOCATION_LINE (data->loop_loc));
7565       fprintf (dump_file, "\n");
7566 
7567       if (exit)
7568 	{
7569 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7570 		   exit->src->index, exit->dest->index);
7571 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7572 	  fprintf (dump_file, "\n");
7573 	}
7574 
7575       fprintf (dump_file, "\n");
7576     }
7577 
7578   body = get_loop_body (loop);
7579   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7580   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7581   free (body);
7582 
7583   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7584 
7585   /* For each ssa name, determine whether it behaves as an induction variable
7586      in some loop.  */
7587   if (!find_induction_variables (data))
7588     goto finish;
7589 
7590   /* Finds interesting uses (item 1).  */
7591   find_interesting_uses (data);
7592   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7593     goto finish;
7594 
7595   /* Finds candidates for the induction variables (item 2).  */
7596   find_iv_candidates (data);
7597 
7598   /* Calculates the costs (item 3, part 1).  */
7599   determine_iv_costs (data);
7600   determine_group_iv_costs (data);
7601   determine_set_costs (data);
7602 
7603   /* Find the optimal set of induction variables (item 3, part 2).  */
7604   iv_ca = find_optimal_iv_set (data);
7605   if (!iv_ca)
7606     goto finish;
7607   changed = true;
7608 
7609   /* Create the new induction variables (item 4, part 1).  */
7610   create_new_ivs (data, iv_ca);
7611   iv_ca_free (&iv_ca);
7612 
7613   /* Rewrite the uses (item 4, part 2).  */
7614   rewrite_groups (data);
7615 
7616   /* Remove the ivs that are unused after rewriting.  */
7617   remove_unused_ivs (data);
7618 
7619   /* We have changed the structure of induction variables; it might happen
7620      that definitions in the scev database refer to some of them that were
7621      eliminated.  */
7622   scev_reset ();
7623 
7624 finish:
7625   free_loop_data (data);
7626 
7627   return changed;
7628 }
7629 
7630 /* Main entry point.  Optimizes induction variables in loops.  */
7631 
7632 void
7633 tree_ssa_iv_optimize (void)
7634 {
7635   struct loop *loop;
7636   struct ivopts_data data;
7637 
7638   tree_ssa_iv_optimize_init (&data);
7639 
7640   /* Optimize the loops starting with the innermost ones.  */
7641   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7642     {
7643       if (dump_file && (dump_flags & TDF_DETAILS))
7644 	flow_loop_dump (loop, dump_file, NULL, 1);
7645 
7646       tree_ssa_iv_optimize_loop (&data, loop);
7647     }
7648 
7649   tree_ssa_iv_optimize_finalize (&data);
7650 }
7651 
7652 #include "gt-tree-ssa-loop-ivopts.h"
7653