1 /* Induction variable optimizations.
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does it in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note that the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only by a constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variable's
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.  */
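
/* A purely illustrative (hypothetical) example of the transformations
   described above, not taken from any particular testcase: a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may, after strength reduction and induction variable elimination, end up
   using a single pointer induction variable, conceptually

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so that the multiplication hidden in the array indexing and the separate
   counter variable are no longer needed.  */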
68 
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111 
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113    cost of different addressing modes.  This should be moved to a TBD
114    interface between the GIMPLE and RTL worlds.  */
115 
116 /* The infinite cost.  */
117 #define INFTY 10000000
118 
119 /* Returns the expected number of loop iterations for LOOP.
120    The average trip count is computed from profile data if it
121    exists. */
122 
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127   if (niter == -1)
128     {
129       niter = likely_max_stmt_executions_int (loop);
130 
131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133     }
134 
135   return niter;
136 }
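
/* A purely illustrative note on avg_loop_niter above, with a made-up number:
   if profile data estimates, say, 8 executions of the loop body, 8 is
   returned; without such an estimate, the likely maximum is used instead,
   capped at PARAM_AVG_LOOP_NITER (which is also returned when no bound is
   known at all).  */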
137 
138 struct iv_use;
139 
140 /* Representation of the induction variable.  */
141 struct iv
142 {
143   tree base;		/* Initial value of the iv.  */
144   tree base_object;	/* A memory object to which the induction variable points.  */
145   tree step;		/* Step of the iv (constant only).  */
146   tree ssa_name;	/* The ssa name with the value.  */
147   struct iv_use *nonlin_use;	/* The use in a nonlinear expression, if the iv has one.  */
148   bool biv_p;		/* Is it a biv?  */
149   bool no_overflow;	/* True if the iv doesn't overflow.  */
150   bool have_address_use;/* For a biv, indicates whether it's used in any address
151 			   type use.  */
152 };
153 
154 /* Per-ssa version information (induction variable descriptions, etc.).  */
155 struct version_info
156 {
157   tree name;		/* The ssa name.  */
158   struct iv *iv;	/* Induction variable description.  */
159   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
160 			   an expression that is not an induction variable.  */
161   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
162   unsigned inv_id;	/* Id of an invariant.  */
163 };
164 
165 /* Types of uses.  */
166 enum use_type
167 {
168   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
169   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
170 			   reference.  */
171   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
172 			   cases where the expansion of the function
173 			   will turn the argument into a normal address.  */
174   USE_COMPARE		/* Use is a compare.  */
175 };
176 
177 /* Cost of a computation.  */
178 struct comp_cost
179 {
180   comp_cost (): cost (0), complexity (0), scratch (0)
181   {}
182 
183   comp_cost (int cost, unsigned complexity, int scratch = 0)
184     : cost (cost), complexity (complexity), scratch (scratch)
185   {}
186 
187   /* Returns true if COST is infinite.  */
188   bool infinite_cost_p ();
189 
190   /* Adds costs COST1 and COST2.  */
191   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192 
193   /* Adds COST to the comp_cost.  */
194   comp_cost operator+= (comp_cost cost);
195 
196   /* Adds constant C to this comp_cost.  */
197   comp_cost operator+= (HOST_WIDE_INT c);
198 
199   /* Subtracts constant C from this comp_cost.  */
200   comp_cost operator-= (HOST_WIDE_INT c);
201 
202   /* Divide the comp_cost by constant C.  */
203   comp_cost operator/= (HOST_WIDE_INT c);
204 
205   /* Multiply the comp_cost by constant C.  */
206   comp_cost operator*= (HOST_WIDE_INT c);
207 
208   /* Subtracts cost COST2 from COST1.  */
209   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210 
211   /* Subtracts COST from this comp_cost.  */
212   comp_cost operator-= (comp_cost cost);
213 
214   /* Returns true if COST1 is smaller than COST2.  */
215   friend bool operator< (comp_cost cost1, comp_cost cost2);
216 
217   /* Returns true if COST1 and COST2 are equal.  */
218   friend bool operator== (comp_cost cost1, comp_cost cost2);
219 
220   /* Returns true if COST1 is smaller than or equal to COST2.  */
221   friend bool operator<= (comp_cost cost1, comp_cost cost2);
222 
223   int cost;		/* The runtime cost.  */
224   unsigned complexity;  /* The estimate of the complexity of the code for
225 			   the computation (in no concrete units --
226 			   complexity field should be larger for more
227 			   complex expressions and addressing modes).  */
228   int scratch;		/* Scratch used during cost computation.  */
229 };
230 
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233 
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237   return cost == INFTY;
238 }
239 
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244     return infinite_cost;
245 
246   cost1.cost += cost2.cost;
247   cost1.complexity += cost2.complexity;
248 
249   return cost1;
250 }
251 
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
254 {
255   if (cost1.infinite_cost_p ())
256     return infinite_cost;
257 
258   gcc_assert (!cost2.infinite_cost_p ());
259 
260   cost1.cost -= cost2.cost;
261   cost1.complexity -= cost2.complexity;
262 
263   return cost1;
264 }
265 
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
268 {
269   *this = *this + cost;
270   return *this;
271 }
272 
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
275 {
276   if (infinite_cost_p ())
277     return *this;
278 
279   this->cost += c;
280 
281   return *this;
282 }
283 
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
286 {
287   if (infinite_cost_p ())
288     return *this;
289 
290   this->cost -= c;
291 
292   return *this;
293 }
294 
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
297 {
298   if (infinite_cost_p ())
299     return *this;
300 
301   this->cost /= c;
302 
303   return *this;
304 }
305 
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
308 {
309   if (infinite_cost_p ())
310     return *this;
311 
312   this->cost *= c;
313 
314   return *this;
315 }
316 
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
319 {
320   *this = *this - cost;
321   return *this;
322 }
323 
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
326 {
327   if (cost1.cost == cost2.cost)
328     return cost1.complexity < cost2.complexity;
329 
330   return cost1.cost < cost2.cost;
331 }
332 
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
335 {
336   return cost1.cost == cost2.cost
337     && cost1.complexity == cost2.complexity;
338 }
339 
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
342 {
343   return cost1 < cost2 || cost1 == cost2;
344 }
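
/* A short worked example of the cost semantics above (purely illustrative,
   with made-up numbers): comp_cost (4, 1) + comp_cost (2, 0) yields
   comp_cost (6, 1); when comparing comp_cost (4, 1) with comp_cost (4, 2),
   the runtime costs are equal, so the complexity field acts as a tie-breaker
   and the former is considered smaller.  Addition involving infinite_cost
   always yields infinite_cost.  */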
345 
346 struct iv_inv_expr_ent;
347 
348 /* The candidate - cost pair.  */
349 struct cost_pair
350 {
351   struct iv_cand *cand;	/* The candidate.  */
352   comp_cost cost;	/* The cost.  */
353   enum tree_code comp;	/* For iv elimination, the comparison.  */
354   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
355 			   preserved when representing iv_use with iv_cand.  */
356   bitmap inv_exprs;	/* The list of newly created invariant expressions
357 			   when representing iv_use with iv_cand.  */
358   tree value;		/* For final value elimination, the expression for
359 			   the final value of the iv.  For iv elimination,
360 			   the new bound to compare with.  */
361 };
362 
363 /* Use.  */
364 struct iv_use
365 {
366   unsigned id;		/* The id of the use.  */
367   unsigned group_id;	/* The group id the use belongs to.  */
368   enum use_type type;	/* Type of the use.  */
369   tree mem_type;	/* The memory type to use when testing whether an
370 			   address is legitimate, and what the address's
371 			   cost is.  */
372   struct iv *iv;	/* The induction variable it is based on.  */
373   gimple *stmt;		/* Statement in which it occurs.  */
374   tree *op_p;		/* The place where it occurs.  */
375 
376   tree addr_base;	/* Base address with const offset stripped.  */
377   poly_uint64_pod addr_offset;
378 			/* Const offset stripped from base address.  */
379 };
380 
381 /* Group of uses.  */
382 struct iv_group
383 {
384   /* The id of the group.  */
385   unsigned id;
386   /* Uses of the group are of the same type.  */
387   enum use_type type;
388   /* The set of "related" IV candidates, plus the important ones.  */
389   bitmap related_cands;
390   /* Number of IV candidates in the cost_map.  */
391   unsigned n_map_members;
392   /* The costs w.r.t. the iv candidates.  */
393   struct cost_pair *cost_map;
394   /* The selected candidate for the group.  */
395   struct iv_cand *selected;
396   /* Uses in the group.  */
397   vec<struct iv_use *> vuses;
398 };
399 
400 /* The position where the iv is computed.  */
401 enum iv_position
402 {
403   IP_NORMAL,		/* At the end, just before the exit condition.  */
404   IP_END,		/* At the end of the latch block.  */
405   IP_BEFORE_USE,	/* Immediately before a specific use.  */
406   IP_AFTER_USE,		/* Immediately after a specific use.  */
407   IP_ORIGINAL		/* The original biv.  */
408 };
409 
410 /* The induction variable candidate.  */
411 struct iv_cand
412 {
413   unsigned id;		/* The number of the candidate.  */
414   bool important;	/* Whether this is an "important" candidate, i.e. such
415 			   that it should be considered by all uses.  */
416   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
417   gimple *incremented_at;/* For original biv, the statement where it is
418 			   incremented.  */
419   tree var_before;	/* The variable used for it before increment.  */
420   tree var_after;	/* The variable used for it after increment.  */
421   struct iv *iv;	/* The value of the candidate.  NULL for
422 			   "pseudocandidate" used to indicate the possibility
423 			   to replace the final value of an iv by direct
424 			   computation of the value.  */
425   unsigned cost;	/* Cost of the candidate.  */
426   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
427   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 			      where it is incremented.  */
429   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
430 			   iv_cand.  */
431   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
432 			   handle it as a new invariant expression which will
433 			   be hoisted out of the loop.  */
434   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
435 			   smaller type.  */
436 };
437 
438 /* Hashtable entry for common candidate derived from iv uses.  */
439 struct iv_common_cand
440 {
441   tree base;
442   tree step;
443   /* IV uses from which this common candidate is derived.  */
444   auto_vec<struct iv_use *> uses;
445   hashval_t hash;
446 };
447 
448 /* Hashtable helpers.  */
449 
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
451 {
452   static inline hashval_t hash (const iv_common_cand *);
453   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
454 };
455 
456 /* Hash function for possible common candidates.  */
457 
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
460 {
461   return ccand->hash;
462 }
463 
464 /* Hash table equality function for common candidates.  */
465 
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 			      const iv_common_cand *ccand2)
469 {
470   return (ccand1->hash == ccand2->hash
471 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
472 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
473 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475 }
476 
477 /* Loop invariant expression hashtable entry.  */
478 
479 struct iv_inv_expr_ent
480 {
481   /* Tree expression of the entry.  */
482   tree expr;
483   /* Unique identifier.  */
484   int id;
485   /* Hash value.  */
486   hashval_t hash;
487 };
488 
489 /* Sort iv_inv_expr_ent pair A and B by id field.  */
490 
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
493 {
494   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496 
497   unsigned id1 = (*e1)->id;
498   unsigned id2 = (*e2)->id;
499 
500   if (id1 < id2)
501     return -1;
502   else if (id1 > id2)
503     return 1;
504   else
505     return 0;
506 }
507 
508 /* Hashtable helpers.  */
509 
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
511 {
512   static inline hashval_t hash (const iv_inv_expr_ent *);
513   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
514 };
515 
516 /* Return true if uses of type TYPE represent some form of address.  */
517 
518 inline bool
519 address_p (use_type type)
520 {
521   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522 }
523 
524 /* Hash function for loop invariant expressions.  */
525 
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
528 {
529   return expr->hash;
530 }
531 
532 /* Hash table equality function for expressions.  */
533 
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 			   const iv_inv_expr_ent *expr2)
537 {
538   return expr1->hash == expr2->hash
539 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
540 }
541 
542 struct ivopts_data
543 {
544   /* The currently optimized loop.  */
545   struct loop *current_loop;
546   source_location loop_loc;
547 
548   /* Numbers of iterations for all exits of the current loop.  */
549   hash_map<edge, tree_niter_desc *> *niters;
550 
551   /* Number of registers used in the loop.  */
552   unsigned regs_used;
553 
554   /* The size of version_info array allocated.  */
555   unsigned version_info_size;
556 
557   /* The array of information for the ssa names.  */
558   struct version_info *version_info;
559 
560   /* The hashtable of loop invariant expressions created
561      by ivopt.  */
562   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
563 
564   /* The bitmap of indices in version_info whose value was changed.  */
565   bitmap relevant;
566 
567   /* The uses of induction variables.  */
568   vec<iv_group *> vgroups;
569 
570   /* The candidates.  */
571   vec<iv_cand *> vcands;
572 
573   /* A bitmap of important candidates.  */
574   bitmap important_candidates;
575 
576   /* Cache used by tree_to_aff_combination_expand.  */
577   hash_map<tree, name_expansion *> *name_expansion_cache;
578 
579   /* The hashtable of common candidates derived from iv uses.  */
580   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
581 
582   /* The common candidates.  */
583   vec<iv_common_cand *> iv_common_cands;
584 
585   /* The maximum invariant variable id.  */
586   unsigned max_inv_var_id;
587 
588   /* The maximum invariant expression id.  */
589   unsigned max_inv_expr_id;
590 
591   /* Number of no_overflow BIVs which are not used in memory address.  */
592   unsigned bivs_not_used_in_addr;
593 
594   /* Obstack for iv structure.  */
595   struct obstack iv_obstack;
596 
597   /* Whether to consider all candidates when replacing a use; if false, only
598      the related and important candidates are considered.  */
599   bool consider_all_candidates;
600 
601   /* Are we optimizing for speed?  */
602   bool speed;
603 
604   /* Whether the loop body includes any function calls.  */
605   bool body_includes_call;
606 
607   /* Whether the loop body can only be exited via a single exit.  */
608   bool loop_single_exit_p;
609 };
610 
611 /* An assignment of iv candidates to uses.  */
612 
613 struct iv_ca
614 {
615   /* The number of uses covered by the assignment.  */
616   unsigned upto;
617 
618   /* Number of groups that cannot be expressed by the candidates in the set.  */
619   unsigned bad_groups;
620 
621   /* Candidate assigned to a group, together with the related costs.  */
622   struct cost_pair **cand_for_group;
623 
624   /* Number of times each candidate is used.  */
625   unsigned *n_cand_uses;
626 
627   /* The candidates used.  */
628   bitmap cands;
629 
630   /* The number of candidates in the set.  */
631   unsigned n_cands;
632 
633   /* The number of invariants needed, including both invariant variables and
634      invariant expressions.  */
635   unsigned n_invs;
636 
637   /* Total cost of expressing uses.  */
638   comp_cost cand_use_cost;
639 
640   /* Total cost of candidates.  */
641   unsigned cand_cost;
642 
643   /* Number of times each invariant variable is used.  */
644   unsigned *n_inv_var_uses;
645 
646   /* Number of times each invariant expression is used.  */
647   unsigned *n_inv_expr_uses;
648 
649   /* Total cost of the assignment.  */
650   comp_cost cost;
651 };
652 
653 /* Difference of two iv candidate assignments.  */
654 
655 struct iv_ca_delta
656 {
657   /* Changed group.  */
658   struct iv_group *group;
659 
660   /* An old assignment (for rollback purposes).  */
661   struct cost_pair *old_cp;
662 
663   /* A new assignment.  */
664   struct cost_pair *new_cp;
665 
666   /* Next change in the list.  */
667   struct iv_ca_delta *next;
668 };
669 
670 /* Bound on number of candidates below which all candidates are considered.  */
671 
672 #define CONSIDER_ALL_CANDIDATES_BOUND \
673   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
674 
675 /* If there are more iv occurrences, we just give up (it is quite unlikely that
676    optimizing such a loop would help, and it would take ages).  */
677 
678 #define MAX_CONSIDERED_GROUPS \
679   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
680 
681 /* If there are at most this number of ivs in the set, try removing unnecessary
682    ivs from the set always.  */
683 
684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
685   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
686 
687 /* The list of trees for which the decl_rtl field must be reset is stored
688    here.  */
689 
690 static vec<tree> decl_rtl_to_reset;
691 
692 static comp_cost force_expr_to_var_cost (tree, bool);
693 
694 /* The single loop exit if it dominates the latch, NULL otherwise.  */
695 
696 edge
697 single_dom_exit (struct loop *loop)
698 {
699   edge exit = single_exit (loop);
700 
701   if (!exit)
702     return NULL;
703 
704   if (!just_once_each_iteration_p (loop, exit->src))
705     return NULL;
706 
707   return exit;
708 }
709 
710 /* Dumps information about the induction variable IV to FILE.  Don't dump
711    variable's name if DUMP_NAME is FALSE.  The information is dumped with
712    preceding spaces indicated by INDENT_LEVEL.  */
713 
714 void
715 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
716 {
717   const char *p;
718   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
719 
720   if (indent_level > 4)
721     indent_level = 4;
722   p = spaces + 8 - (indent_level << 1);
723 
724   fprintf (file, "%sIV struct:\n", p);
725   if (iv->ssa_name && dump_name)
726     {
727       fprintf (file, "%s  SSA_NAME:\t", p);
728       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
729       fprintf (file, "\n");
730     }
731 
732   fprintf (file, "%s  Type:\t", p);
733   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
734   fprintf (file, "\n");
735 
736   fprintf (file, "%s  Base:\t", p);
737   print_generic_expr (file, iv->base, TDF_SLIM);
738   fprintf (file, "\n");
739 
740   fprintf (file, "%s  Step:\t", p);
741   print_generic_expr (file, iv->step, TDF_SLIM);
742   fprintf (file, "\n");
743 
744   if (iv->base_object)
745     {
746       fprintf (file, "%s  Object:\t", p);
747       print_generic_expr (file, iv->base_object, TDF_SLIM);
748       fprintf (file, "\n");
749     }
750 
751   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
752 
753   fprintf (file, "%s  Overflowness w.r.t. loop niter:\t%s\n",
754 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
755 }
756 
757 /* Dumps information about the USE to FILE.  */
758 
759 void
760 dump_use (FILE *file, struct iv_use *use)
761 {
762   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
763   fprintf (file, "    At stmt:\t");
764   print_gimple_stmt (file, use->stmt, 0);
765   fprintf (file, "    At pos:\t");
766   if (use->op_p)
767     print_generic_expr (file, *use->op_p, TDF_SLIM);
768   fprintf (file, "\n");
769   dump_iv (file, use->iv, false, 2);
770 }
771 
772 /* Dumps information about the groups of uses to FILE.  */
773 
774 void
775 dump_groups (FILE *file, struct ivopts_data *data)
776 {
777   unsigned i, j;
778   struct iv_group *group;
779 
780   for (i = 0; i < data->vgroups.length (); i++)
781     {
782       group = data->vgroups[i];
783       fprintf (file, "Group %d:\n", group->id);
784       if (group->type == USE_NONLINEAR_EXPR)
785 	fprintf (file, "  Type:\tGENERIC\n");
786       else if (group->type == USE_REF_ADDRESS)
787 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
788       else if (group->type == USE_PTR_ADDRESS)
789 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
790       else
791 	{
792 	  gcc_assert (group->type == USE_COMPARE);
793 	  fprintf (file, "  Type:\tCOMPARE\n");
794 	}
795       for (j = 0; j < group->vuses.length (); j++)
796 	dump_use (file, group->vuses[j]);
797     }
798 }
799 
800 /* Dumps information about induction variable candidate CAND to FILE.  */
801 
802 void
803 dump_cand (FILE *file, struct iv_cand *cand)
804 {
805   struct iv *iv = cand->iv;
806 
807   fprintf (file, "Candidate %d:\n", cand->id);
808   if (cand->inv_vars)
809     {
810       fprintf (file, "  Depend on inv.vars: ");
811       dump_bitmap (file, cand->inv_vars);
812     }
813   if (cand->inv_exprs)
814     {
815       fprintf (file, "  Depend on inv.exprs: ");
816       dump_bitmap (file, cand->inv_exprs);
817     }
818 
819   if (cand->var_before)
820     {
821       fprintf (file, "  Var before: ");
822       print_generic_expr (file, cand->var_before, TDF_SLIM);
823       fprintf (file, "\n");
824     }
825   if (cand->var_after)
826     {
827       fprintf (file, "  Var after: ");
828       print_generic_expr (file, cand->var_after, TDF_SLIM);
829       fprintf (file, "\n");
830     }
831 
832   switch (cand->pos)
833     {
834     case IP_NORMAL:
835       fprintf (file, "  Incr POS: before exit test\n");
836       break;
837 
838     case IP_BEFORE_USE:
839       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
840       break;
841 
842     case IP_AFTER_USE:
843       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
844       break;
845 
846     case IP_END:
847       fprintf (file, "  Incr POS: at end\n");
848       break;
849 
850     case IP_ORIGINAL:
851       fprintf (file, "  Incr POS: orig biv\n");
852       break;
853     }
854 
855   dump_iv (file, iv, false, 1);
856 }
857 
858 /* Returns the info for ssa version VER.  */
859 
860 static inline struct version_info *
861 ver_info (struct ivopts_data *data, unsigned ver)
862 {
863   return data->version_info + ver;
864 }
865 
866 /* Returns the info for ssa name NAME.  */
867 
868 static inline struct version_info *
869 name_info (struct ivopts_data *data, tree name)
870 {
871   return ver_info (data, SSA_NAME_VERSION (name));
872 }
873 
874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
875    emitted in LOOP.  */
876 
877 static bool
878 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
879 {
880   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
881 
882   gcc_assert (bb);
883 
884   if (sbb == loop->latch)
885     return true;
886 
887   if (sbb != bb)
888     return false;
889 
890   return stmt == last_stmt (bb);
891 }
892 
893 /* Returns true if STMT is after the place where the original induction
894    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
895    if the positions are identical.  */
896 
897 static bool
898 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
899 {
900   basic_block cand_bb = gimple_bb (cand->incremented_at);
901   basic_block stmt_bb = gimple_bb (stmt);
902 
903   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
904     return false;
905 
906   if (stmt_bb != cand_bb)
907     return true;
908 
909   if (true_if_equal
910       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
911     return true;
912   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
913 }
914 
915 /* Returns true if STMT is after the place where the induction variable
916    CAND is incremented in LOOP.  */
917 
918 static bool
919 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
920 {
921   switch (cand->pos)
922     {
923     case IP_END:
924       return false;
925 
926     case IP_NORMAL:
927       return stmt_after_ip_normal_pos (loop, stmt);
928 
929     case IP_ORIGINAL:
930     case IP_AFTER_USE:
931       return stmt_after_inc_pos (cand, stmt, false);
932 
933     case IP_BEFORE_USE:
934       return stmt_after_inc_pos (cand, stmt, true);
935 
936     default:
937       gcc_unreachable ();
938     }
939 }
940 
941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
942 
943 static bool
944 abnormal_ssa_name_p (tree exp)
945 {
946   if (!exp)
947     return false;
948 
949   if (TREE_CODE (exp) != SSA_NAME)
950     return false;
951 
952   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
953 }
954 
955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956    abnormal phi node.  Callback for for_each_index.  */
957 
958 static bool
959 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
960 				  void *data ATTRIBUTE_UNUSED)
961 {
962   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
963     {
964       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
965 	return false;
966       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
967 	return false;
968     }
969 
970   return !abnormal_ssa_name_p (*index);
971 }
972 
973 /* Returns true if EXPR contains a ssa name that occurs in an
974    abnormal phi node.  */
975 
976 bool
977 contains_abnormal_ssa_name_p (tree expr)
978 {
979   enum tree_code code;
980   enum tree_code_class codeclass;
981 
982   if (!expr)
983     return false;
984 
985   code = TREE_CODE (expr);
986   codeclass = TREE_CODE_CLASS (code);
987 
988   if (code == SSA_NAME)
989     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
990 
991   if (code == INTEGER_CST
992       || is_gimple_min_invariant (expr))
993     return false;
994 
995   if (code == ADDR_EXPR)
996     return !for_each_index (&TREE_OPERAND (expr, 0),
997 			    idx_contains_abnormal_ssa_name_p,
998 			    NULL);
999 
1000   if (code == COND_EXPR)
1001     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1002       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1003       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1004 
1005   switch (codeclass)
1006     {
1007     case tcc_binary:
1008     case tcc_comparison:
1009       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1010 	return true;
1011 
1012       /* Fallthru.  */
1013     case tcc_unary:
1014       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1015 	return true;
1016 
1017       break;
1018 
1019     default:
1020       gcc_unreachable ();
1021     }
1022 
1023   return false;
1024 }
1025 
1026 /*  Returns the structure describing number of iterations determined from
1027     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1028 
1029 static struct tree_niter_desc *
1030 niter_for_exit (struct ivopts_data *data, edge exit)
1031 {
1032   struct tree_niter_desc *desc;
1033   tree_niter_desc **slot;
1034 
1035   if (!data->niters)
1036     {
1037       data->niters = new hash_map<edge, tree_niter_desc *>;
1038       slot = NULL;
1039     }
1040   else
1041     slot = data->niters->get (exit);
1042 
1043   if (!slot)
1044     {
1045       /* Try to determine number of iterations.  We cannot safely work with ssa
1046 	 names that appear in phi nodes on abnormal edges, so that we do not
1047 	 create overlapping life ranges for them (PR 27283).  */
1048       desc = XNEW (struct tree_niter_desc);
1049       if (!number_of_iterations_exit (data->current_loop,
1050 				      exit, desc, true)
1051      	  || contains_abnormal_ssa_name_p (desc->niter))
1052 	{
1053 	  XDELETE (desc);
1054 	  desc = NULL;
1055 	}
1056       data->niters->put (exit, desc);
1057     }
1058   else
1059     desc = *slot;
1060 
1061   return desc;
1062 }
1063 
1064 /* Returns the structure describing number of iterations determined from
1065    single dominating exit of DATA->current_loop, or NULL if something
1066    goes wrong.  */
1067 
1068 static struct tree_niter_desc *
1069 niter_for_single_dom_exit (struct ivopts_data *data)
1070 {
1071   edge exit = single_dom_exit (data->current_loop);
1072 
1073   if (!exit)
1074     return NULL;
1075 
1076   return niter_for_exit (data, exit);
1077 }
1078 
1079 /* Initializes data structures used by the iv optimization pass, stored
1080    in DATA.  */
1081 
1082 static void
1083 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1084 {
1085   data->version_info_size = 2 * num_ssa_names;
1086   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1087   data->relevant = BITMAP_ALLOC (NULL);
1088   data->important_candidates = BITMAP_ALLOC (NULL);
1089   data->max_inv_var_id = 0;
1090   data->max_inv_expr_id = 0;
1091   data->niters = NULL;
1092   data->vgroups.create (20);
1093   data->vcands.create (20);
1094   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1095   data->name_expansion_cache = NULL;
1096   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1097   data->iv_common_cands.create (20);
1098   decl_rtl_to_reset.create (20);
1099   gcc_obstack_init (&data->iv_obstack);
1100 }
1101 
1102 /* Returns a memory object to which EXPR points.  In case we are able to
1103    determine that it does not point to any such object, NULL is returned.  */
1104 
1105 static tree
1106 determine_base_object (tree expr)
1107 {
1108   enum tree_code code = TREE_CODE (expr);
1109   tree base, obj;
1110 
1111   /* If this is a pointer cast to any type, we need to determine
1112      the base object for the pointer; so handle conversions before
1113      throwing away non-pointer expressions.  */
1114   if (CONVERT_EXPR_P (expr))
1115     return determine_base_object (TREE_OPERAND (expr, 0));
1116 
1117   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1118     return NULL_TREE;
1119 
1120   switch (code)
1121     {
1122     case INTEGER_CST:
1123       return NULL_TREE;
1124 
1125     case ADDR_EXPR:
1126       obj = TREE_OPERAND (expr, 0);
1127       base = get_base_address (obj);
1128 
1129       if (!base)
1130 	return expr;
1131 
1132       if (TREE_CODE (base) == MEM_REF)
1133 	return determine_base_object (TREE_OPERAND (base, 0));
1134 
1135       return fold_convert (ptr_type_node,
1136 			   build_fold_addr_expr (base));
1137 
1138     case POINTER_PLUS_EXPR:
1139       return determine_base_object (TREE_OPERAND (expr, 0));
1140 
1141     case PLUS_EXPR:
1142     case MINUS_EXPR:
1143       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1144       gcc_unreachable ();
1145 
1146     default:
1147       if (POLY_INT_CST_P (expr))
1148 	return NULL_TREE;
1149       return fold_convert (ptr_type_node, expr);
1150     }
1151 }
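
/* A few hypothetical illustrations of determine_base_object above: for an
   address iv whose base is &a[0], the base object is the address of the
   declaration A; for a pointer iv whose base is a pointer SSA name P defined
   outside the loop, it is P itself (converted to ptr_type_node); and an
   integer iv has no base object at all (NULL_TREE is returned).  */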
1152 
1153 /* Return true if address expression with non-DECL_P operand appears
1154    in EXPR.  */
1155 
1156 static bool
1157 contain_complex_addr_expr (tree expr)
1158 {
1159   bool res = false;
1160 
1161   STRIP_NOPS (expr);
1162   switch (TREE_CODE (expr))
1163     {
1164     case POINTER_PLUS_EXPR:
1165     case PLUS_EXPR:
1166     case MINUS_EXPR:
1167       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1168       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1169       break;
1170 
1171     case ADDR_EXPR:
1172       return (!DECL_P (TREE_OPERAND (expr, 0)));
1173 
1174     default:
1175       return false;
1176     }
1177 
1178   return res;
1179 }
1180 
1181 /* Allocates an induction variable with given initial value BASE and step STEP
1182    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1183 
1184 static struct iv *
1185 alloc_iv (struct ivopts_data *data, tree base, tree step,
1186 	  bool no_overflow = false)
1187 {
1188   tree expr = base;
1189   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1190 					      sizeof (struct iv));
1191   gcc_assert (step != NULL_TREE);
1192 
1193   /* Lower address expression in base except ones with DECL_P as operand.
1194      By doing this:
1195        1) More accurate cost can be computed for address expressions;
1196        2) Duplicate candidates won't be created for bases in different
1197 	  forms, like &a[0] and &a.  */
1198   STRIP_NOPS (expr);
1199   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1200       || contain_complex_addr_expr (expr))
1201     {
1202       aff_tree comb;
1203       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1204       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1205     }
1206 
1207   iv->base = base;
1208   iv->base_object = determine_base_object (base);
1209   iv->step = step;
1210   iv->biv_p = false;
1211   iv->nonlin_use = NULL;
1212   iv->ssa_name = NULL_TREE;
1213   if (!no_overflow
1214        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1215 			      base, step))
1216     no_overflow = true;
1217   iv->no_overflow = no_overflow;
1218   iv->have_address_use = false;
1219 
1220   return iv;
1221 }
1222 
1223 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1224    doesn't overflow.  */
1225 
1226 static void
1227 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1228 	bool no_overflow)
1229 {
1230   struct version_info *info = name_info (data, iv);
1231 
1232   gcc_assert (!info->iv);
1233 
1234   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1235   info->iv = alloc_iv (data, base, step, no_overflow);
1236   info->iv->ssa_name = iv;
1237 }
1238 
1239 /* Finds induction variable declaration for VAR.  */
1240 
1241 static struct iv *
1242 get_iv (struct ivopts_data *data, tree var)
1243 {
1244   basic_block bb;
1245   tree type = TREE_TYPE (var);
1246 
1247   if (!POINTER_TYPE_P (type)
1248       && !INTEGRAL_TYPE_P (type))
1249     return NULL;
1250 
1251   if (!name_info (data, var)->iv)
1252     {
1253       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1254 
1255       if (!bb
1256 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1257 	set_iv (data, var, var, build_int_cst (type, 0), true);
1258     }
1259 
1260   return name_info (data, var)->iv;
1261 }
1262 
1263 /* Return the first non-invariant ssa var found in EXPR.  */
1264 
1265 static tree
1266 extract_single_var_from_expr (tree expr)
1267 {
1268   int i, n;
1269   tree tmp;
1270   enum tree_code code;
1271 
1272   if (!expr || is_gimple_min_invariant (expr))
1273     return NULL;
1274 
1275   code = TREE_CODE (expr);
1276   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1277     {
1278       n = TREE_OPERAND_LENGTH (expr);
1279       for (i = 0; i < n; i++)
1280 	{
1281 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1282 
1283 	  if (tmp)
1284 	    return tmp;
1285 	}
1286     }
1287   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1288 }
1289 
1290 /* Finds basic ivs.  */
1291 
1292 static bool
1293 find_bivs (struct ivopts_data *data)
1294 {
1295   gphi *phi;
1296   affine_iv iv;
1297   tree step, type, base, stop;
1298   bool found = false;
1299   struct loop *loop = data->current_loop;
1300   gphi_iterator psi;
1301 
1302   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1303     {
1304       phi = psi.phi ();
1305 
1306       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1307 	continue;
1308 
1309       if (virtual_operand_p (PHI_RESULT (phi)))
1310 	continue;
1311 
1312       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1313 	continue;
1314 
1315       if (integer_zerop (iv.step))
1316 	continue;
1317 
1318       step = iv.step;
1319       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1320       /* Stop expanding the iv base at the first ssa var referred to by
1321 	 the iv step.  Ideally we should stop at any such ssa var, but that
1322 	 is expensive and rarely needed, so we just do it for the first one.
1323 
1324 	 See PR64705 for the rationale.  */
1325       stop = extract_single_var_from_expr (step);
1326       base = expand_simple_operations (base, stop);
1327       if (contains_abnormal_ssa_name_p (base)
1328 	  || contains_abnormal_ssa_name_p (step))
1329 	continue;
1330 
1331       type = TREE_TYPE (PHI_RESULT (phi));
1332       base = fold_convert (type, base);
1333       if (step)
1334 	{
1335 	  if (POINTER_TYPE_P (type))
1336 	    step = convert_to_ptrofftype (step);
1337 	  else
1338 	    step = fold_convert (type, step);
1339 	}
1340 
1341       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1342       found = true;
1343     }
1344 
1345   return found;
1346 }
1347 
1348 /* Marks basic ivs.  */
1349 
1350 static void
1351 mark_bivs (struct ivopts_data *data)
1352 {
1353   gphi *phi;
1354   gimple *def;
1355   tree var;
1356   struct iv *iv, *incr_iv;
1357   struct loop *loop = data->current_loop;
1358   basic_block incr_bb;
1359   gphi_iterator psi;
1360 
1361   data->bivs_not_used_in_addr = 0;
1362   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1363     {
1364       phi = psi.phi ();
1365 
1366       iv = get_iv (data, PHI_RESULT (phi));
1367       if (!iv)
1368 	continue;
1369 
1370       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1371       def = SSA_NAME_DEF_STMT (var);
1372       /* Don't mark an iv peeled from another one as a biv.  */
1373       if (def
1374 	  && gimple_code (def) == GIMPLE_PHI
1375 	  && gimple_bb (def) == loop->header)
1376 	continue;
1377 
1378       incr_iv = get_iv (data, var);
1379       if (!incr_iv)
1380 	continue;
1381 
1382       /* If the increment is in the subloop, ignore it.  */
1383       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1384       if (incr_bb->loop_father != data->current_loop
1385 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1386 	continue;
1387 
1388       iv->biv_p = true;
1389       incr_iv->biv_p = true;
1390       if (iv->no_overflow)
1391 	data->bivs_not_used_in_addr++;
1392       if (incr_iv->no_overflow)
1393 	data->bivs_not_used_in_addr++;
1394     }
1395 }
1396 
1397 /* Checks whether STMT defines a linear induction variable and stores its
1398    parameters to IV.  */
1399 
1400 static bool
1401 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1402 {
1403   tree lhs, stop;
1404   struct loop *loop = data->current_loop;
1405 
1406   iv->base = NULL_TREE;
1407   iv->step = NULL_TREE;
1408 
1409   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1410     return false;
1411 
1412   lhs = gimple_assign_lhs (stmt);
1413   if (TREE_CODE (lhs) != SSA_NAME)
1414     return false;
1415 
1416   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1417     return false;
1418 
1419   /* Stop expanding the iv base at the first ssa var referred to by
1420      the iv step.  Ideally we should stop at any such ssa var, but that
1421      is expensive and rarely needed, so we just do it for the first one.
1422 
1423      See PR64705 for the rationale.  */
1424   stop = extract_single_var_from_expr (iv->step);
1425   iv->base = expand_simple_operations (iv->base, stop);
1426   if (contains_abnormal_ssa_name_p (iv->base)
1427       || contains_abnormal_ssa_name_p (iv->step))
1428     return false;
1429 
1430   /* If STMT could throw, then do not consider STMT as defining a GIV.
1431      While this will suppress optimizations, we cannot safely delete this
1432      GIV and associated statements, even if it appears it is not used.  */
1433   if (stmt_could_throw_p (stmt))
1434     return false;
1435 
1436   return true;
1437 }
1438 
1439 /* Finds general ivs in statement STMT.  */
1440 
1441 static void
1442 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1443 {
1444   affine_iv iv;
1445 
1446   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1447     return;
1448 
1449   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1450 }
1451 
1452 /* Finds general ivs in basic block BB.  */
1453 
1454 static void
1455 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1456 {
1457   gimple_stmt_iterator bsi;
1458 
1459   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1460     find_givs_in_stmt (data, gsi_stmt (bsi));
1461 }
1462 
1463 /* Finds general ivs.  */
1464 
1465 static void
1466 find_givs (struct ivopts_data *data)
1467 {
1468   struct loop *loop = data->current_loop;
1469   basic_block *body = get_loop_body_in_dom_order (loop);
1470   unsigned i;
1471 
1472   for (i = 0; i < loop->num_nodes; i++)
1473     find_givs_in_bb (data, body[i]);
1474   free (body);
1475 }
1476 
1477 /* For each ssa name defined in LOOP determines whether it is an induction
1478    variable and if so, its initial value and step.  */
1479 
1480 static bool
1481 find_induction_variables (struct ivopts_data *data)
1482 {
1483   unsigned i;
1484   bitmap_iterator bi;
1485 
1486   if (!find_bivs (data))
1487     return false;
1488 
1489   find_givs (data);
1490   mark_bivs (data);
1491 
1492   if (dump_file && (dump_flags & TDF_DETAILS))
1493     {
1494       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495 
1496       if (niter)
1497 	{
1498 	  fprintf (dump_file, "  number of iterations ");
1499 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1500 	  if (!integer_zerop (niter->may_be_zero))
1501 	    {
1502 	      fprintf (dump_file, "; zero if ");
1503 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504 	    }
1505 	  fprintf (dump_file, "\n");
1506 	};
1507 
1508       fprintf (dump_file, "\n<Induction Vars>:\n");
1509       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510 	{
1511 	  struct version_info *info = ver_info (data, i);
1512 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1513 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1514 	}
1515     }
1516 
1517   return true;
1518 }
1519 
1520 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1521    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1522    is the const offset stripped from IV base and MEM_TYPE is the type
1523    of the memory being addressed.  For uses of other types, ADDR_BASE
1524    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1525 
1526 static struct iv_use *
1527 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1528 	    gimple *stmt, enum use_type type, tree mem_type,
1529 	    tree addr_base, poly_uint64 addr_offset)
1530 {
1531   struct iv_use *use = XCNEW (struct iv_use);
1532 
1533   use->id = group->vuses.length ();
1534   use->group_id = group->id;
1535   use->type = type;
1536   use->mem_type = mem_type;
1537   use->iv = iv;
1538   use->stmt = stmt;
1539   use->op_p = use_p;
1540   use->addr_base = addr_base;
1541   use->addr_offset = addr_offset;
1542 
1543   group->vuses.safe_push (use);
1544   return use;
1545 }
1546 
1547 /* Checks whether OP is a loop-level invariant and if so, records it.
1548    NONLINEAR_USE is true if the invariant is used in a way we do not
1549    handle specially.  */
1550 
1551 static void
1552 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 {
1554   basic_block bb;
1555   struct version_info *info;
1556 
1557   if (TREE_CODE (op) != SSA_NAME
1558       || virtual_operand_p (op))
1559     return;
1560 
1561   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1562   if (bb
1563       && flow_bb_inside_loop_p (data->current_loop, bb))
1564     return;
1565 
1566   info = name_info (data, op);
1567   info->name = op;
1568   info->has_nonlin_use |= nonlinear_use;
1569   if (!info->inv_id)
1570     info->inv_id = ++data->max_inv_var_id;
1571   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1572 }
1573 
1574 /* Record a group of TYPE.  */
1575 
1576 static struct iv_group *
1577 record_group (struct ivopts_data *data, enum use_type type)
1578 {
1579   struct iv_group *group = XCNEW (struct iv_group);
1580 
1581   group->id = data->vgroups.length ();
1582   group->type = type;
1583   group->related_cands = BITMAP_ALLOC (NULL);
1584   group->vuses.create (1);
1585 
1586   data->vgroups.safe_push (group);
1587   return group;
1588 }
1589 
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591    New group will be created if there is no existing group for the use.
1592    MEM_TYPE is the type of memory being addressed, or NULL if this
1593    isn't an address reference.  */
1594 
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 		  struct iv *iv, gimple *stmt, enum use_type type,
1598 		  tree mem_type)
1599 {
1600   tree addr_base = NULL;
1601   struct iv_group *group = NULL;
1602   poly_uint64 addr_offset = 0;
1603 
1604   /* Address type uses may join an existing group; other uses get a new group.  */
1605   if (address_p (type))
1606     {
1607       unsigned int i;
1608 
1609       addr_base = strip_offset (iv->base, &addr_offset);
1610       for (i = 0; i < data->vgroups.length (); i++)
1611 	{
1612 	  struct iv_use *use;
1613 
1614 	  group = data->vgroups[i];
1615 	  use = group->vuses[0];
1616 	  if (!address_p (use->type))
1617 	    continue;
1618 
1619 	  /* Check if it has the same stripped base and step.  */
1620 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621 	      && operand_equal_p (iv->step, use->iv->step, 0)
1622 	      && operand_equal_p (addr_base, use->addr_base, 0))
1623 	    break;
1624 	}
1625       if (i == data->vgroups.length ())
1626 	group = NULL;
1627     }
1628 
1629   if (!group)
1630     group = record_group (data, type);
1631 
1632   return record_use (group, use_p, iv, stmt, type, mem_type,
1633 		     addr_base, addr_offset);
1634 }
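
/* As a hypothetical illustration of the grouping performed above: address
   uses such as a[i] and a[i + 1] in the same loop share the base object,
   the step of I and the stripped base address, differing only in the
   constant offset, so they end up in one group; a use of b[i], having a
   different base object, starts a new group.  */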
1635 
1636 /* Checks whether the use OP is interesting and if so, records it.  */
1637 
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641   struct iv *iv;
1642   gimple *stmt;
1643   struct iv_use *use;
1644 
1645   if (TREE_CODE (op) != SSA_NAME)
1646     return NULL;
1647 
1648   iv = get_iv (data, op);
1649   if (!iv)
1650     return NULL;
1651 
1652   if (iv->nonlin_use)
1653     {
1654       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655       return iv->nonlin_use;
1656     }
1657 
1658   if (integer_zerop (iv->step))
1659     {
1660       record_invariant (data, op, true);
1661       return NULL;
1662     }
1663 
1664   stmt = SSA_NAME_DEF_STMT (op);
1665   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666 
1667   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668   iv->nonlin_use = use;
1669   return use;
1670 }
1671 
1672 /* Indicate how compare type iv_use can be handled.  */
1673 enum comp_iv_rewrite
1674 {
1675   COMP_IV_NA,
1676   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677   COMP_IV_EXPR,
1678   /* We may rewrite compare type iv_uses on both sides of comparison by
1679      expressing value of each iv_use.  */
1680   COMP_IV_EXPR_2,
1681   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682      or by eliminating it with other iv_cand.  */
1683   COMP_IV_ELIM
1684 };
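
/* Some hypothetical examples of the classification above, assuming I and J
   are induction variables and N is a loop invariant: a condition like
   I < N (or I < 100) may be rewritten by expressing I or by eliminating it
   with another candidate (COMP_IV_ELIM); I < J allows rewriting both sides
   (COMP_IV_EXPR_2); and I < X, where X is defined inside the loop but is
   not an induction variable, only allows expressing I (COMP_IV_EXPR).  */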
1685 
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687    of an induction variable and an invariant.  If this is the case,
1688    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1689    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690    induction variable descriptions, and the way the condition may be
1691    rewritten is returned (see enum comp_iv_rewrite).  Otherwise CONTROL_VAR
1692    and BOUND are set to the condition's arguments and COMP_IV_NA is returned.  */
1693 
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696 		       tree **control_var, tree **bound,
1697 		       struct iv **iv_var, struct iv **iv_bound)
1698 {
1699   /* The objects returned when the condition has constant operands.  */
1700   static struct iv const_iv;
1701   static tree zero;
1702   tree *op0 = &zero, *op1 = &zero;
1703   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705 
1706   if (gimple_code (stmt) == GIMPLE_COND)
1707     {
1708       gcond *cond_stmt = as_a <gcond *> (stmt);
1709       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711     }
1712   else
1713     {
1714       op0 = gimple_assign_rhs1_ptr (stmt);
1715       op1 = gimple_assign_rhs2_ptr (stmt);
1716     }
1717 
1718   zero = integer_zero_node;
1719   const_iv.step = integer_zero_node;
1720 
1721   if (TREE_CODE (*op0) == SSA_NAME)
1722     iv0 = get_iv (data, *op0);
1723   if (TREE_CODE (*op1) == SSA_NAME)
1724     iv1 = get_iv (data, *op1);
1725 
1726   /* If both sides of the comparison are IVs, we can express the ivs on both ends.  */
1727   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728     {
1729       rewrite_type = COMP_IV_EXPR_2;
1730       goto end;
1731     }
1732 
1733   /* If neither side of the comparison is an IV.  */
1734   if ((!iv0 || integer_zerop (iv0->step))
1735       && (!iv1 || integer_zerop (iv1->step)))
1736     goto end;
1737 
1738   /* Control variable may be on the other side.  */
1739   if (!iv0 || integer_zerop (iv0->step))
1740     {
1741       std::swap (op0, op1);
1742       std::swap (iv0, iv1);
1743     }
1744   /* If one side is an IV and the other side isn't loop invariant.  */
1745   if (!iv1)
1746     rewrite_type = COMP_IV_EXPR;
1747   /* If one side is an IV and the other side is loop invariant.  */
1748   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749     rewrite_type = COMP_IV_ELIM;
1750 
1751 end:
1752   if (control_var)
1753     *control_var = op0;
1754   if (iv_var)
1755     *iv_var = iv0;
1756   if (bound)
1757     *bound = op1;
1758   if (iv_bound)
1759     *iv_bound = iv1;
1760 
1761   return rewrite_type;
1762 }
1763 
1764 /* Checks whether the condition in STMT is interesting and if so,
1765    records it.  */
1766 
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770   tree *var_p, *bound_p;
1771   struct iv *var_iv, *bound_iv;
1772   enum comp_iv_rewrite ret;
1773 
1774   ret = extract_cond_operands (data, stmt,
1775 			       &var_p, &bound_p, &var_iv, &bound_iv);
1776   if (ret == COMP_IV_NA)
1777     {
1778       find_interesting_uses_op (data, *var_p);
1779       find_interesting_uses_op (data, *bound_p);
1780       return;
1781     }
1782 
1783   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784   /* Record compare type iv_use for iv on the other side of comparison.  */
1785   if (ret == COMP_IV_EXPR_2)
1786     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788 
1789 /* Returns the outermost loop EXPR is obviously invariant in
1790    relative to the loop LOOP, i.e. if all its operands are defined
1791    outside of the returned loop.  Returns NULL if EXPR is not
1792    even obviously invariant in LOOP.  */
1793 
1794 struct loop *
1795 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1796 {
1797   basic_block def_bb;
1798   unsigned i, len;
1799 
1800   if (is_gimple_min_invariant (expr))
1801     return current_loops->tree_root;
1802 
1803   if (TREE_CODE (expr) == SSA_NAME)
1804     {
1805       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806       if (def_bb)
1807 	{
1808 	  if (flow_bb_inside_loop_p (loop, def_bb))
1809 	    return NULL;
1810 	  return superloop_at_depth (loop,
1811 				     loop_depth (def_bb->loop_father) + 1);
1812 	}
1813 
1814       return current_loops->tree_root;
1815     }
1816 
1817   if (!EXPR_P (expr))
1818     return NULL;
1819 
1820   unsigned maxdepth = 0;
1821   len = TREE_OPERAND_LENGTH (expr);
1822   for (i = 0; i < len; i++)
1823     {
1824       struct loop *ivloop;
1825       if (!TREE_OPERAND (expr, i))
1826 	continue;
1827 
1828       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829       if (!ivloop)
1830 	return NULL;
1831       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832     }
1833 
1834   return superloop_at_depth (loop, maxdepth);
1835 }
1836 
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838    i.e. if all its operands are defined outside of the LOOP.  LOOP
1839    should not be the function body.  */
1840 
1841 bool
1842 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1843 {
1844   basic_block def_bb;
1845   unsigned i, len;
1846 
1847   gcc_assert (loop_depth (loop) > 0);
1848 
1849   if (is_gimple_min_invariant (expr))
1850     return true;
1851 
1852   if (TREE_CODE (expr) == SSA_NAME)
1853     {
1854       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855       if (def_bb
1856 	  && flow_bb_inside_loop_p (loop, def_bb))
1857 	return false;
1858 
1859       return true;
1860     }
1861 
1862   if (!EXPR_P (expr))
1863     return false;
1864 
1865   len = TREE_OPERAND_LENGTH (expr);
1866   for (i = 0; i < len; i++)
1867     if (TREE_OPERAND (expr, i)
1868 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869       return false;
1870 
1871   return true;
1872 }
1873 
/* Given expression EXPR which computes inductive values with respect
   to the loop recorded in DATA, this function returns the biv from which
   EXPR is derived, by tracing the definition chains of the SSA names in
   EXPR.  */
1877 
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881   struct iv *iv;
1882   unsigned i, n;
1883   tree e2, e1;
1884   enum tree_code code;
1885   gimple *stmt;
1886 
1887   if (expr == NULL_TREE)
1888     return NULL;
1889 
1890   if (is_gimple_min_invariant (expr))
1891     return NULL;
1892 
1893   code = TREE_CODE (expr);
1894   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895     {
1896       n = TREE_OPERAND_LENGTH (expr);
1897       for (i = 0; i < n; i++)
1898 	{
1899 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900 	  if (iv)
1901 	    return iv;
1902 	}
1903     }
1904 
  /* Stop if it's not an SSA name.  */
1906   if (code != SSA_NAME)
1907     return NULL;
1908 
1909   iv = get_iv (data, expr);
1910   if (!iv || integer_zerop (iv->step))
1911     return NULL;
1912   else if (iv->biv_p)
1913     return iv;
1914 
1915   stmt = SSA_NAME_DEF_STMT (expr);
1916   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917     {
1918       ssa_op_iter iter;
1919       use_operand_p use_p;
1920       basic_block phi_bb = gimple_bb (phi);
1921 
      /* Skip a loop header PHI that doesn't define a biv.  */
1923       if (phi_bb->loop_father == data->current_loop)
1924 	return NULL;
1925 
1926       if (virtual_operand_p (gimple_phi_result (phi)))
1927 	return NULL;
1928 
1929       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930 	{
1931 	  tree use = USE_FROM_PTR (use_p);
1932 	  iv = find_deriving_biv_for_expr (data, use);
1933 	  if (iv)
1934 	    return iv;
1935 	}
1936       return NULL;
1937     }
1938   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939     return NULL;
1940 
1941   e1 = gimple_assign_rhs1 (stmt);
1942   code = gimple_assign_rhs_code (stmt);
1943   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944     return find_deriving_biv_for_expr (data, e1);
1945 
1946   switch (code)
1947     {
1948     case MULT_EXPR:
1949     case PLUS_EXPR:
1950     case MINUS_EXPR:
1951     case POINTER_PLUS_EXPR:
1952       /* Increments, decrements and multiplications by a constant
1953 	 are simple.  */
1954       e2 = gimple_assign_rhs2 (stmt);
1955       iv = find_deriving_biv_for_expr (data, e2);
1956       if (iv)
1957 	return iv;
1958       gcc_fallthrough ();
1959 
1960     CASE_CONVERT:
1961       /* Casts are simple.  */
1962       return find_deriving_biv_for_expr (data, e1);
1963 
1964     default:
1965       break;
1966     }
1967 
1968   return NULL;
1969 }
1970 
/* Record that BIV is used in address type uses; also record any biv with
   the same step whose base differs from BIV's base by exactly one step,
   i.e. BIV's predecessor or successor.  */
1973 
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977   unsigned i;
1978   tree type, base_1, base_2;
1979   bitmap_iterator bi;
1980 
1981   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982       || biv->have_address_use || !biv->no_overflow)
1983     return;
1984 
1985   type = TREE_TYPE (biv->base);
1986   if (!INTEGRAL_TYPE_P (type))
1987     return;
1988 
1989   biv->have_address_use = true;
1990   data->bivs_not_used_in_addr--;
1991   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993     {
1994       struct iv *iv = ver_info (data, i)->iv;
1995 
1996       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997 	  || iv->have_address_use || !iv->no_overflow)
1998 	continue;
1999 
2000       if (type != TREE_TYPE (iv->base)
2001 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002 	continue;
2003 
2004       if (!operand_equal_p (biv->step, iv->step, 0))
2005 	continue;
2006 
2007       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008       if (operand_equal_p (base_1, iv->base, 0)
2009 	  || operand_equal_p (base_2, biv->base, 0))
2010 	{
2011 	  iv->have_address_use = true;
2012 	  data->bivs_not_used_in_addr--;
2013 	}
2014     }
2015 }
2016 
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */
2020 
2021 struct ifs_ivopts_data
2022 {
2023   struct ivopts_data *ivopts_data;
2024   gimple *stmt;
2025   tree step;
2026 };
2027 
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032   struct iv *iv;
2033   bool use_overflow_semantics = false;
2034   tree step, iv_base, iv_step, lbound, off;
2035   struct loop *loop = dta->ivopts_data->current_loop;
2036 
2037   /* If base is a component ref, require that the offset of the reference
2038      be invariant.  */
2039   if (TREE_CODE (base) == COMPONENT_REF)
2040     {
2041       off = component_ref_field_offset (base);
2042       return expr_invariant_in_loop_p (loop, off);
2043     }
2044 
  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  For this to work we need both the lower bound and the
     step to be loop invariant.  */
2049   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050     {
2051       /* Moreover, for a range, the size needs to be invariant as well.  */
2052       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054 	return false;
2055 
2056       step = array_ref_element_size (base);
2057       lbound = array_ref_low_bound (base);
2058 
2059       if (!expr_invariant_in_loop_p (loop, step)
2060 	  || !expr_invariant_in_loop_p (loop, lbound))
2061 	return false;
2062     }
2063 
2064   if (TREE_CODE (*idx) != SSA_NAME)
2065     return true;
2066 
2067   iv = get_iv (dta->ivopts_data, *idx);
2068   if (!iv)
2069     return false;
2070 
  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
2074   *idx = iv->base;
2075 
2076   if (integer_zerop (iv->step))
2077     return true;
2078 
2079   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080     {
2081       step = array_ref_element_size (base);
2082 
2083       /* We only handle addresses whose step is an integer constant.  */
2084       if (TREE_CODE (step) != INTEGER_CST)
2085 	return false;
2086     }
2087   else
    /* The step for pointer arithmetic is already 1 byte.  */
2089     step = size_one_node;
2090 
2091   iv_base = iv->base;
2092   iv_step = iv->step;
2093   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094     use_overflow_semantics = true;
2095 
2096   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097 			    sizetype, &iv_base, &iv_step, dta->stmt,
2098 			    use_overflow_semantics))
2099     {
2100       /* The index might wrap.  */
2101       return false;
2102     }
2103 
2104   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106 
2107   if (dta->ivopts_data->bivs_not_used_in_addr)
2108     {
2109       if (!iv->biv_p)
2110 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111 
2112       record_biv_for_address_use (dta->ivopts_data, iv);
2113     }
2114   return true;
2115 }
2116 
/* Records a use of index IDX.  Callback for for_each_index.  The ivopts
   data object is passed in VDATA.  */
2119 
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122 		void *vdata)
2123 {
2124   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125   find_interesting_uses_op (data, *idx);
2126   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127     {
2128       find_interesting_uses_op (data, array_ref_element_size (base));
2129       find_interesting_uses_op (data, array_ref_low_bound (base));
2130     }
2131   return true;
2132 }
2133 
2134 /* If we can prove that TOP = cst * BOT for some constant cst,
2135    store cst to MUL and return true.  Otherwise return false.
2136    The returned value is always sign-extended, regardless of the
2137    signedness of TOP and BOT.  */
2138 
2139 static bool
2140 constant_multiple_of (tree top, tree bot, widest_int *mul)
2141 {
2142   tree mby;
2143   enum tree_code code;
2144   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2145   widest_int res, p0, p1;
2146 
2147   STRIP_NOPS (top);
2148   STRIP_NOPS (bot);
2149 
2150   if (operand_equal_p (top, bot, 0))
2151     {
2152       *mul = 1;
2153       return true;
2154     }
2155 
2156   code = TREE_CODE (top);
2157   switch (code)
2158     {
2159     case MULT_EXPR:
2160       mby = TREE_OPERAND (top, 1);
2161       if (TREE_CODE (mby) != INTEGER_CST)
2162 	return false;
2163 
2164       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2165 	return false;
2166 
2167       *mul = wi::sext (res * wi::to_widest (mby), precision);
2168       return true;
2169 
2170     case PLUS_EXPR:
2171     case MINUS_EXPR:
2172       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2173 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2174 	return false;
2175 
2176       if (code == MINUS_EXPR)
2177 	p1 = -p1;
2178       *mul = wi::sext (p0 + p1, precision);
2179       return true;
2180 
2181     case INTEGER_CST:
2182       if (TREE_CODE (bot) != INTEGER_CST)
2183 	return false;
2184 
2185       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2186       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2187       if (p1 == 0)
2188 	return false;
2189       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2190       return res == 0;
2191 
2192     default:
2193       if (POLY_INT_CST_P (top)
2194 	  && POLY_INT_CST_P (bot)
2195 	  && constant_multiple_p (wi::to_poly_widest (top),
2196 				  wi::to_poly_widest (bot), mul))
2197 	return true;
2198 
2199       return false;
2200     }
2201 }
2202 
2203 /* Return true if memory reference REF with step STEP may be unaligned.  */
2204 
2205 static bool
2206 may_be_unaligned_p (tree ref, tree step)
2207 {
2208   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209      thus they are not misaligned.  */
2210   if (TREE_CODE (ref) == TARGET_MEM_REF)
2211     return false;
2212 
2213   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216 
2217   unsigned HOST_WIDE_INT bitpos;
2218   unsigned int ref_align;
2219   get_object_alignment_1 (ref, &ref_align, &bitpos);
2220   if (ref_align < align
2221       || (bitpos % align) != 0
2222       || (bitpos % BITS_PER_UNIT) != 0)
2223     return true;
2224 
2225   unsigned int trailing_zeros = tree_ctz (step);
2226   if (trailing_zeros < HOST_BITS_PER_INT
2227       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228     return true;
2229 
2230   return false;
2231 }
2232 
2233 /* Return true if EXPR may be non-addressable.   */
2234 
2235 bool
2236 may_be_nonaddressable_p (tree expr)
2237 {
2238   switch (TREE_CODE (expr))
2239     {
2240     case TARGET_MEM_REF:
2241       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2242 	 target, thus they are always addressable.  */
2243       return false;
2244 
2245     case MEM_REF:
2246       /* Likewise for MEM_REFs, modulo the storage order.  */
2247       return REF_REVERSE_STORAGE_ORDER (expr);
2248 
2249     case BIT_FIELD_REF:
2250       if (REF_REVERSE_STORAGE_ORDER (expr))
2251 	return true;
2252       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2253 
2254     case COMPONENT_REF:
2255       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2256 	return true;
2257       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2258 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2259 
2260     case ARRAY_REF:
2261     case ARRAY_RANGE_REF:
2262       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2263 	return true;
2264       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2265 
2266     case VIEW_CONVERT_EXPR:
2267       /* This kind of view-conversions may wrap non-addressable objects
2268 	 and make them look addressable.  After some processing the
2269 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2270 	 of inappropriate objects to be built.  */
2271       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2272 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2273 	return true;
2274       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275 
2276     CASE_CONVERT:
2277       return true;
2278 
2279     default:
2280       break;
2281     }
2282 
2283   return false;
2284 }
2285 
2286 /* Finds addresses in *OP_P inside STMT.  */
2287 
2288 static void
2289 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2290 			       tree *op_p)
2291 {
2292   tree base = *op_p, step = size_zero_node;
2293   struct iv *civ;
2294   struct ifs_ivopts_data ifs_ivopts_data;
2295 
2296   /* Do not play with volatile memory references.  A bit too conservative,
2297      perhaps, but safe.  */
2298   if (gimple_has_volatile_ops (stmt))
2299     goto fail;
2300 
2301   /* Ignore bitfields for now.  Not really something terribly complicated
2302      to handle.  TODO.  */
2303   if (TREE_CODE (base) == BIT_FIELD_REF)
2304     goto fail;
2305 
2306   base = unshare_expr (base);
2307 
2308   if (TREE_CODE (base) == TARGET_MEM_REF)
2309     {
2310       tree type = build_pointer_type (TREE_TYPE (base));
2311       tree astep;
2312 
2313       if (TMR_BASE (base)
2314 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2315 	{
2316 	  civ = get_iv (data, TMR_BASE (base));
2317 	  if (!civ)
2318 	    goto fail;
2319 
2320 	  TMR_BASE (base) = civ->base;
2321 	  step = civ->step;
2322 	}
2323       if (TMR_INDEX2 (base)
2324 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2325 	{
2326 	  civ = get_iv (data, TMR_INDEX2 (base));
2327 	  if (!civ)
2328 	    goto fail;
2329 
2330 	  TMR_INDEX2 (base) = civ->base;
2331 	  step = civ->step;
2332 	}
2333       if (TMR_INDEX (base)
2334 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2335 	{
2336 	  civ = get_iv (data, TMR_INDEX (base));
2337 	  if (!civ)
2338 	    goto fail;
2339 
2340 	  TMR_INDEX (base) = civ->base;
2341 	  astep = civ->step;
2342 
2343 	  if (astep)
2344 	    {
2345 	      if (TMR_STEP (base))
2346 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2347 
2348 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2349 	    }
2350 	}
2351 
2352       if (integer_zerop (step))
2353 	goto fail;
2354       base = tree_mem_ref_addr (type, base);
2355     }
2356   else
2357     {
2358       ifs_ivopts_data.ivopts_data = data;
2359       ifs_ivopts_data.stmt = stmt;
2360       ifs_ivopts_data.step = size_zero_node;
2361       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2362 	  || integer_zerop (ifs_ivopts_data.step))
2363 	goto fail;
2364       step = ifs_ivopts_data.step;
2365 
2366       /* Check that the base expression is addressable.  This needs
2367 	 to be done after substituting bases of IVs into it.  */
2368       if (may_be_nonaddressable_p (base))
2369 	goto fail;
2370 
2371       /* Moreover, on strict alignment platforms, check that it is
2372 	 sufficiently aligned.  */
2373       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2374 	goto fail;
2375 
2376       base = build_fold_addr_expr (base);
2377 
2378       /* Substituting bases of IVs into the base expression might
2379 	 have caused folding opportunities.  */
2380       if (TREE_CODE (base) == ADDR_EXPR)
2381 	{
2382 	  tree *ref = &TREE_OPERAND (base, 0);
2383 	  while (handled_component_p (*ref))
2384 	    ref = &TREE_OPERAND (*ref, 0);
2385 	  if (TREE_CODE (*ref) == MEM_REF)
2386 	    {
2387 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2388 				      TREE_OPERAND (*ref, 0),
2389 				      TREE_OPERAND (*ref, 1));
2390 	      if (tem)
2391 		*ref = tem;
2392 	    }
2393 	}
2394     }
2395 
2396   civ = alloc_iv (data, base, step);
2397   /* Fail if base object of this memory reference is unknown.  */
2398   if (civ->base_object == NULL_TREE)
2399     goto fail;
2400 
2401   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2402   return;
2403 
2404 fail:
2405   for_each_index (op_p, idx_record_use, data);
2406 }
2407 
2408 /* Finds and records invariants used in STMT.  */
2409 
2410 static void
2411 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2412 {
2413   ssa_op_iter iter;
2414   use_operand_p use_p;
2415   tree op;
2416 
2417   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2418     {
2419       op = USE_FROM_PTR (use_p);
2420       record_invariant (data, op, false);
2421     }
2422 }
2423 
2424 /* CALL calls an internal function.  If operand *OP_P will become an
2425    address when the call is expanded, return the type of the memory
2426    being addressed, otherwise return null.  */
2427 
2428 static tree
2429 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2430 {
2431   switch (gimple_call_internal_fn (call))
2432     {
2433     case IFN_MASK_LOAD:
2434       if (op_p == gimple_call_arg_ptr (call, 0))
2435 	return TREE_TYPE (gimple_call_lhs (call));
2436       return NULL_TREE;
2437 
2438     case IFN_MASK_STORE:
2439       if (op_p == gimple_call_arg_ptr (call, 0))
2440 	return TREE_TYPE (gimple_call_arg (call, 3));
2441       return NULL_TREE;
2442 
2443     default:
2444       return NULL_TREE;
2445     }
2446 }
2447 
2448 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2449    Return true if the operand will become an address when STMT
2450    is expanded and record the associated address use if so.  */
2451 
2452 static bool
2453 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2454 		       struct iv *iv)
2455 {
2456   /* Fail if base object of this memory reference is unknown.  */
2457   if (iv->base_object == NULL_TREE)
2458     return false;
2459 
2460   tree mem_type = NULL_TREE;
2461   if (gcall *call = dyn_cast <gcall *> (stmt))
2462     if (gimple_call_internal_p (call))
2463       mem_type = get_mem_type_for_internal_fn (call, op_p);
2464   if (mem_type)
2465     {
2466       iv = alloc_iv (data, iv->base, iv->step);
2467       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2468       return true;
2469     }
2470   return false;
2471 }
2472 
2473 /* Finds interesting uses of induction variables in the statement STMT.  */
2474 
2475 static void
2476 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2477 {
2478   struct iv *iv;
2479   tree op, *lhs, *rhs;
2480   ssa_op_iter iter;
2481   use_operand_p use_p;
2482   enum tree_code code;
2483 
2484   find_invariants_stmt (data, stmt);
2485 
2486   if (gimple_code (stmt) == GIMPLE_COND)
2487     {
2488       find_interesting_uses_cond (data, stmt);
2489       return;
2490     }
2491 
2492   if (is_gimple_assign (stmt))
2493     {
2494       lhs = gimple_assign_lhs_ptr (stmt);
2495       rhs = gimple_assign_rhs1_ptr (stmt);
2496 
2497       if (TREE_CODE (*lhs) == SSA_NAME)
2498 	{
2499 	  /* If the statement defines an induction variable, the uses are not
2500 	     interesting by themselves.  */
2501 
2502 	  iv = get_iv (data, *lhs);
2503 
2504 	  if (iv && !integer_zerop (iv->step))
2505 	    return;
2506 	}
2507 
2508       code = gimple_assign_rhs_code (stmt);
2509       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2510 	  && (REFERENCE_CLASS_P (*rhs)
2511 	      || is_gimple_val (*rhs)))
2512 	{
2513 	  if (REFERENCE_CLASS_P (*rhs))
2514 	    find_interesting_uses_address (data, stmt, rhs);
2515 	  else
2516 	    find_interesting_uses_op (data, *rhs);
2517 
2518 	  if (REFERENCE_CLASS_P (*lhs))
2519 	    find_interesting_uses_address (data, stmt, lhs);
2520 	  return;
2521 	}
2522       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2523 	{
2524 	  find_interesting_uses_cond (data, stmt);
2525 	  return;
2526 	}
2527 
2528       /* TODO -- we should also handle address uses of type
2529 
2530 	 memory = call (whatever);
2531 
2532 	 and
2533 
2534 	 call (memory).  */
2535     }
2536 
2537   if (gimple_code (stmt) == GIMPLE_PHI
2538       && gimple_bb (stmt) == data->current_loop->header)
2539     {
2540       iv = get_iv (data, PHI_RESULT (stmt));
2541 
2542       if (iv && !integer_zerop (iv->step))
2543 	return;
2544     }
2545 
2546   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2547     {
2548       op = USE_FROM_PTR (use_p);
2549 
2550       if (TREE_CODE (op) != SSA_NAME)
2551 	continue;
2552 
2553       iv = get_iv (data, op);
2554       if (!iv)
2555 	continue;
2556 
2557       if (!find_address_like_use (data, stmt, use_p->use, iv))
2558 	find_interesting_uses_op (data, op);
2559     }
2560 }
2561 
2562 /* Finds interesting uses of induction variables outside of loops
2563    on loop exit edge EXIT.  */
2564 
2565 static void
2566 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2567 {
2568   gphi *phi;
2569   gphi_iterator psi;
2570   tree def;
2571 
2572   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2573     {
2574       phi = psi.phi ();
2575       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2576       if (!virtual_operand_p (def))
2577 	find_interesting_uses_op (data, def);
2578     }
2579 }
2580 
2581 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2582    mode for memory reference represented by USE.  */
2583 
2584 static GTY (()) vec<rtx, va_gc> *addr_list;
2585 
2586 static bool
2587 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2588 {
2589   rtx reg, addr;
2590   unsigned list_index;
2591   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2592   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2593 
2594   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2595   if (list_index >= vec_safe_length (addr_list))
2596     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2597 
2598   addr = (*addr_list)[list_index];
2599   if (!addr)
2600     {
2601       addr_mode = targetm.addr_space.address_mode (as);
2602       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2603       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2604       (*addr_list)[list_index] = addr;
2605     }
2606   else
2607     addr_mode = GET_MODE (addr);
2608 
2609   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2610   return (memory_address_addr_space_p (mem_mode, addr, as));
2611 }
2612 
2613 /* Comparison function to sort group in ascending order of addr_offset.  */
2614 
2615 static int
2616 group_compare_offset (const void *a, const void *b)
2617 {
2618   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2619   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2620 
2621   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2622 }
2623 
2624 /* Check if small groups should be split.  Return true if no group
2625    contains more than two uses with distinct addr_offsets.  Return
2626    false otherwise.  We want to split such groups because:
2627 
     1) Small groups don't have much benefit and may interfere with
	general candidate selection.
     2) The problem size with only small groups is usually small, so the
	general algorithm can handle it well.

   TODO -- The above claim may not hold when we want to merge memory
   accesses with consecutive addresses.  */
2635 
2636 static bool
2637 split_small_address_groups_p (struct ivopts_data *data)
2638 {
2639   unsigned int i, j, distinct = 1;
2640   struct iv_use *pre;
2641   struct iv_group *group;
2642 
2643   for (i = 0; i < data->vgroups.length (); i++)
2644     {
2645       group = data->vgroups[i];
2646       if (group->vuses.length () == 1)
2647 	continue;
2648 
2649       gcc_assert (address_p (group->type));
2650       if (group->vuses.length () == 2)
2651 	{
2652 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2653 				      group->vuses[1]->addr_offset) > 0)
2654 	    std::swap (group->vuses[0], group->vuses[1]);
2655 	}
2656       else
2657 	group->vuses.qsort (group_compare_offset);
2658 
2659       if (distinct > 2)
2660 	continue;
2661 
2662       distinct = 1;
2663       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2664 	{
2665 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2666 	    {
2667 	      pre = group->vuses[j];
2668 	      distinct++;
2669 	    }
2670 
2671 	  if (distinct > 2)
2672 	    break;
2673 	}
2674     }
2675 
2676   return (distinct <= 2);
2677 }
2678 
2679 /* For each group of address type uses, this function further groups
2680    these uses according to the maximum offset supported by target's
2681    [base + offset] addressing mode.  */
2682 
2683 static void
2684 split_address_groups (struct ivopts_data *data)
2685 {
2686   unsigned int i, j;
  /* Whether to split every group unconditionally; see
     split_small_address_groups_p.  */
2688   bool split_p = split_small_address_groups_p (data);
2689 
2690   for (i = 0; i < data->vgroups.length (); i++)
2691     {
2692       struct iv_group *new_group = NULL;
2693       struct iv_group *group = data->vgroups[i];
2694       struct iv_use *use = group->vuses[0];
2695 
2696       use->id = 0;
2697       use->group_id = group->id;
2698       if (group->vuses.length () == 1)
2699 	continue;
2700 
2701       gcc_assert (address_p (use->type));
2702 
2703       for (j = 1; j < group->vuses.length ();)
2704 	{
2705 	  struct iv_use *next = group->vuses[j];
2706 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2707 
	  /* Split the group if asked to, or if the offset against the
	     first use can't fit in the offset part of the addressing
	     mode.  IV uses having the same offset are still kept in one
	     group.  */
2711 	  if (maybe_ne (offset, 0)
2712 	      && (split_p || !addr_offset_valid_p (use, offset)))
2713 	    {
2714 	      if (!new_group)
2715 		new_group = record_group (data, group->type);
2716 	      group->vuses.ordered_remove (j);
2717 	      new_group->vuses.safe_push (next);
2718 	      continue;
2719 	    }
2720 
2721 	  next->id = j;
2722 	  next->group_id = group->id;
2723 	  j++;
2724 	}
2725     }
2726 }
2727 
2728 /* Finds uses of the induction variables that are interesting.  */
2729 
2730 static void
2731 find_interesting_uses (struct ivopts_data *data)
2732 {
2733   basic_block bb;
2734   gimple_stmt_iterator bsi;
2735   basic_block *body = get_loop_body (data->current_loop);
2736   unsigned i;
2737   edge e;
2738 
2739   for (i = 0; i < data->current_loop->num_nodes; i++)
2740     {
2741       edge_iterator ei;
2742       bb = body[i];
2743 
2744       FOR_EACH_EDGE (e, ei, bb->succs)
2745 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2746 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2747 	  find_interesting_uses_outside (data, e);
2748 
2749       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2750 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2751       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2752 	if (!is_gimple_debug (gsi_stmt (bsi)))
2753 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2754     }
2755   free (body);
2756 
2757   split_address_groups (data);
2758 
2759   if (dump_file && (dump_flags & TDF_DETAILS))
2760     {
2761       fprintf (dump_file, "\n<IV Groups>:\n");
2762       dump_groups (dump_file, data);
2763       fprintf (dump_file, "\n");
2764     }
2765 }
2766 
2767 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2768    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2769    we are at the top-level of the processed address.  */
2770 
2771 static tree
2772 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2773 		poly_int64 *offset)
2774 {
2775   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2776   enum tree_code code;
2777   tree type, orig_type = TREE_TYPE (expr);
2778   poly_int64 off0, off1;
2779   HOST_WIDE_INT st;
2780   tree orig_expr = expr;
2781 
2782   STRIP_NOPS (expr);
2783 
2784   type = TREE_TYPE (expr);
2785   code = TREE_CODE (expr);
2786   *offset = 0;
2787 
2788   switch (code)
2789     {
2790     case POINTER_PLUS_EXPR:
2791     case PLUS_EXPR:
2792     case MINUS_EXPR:
2793       op0 = TREE_OPERAND (expr, 0);
2794       op1 = TREE_OPERAND (expr, 1);
2795 
2796       op0 = strip_offset_1 (op0, false, false, &off0);
2797       op1 = strip_offset_1 (op1, false, false, &off1);
2798 
2799       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2800       if (op0 == TREE_OPERAND (expr, 0)
2801 	  && op1 == TREE_OPERAND (expr, 1))
2802 	return orig_expr;
2803 
2804       if (integer_zerop (op1))
2805 	expr = op0;
2806       else if (integer_zerop (op0))
2807 	{
2808 	  if (code == MINUS_EXPR)
2809 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2810 	  else
2811 	    expr = op1;
2812 	}
2813       else
2814 	expr = fold_build2 (code, type, op0, op1);
2815 
2816       return fold_convert (orig_type, expr);
2817 
2818     case MULT_EXPR:
2819       op1 = TREE_OPERAND (expr, 1);
2820       if (!cst_and_fits_in_hwi (op1))
2821 	return orig_expr;
2822 
2823       op0 = TREE_OPERAND (expr, 0);
2824       op0 = strip_offset_1 (op0, false, false, &off0);
2825       if (op0 == TREE_OPERAND (expr, 0))
2826 	return orig_expr;
2827 
2828       *offset = off0 * int_cst_value (op1);
2829       if (integer_zerop (op0))
2830 	expr = op0;
2831       else
2832 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2833 
2834       return fold_convert (orig_type, expr);
2835 
2836     case ARRAY_REF:
2837     case ARRAY_RANGE_REF:
2838       if (!inside_addr)
2839 	return orig_expr;
2840 
2841       step = array_ref_element_size (expr);
2842       if (!cst_and_fits_in_hwi (step))
2843 	break;
2844 
2845       st = int_cst_value (step);
2846       op1 = TREE_OPERAND (expr, 1);
2847       op1 = strip_offset_1 (op1, false, false, &off1);
2848       *offset = off1 * st;
2849 
2850       if (top_compref
2851 	  && integer_zerop (op1))
2852 	{
2853 	  /* Strip the component reference completely.  */
2854 	  op0 = TREE_OPERAND (expr, 0);
2855 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2856 	  *offset += off0;
2857 	  return op0;
2858 	}
2859       break;
2860 
2861     case COMPONENT_REF:
2862       {
2863 	tree field;
2864 
2865 	if (!inside_addr)
2866 	  return orig_expr;
2867 
2868 	tmp = component_ref_field_offset (expr);
2869 	field = TREE_OPERAND (expr, 1);
2870 	if (top_compref
2871 	    && cst_and_fits_in_hwi (tmp)
2872 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2873 	  {
2874 	    HOST_WIDE_INT boffset, abs_off;
2875 
2876 	    /* Strip the component reference completely.  */
2877 	    op0 = TREE_OPERAND (expr, 0);
2878 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2879 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2880 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2881 	    if (boffset < 0)
2882 	      abs_off = -abs_off;
2883 
2884 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2885 	    return op0;
2886 	  }
2887       }
2888       break;
2889 
2890     case ADDR_EXPR:
2891       op0 = TREE_OPERAND (expr, 0);
2892       op0 = strip_offset_1 (op0, true, true, &off0);
2893       *offset += off0;
2894 
2895       if (op0 == TREE_OPERAND (expr, 0))
2896 	return orig_expr;
2897 
2898       expr = build_fold_addr_expr (op0);
2899       return fold_convert (orig_type, expr);
2900 
2901     case MEM_REF:
2902       /* ???  Offset operand?  */
2903       inside_addr = false;
2904       break;
2905 
2906     default:
2907       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2908 	return build_int_cst (orig_type, 0);
2909       return orig_expr;
2910     }
2911 
  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
2914   op0 = TREE_OPERAND (expr, 0);
2915   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2916   *offset += off0;
2917 
2918   if (op0 == TREE_OPERAND (expr, 0)
2919       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2920     return orig_expr;
2921 
2922   expr = copy_node (expr);
2923   TREE_OPERAND (expr, 0) = op0;
2924   if (op1)
2925     TREE_OPERAND (expr, 1) = op1;
2926 
  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
2930   expr = fold_convert (orig_type, expr);
2931 
2932   return expr;
2933 }
2934 
2935 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2936 
2937 tree
2938 strip_offset (tree expr, poly_uint64_pod *offset)
2939 {
2940   poly_int64 off;
2941   tree core = strip_offset_1 (expr, false, false, &off);
2942   *offset = off;
2943   return core;
2944 }
2945 
/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */
2949 
2950 static tree
2951 generic_type_for (tree type)
2952 {
2953   if (POINTER_TYPE_P (type))
2954     return unsigned_type_for (type);
2955 
2956   if (TYPE_UNSIGNED (type))
2957     return type;
2958 
2959   return unsigned_type_for (type);
2960 }
2961 
2962 /* Private data for walk_tree.  */
2963 
2964 struct walk_tree_data
2965 {
2966   bitmap *inv_vars;
2967   struct ivopts_data *idata;
2968 };
2969 
/* Callback function for walk_tree; it records invariants and symbol
   references in *EXPR_P.  DATA is the structure storing the result info.  */
2972 
2973 static tree
2974 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2975 {
2976   tree op = *expr_p;
2977   struct version_info *info;
2978   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2979 
2980   if (TREE_CODE (op) != SSA_NAME)
2981     return NULL_TREE;
2982 
2983   info = name_info (wdata->idata, op);
  /* Because we expand simple operations when finding IVs, a loop invariant
     variable that isn't referred to by the original loop could be used now.
     Record such invariant variables here.  */
2987   if (!info->iv)
2988     {
2989       struct ivopts_data *idata = wdata->idata;
2990       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2991 
2992       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2993 	{
2994 	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2995 	  record_invariant (idata, op, false);
2996 	}
2997     }
2998   if (!info->inv_id || info->has_nonlin_use)
2999     return NULL_TREE;
3000 
3001   if (!*wdata->inv_vars)
3002     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3003   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3004 
3005   return NULL_TREE;
3006 }
3007 
/* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
   should store them.  */
3010 
3011 static inline void
3012 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3013 {
3014   struct walk_tree_data wdata;
3015 
3016   if (!inv_vars)
3017     return;
3018 
3019   wdata.idata = data;
3020   wdata.inv_vars = inv_vars;
3021   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3022 }
3023 
/* Get the entry from the invariant expr hash table for INV_EXPR.  A new
   entry will be recorded if it doesn't exist yet.  Given the two exprs:
     inv_expr + cst1, inv_expr + cst2
   it's hard to decide whether the constant part should be stripped or not.
   We choose not to strip it, for the following reasons:
     1) We need to count an ADD cost for the constant part if it's
	stripped, which isn't always trivial where this function is called.
     2) Stripping the constant away may conflict with the following loop
	invariant hoisting pass.
     3) Not stripping the constant results in more invariant exprs, which
	usually leads to a decision preferring lower register pressure.  */
3035 
3036 static iv_inv_expr_ent *
3037 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3038 {
3039   STRIP_NOPS (inv_expr);
3040 
3041   if (poly_int_tree_p (inv_expr)
3042       || TREE_CODE (inv_expr) == SSA_NAME)
3043     return NULL;
3044 
3045   /* Don't strip constant part away as we used to.  */
3046 
3047   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3048   struct iv_inv_expr_ent ent;
3049   ent.expr = inv_expr;
3050   ent.hash = iterative_hash_expr (inv_expr, 0);
3051   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3052 
3053   if (!*slot)
3054     {
3055       *slot = XNEW (struct iv_inv_expr_ent);
3056       (*slot)->expr = inv_expr;
3057       (*slot)->hash = ent.hash;
3058       (*slot)->id = ++data->max_inv_expr_id;
3059     }
3060 
3061   return *slot;
3062 }
3063 
/* Adds a candidate BASE + STEP * i.  The important field is set to
   IMPORTANT and the position to POS.  If USE is not NULL, the candidate
   is set as related to it.  */
3068 
3069 static struct iv_cand *
3070 add_candidate_1 (struct ivopts_data *data,
3071 		 tree base, tree step, bool important, enum iv_position pos,
3072 		 struct iv_use *use, gimple *incremented_at,
3073 		 struct iv *orig_iv = NULL)
3074 {
3075   unsigned i;
3076   struct iv_cand *cand = NULL;
3077   tree type, orig_type;
3078 
3079   gcc_assert (base && step);
3080 
3081   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3082      live, but the ivopts code may replace a real pointer with one
3083      pointing before or after the memory block that is then adjusted
3084      into the memory block during the loop.  FIXME: It would likely be
3085      better to actually force the pointer live and still use ivopts;
3086      for example, it would be enough to write the pointer into memory
3087      and keep it there until after the loop.  */
3088   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3089     return NULL;
3090 
3091   /* For non-original variables, make sure their values are computed in a type
3092      that does not invoke undefined behavior on overflows (since in general,
3093      we cannot prove that these induction variables are non-wrapping).  */
3094   if (pos != IP_ORIGINAL)
3095     {
3096       orig_type = TREE_TYPE (base);
3097       type = generic_type_for (orig_type);
3098       if (type != orig_type)
3099 	{
3100 	  base = fold_convert (type, base);
3101 	  step = fold_convert (type, step);
3102 	}
3103     }
3104 
3105   for (i = 0; i < data->vcands.length (); i++)
3106     {
3107       cand = data->vcands[i];
3108 
3109       if (cand->pos != pos)
3110 	continue;
3111 
3112       if (cand->incremented_at != incremented_at
3113 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3114 	      && cand->ainc_use != use))
3115 	continue;
3116 
3117       if (operand_equal_p (base, cand->iv->base, 0)
3118 	  && operand_equal_p (step, cand->iv->step, 0)
3119 	  && (TYPE_PRECISION (TREE_TYPE (base))
3120 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3121 	break;
3122     }
3123 
3124   if (i == data->vcands.length ())
3125     {
3126       cand = XCNEW (struct iv_cand);
3127       cand->id = i;
3128       cand->iv = alloc_iv (data, base, step);
3129       cand->pos = pos;
3130       if (pos != IP_ORIGINAL)
3131 	{
3132 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3133 	  cand->var_after = cand->var_before;
3134 	}
3135       cand->important = important;
3136       cand->incremented_at = incremented_at;
3137       data->vcands.safe_push (cand);
3138 
3139       if (!poly_int_tree_p (step))
3140 	{
3141 	  find_inv_vars (data, &step, &cand->inv_vars);
3142 
3143 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3144 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3145 	  if (inv_expr != NULL)
3146 	    {
3147 	      cand->inv_exprs = cand->inv_vars;
3148 	      cand->inv_vars = NULL;
3149 	      if (cand->inv_exprs)
3150 		bitmap_clear (cand->inv_exprs);
3151 	      else
3152 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3153 
3154 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3155 	    }
3156 	}
3157 
3158       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3159 	cand->ainc_use = use;
3160       else
3161 	cand->ainc_use = NULL;
3162 
3163       cand->orig_iv = orig_iv;
3164       if (dump_file && (dump_flags & TDF_DETAILS))
3165 	dump_cand (dump_file, cand);
3166     }
3167 
3168   cand->important |= important;
3169 
3170   /* Relate candidate to the group for which it is added.  */
3171   if (use)
3172     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3173 
3174   return cand;
3175 }
3176 
3177 /* Returns true if incrementing the induction variable at the end of the LOOP
3178    is allowed.
3179 
   The purpose is to avoid splitting the latch edge with a biv increment,
   thus creating a jump, possibly confusing other optimization passes and
   leaving less freedom to the scheduler.  So we allow IP_END only if
   IP_NORMAL is not
3183    available (so we do not have a better alternative), or if the latch edge
3184    is already nonempty.  */
3185 
3186 static bool
3187 allow_ip_end_pos_p (struct loop *loop)
3188 {
3189   if (!ip_normal_pos (loop))
3190     return true;
3191 
3192   if (!empty_block_p (ip_end_pos (loop)))
3193     return true;
3194 
3195   return false;
3196 }
3197 
3198 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3199    Important field is set to IMPORTANT.  */
3200 
3201 static void
3202 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3203 			bool important, struct iv_use *use)
3204 {
3205   basic_block use_bb = gimple_bb (use->stmt);
3206   machine_mode mem_mode;
3207   unsigned HOST_WIDE_INT cstepi;
3208 
3209   /* If we insert the increment in any position other than the standard
3210      ones, we must ensure that it is incremented once per iteration.
3211      It must not be in an inner nested loop, or one side of an if
3212      statement.  */
3213   if (use_bb->loop_father != data->current_loop
3214       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3215       || stmt_can_throw_internal (use->stmt)
3216       || !cst_and_fits_in_hwi (step))
3217     return;
3218 
3219   cstepi = int_cst_value (step);
3220 
3221   mem_mode = TYPE_MODE (use->mem_type);
3222   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3223 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3224        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3225       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3226 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3227 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3228     {
3229       enum tree_code code = MINUS_EXPR;
3230       tree new_base;
3231       tree new_step = step;
3232 
3233       if (POINTER_TYPE_P (TREE_TYPE (base)))
3234 	{
3235 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3236 	  code = POINTER_PLUS_EXPR;
3237 	}
3238       else
3239 	new_step = fold_convert (TREE_TYPE (base), new_step);
3240       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3241       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3242 		       use->stmt);
3243     }
3244   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3245 	|| USE_STORE_POST_INCREMENT (mem_mode))
3246        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3247       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3248 	   || USE_STORE_POST_DECREMENT (mem_mode))
3249 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3250     {
3251       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3252 		       use->stmt);
3253     }
3254 }
3255 
/* Adds a candidate BASE + STEP * i.  The important field is set to
   IMPORTANT.  If USE is not NULL, the candidate is set as related to it.
   The candidate computation is scheduled before the exit condition and at
   the end of the loop.  */
3260 
3261 static void
3262 add_candidate (struct ivopts_data *data,
3263 	       tree base, tree step, bool important, struct iv_use *use,
3264 	       struct iv *orig_iv = NULL)
3265 {
3266   if (ip_normal_pos (data->current_loop))
3267     add_candidate_1 (data, base, step, important,
3268 		     IP_NORMAL, use, NULL, orig_iv);
3269   if (ip_end_pos (data->current_loop)
3270       && allow_ip_end_pos_p (data->current_loop))
3271     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3272 }
3273 
3274 /* Adds standard iv candidates.  */
3275 
3276 static void
3277 add_standard_iv_candidates (struct ivopts_data *data)
3278 {
3279   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3280 
3281   /* The same for a double-integer type if it is still fast enough.  */
3282   if (TYPE_PRECISION
3283 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3284       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3285     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3286 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3287 
3288   /* The same for a double-integer type if it is still fast enough.  */
3289   if (TYPE_PRECISION
3290 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3291       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3292     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3293 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3294 }
3295 
3296 
/* Adds candidates based on the old induction variable IV.  */
3298 
3299 static void
3300 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3301 {
3302   gimple *phi;
3303   tree def;
3304   struct iv_cand *cand;
3305 
  /* Check if this biv is used in an address type use.  */
3307   if (iv->no_overflow  && iv->have_address_use
3308       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3309       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3310     {
3311       tree base = fold_convert (sizetype, iv->base);
3312       tree step = fold_convert (sizetype, iv->step);
3313 
3314       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3315       add_candidate (data, base, step, true, NULL, iv);
3316       /* Add iv cand of the original type only if it has nonlinear use.  */
3317       if (iv->nonlin_use)
3318 	add_candidate (data, iv->base, iv->step, true, NULL);
3319     }
3320   else
3321     add_candidate (data, iv->base, iv->step, true, NULL);
3322 
3323   /* The same, but with initial value zero.  */
3324   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3325     add_candidate (data, size_int (0), iv->step, true, NULL);
3326   else
3327     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3328 		   iv->step, true, NULL);
3329 
3330   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3331   if (gimple_code (phi) == GIMPLE_PHI)
3332     {
3333       /* Additionally record the possibility of leaving the original iv
3334 	 untouched.  */
3335       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3336       /* Don't add candidate if it's from another PHI node because
3337 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3338       phi = SSA_NAME_DEF_STMT (def);
3339       if (gimple_code (phi) != GIMPLE_PHI)
3340 	{
3341 	  cand = add_candidate_1 (data,
3342 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3343 				  SSA_NAME_DEF_STMT (def));
3344 	  if (cand)
3345 	    {
3346 	      cand->var_before = iv->ssa_name;
3347 	      cand->var_after = def;
3348 	    }
3349 	}
3350       else
3351 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3352     }
3353 }
3354 
3355 /* Adds candidates based on the old induction variables.  */
3356 
3357 static void
3358 add_iv_candidate_for_bivs (struct ivopts_data *data)
3359 {
3360   unsigned i;
3361   struct iv *iv;
3362   bitmap_iterator bi;
3363 
3364   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3365     {
3366       iv = ver_info (data, i)->iv;
3367       if (iv && iv->biv_p && !integer_zerop (iv->step))
3368 	add_iv_candidate_for_biv (data, iv);
3369     }
3370 }
3371 
3372 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3373 
3374 static void
3375 record_common_cand (struct ivopts_data *data, tree base,
3376 		    tree step, struct iv_use *use)
3377 {
3378   struct iv_common_cand ent;
3379   struct iv_common_cand **slot;
3380 
3381   ent.base = base;
3382   ent.step = step;
3383   ent.hash = iterative_hash_expr (base, 0);
3384   ent.hash = iterative_hash_expr (step, ent.hash);
3385 
3386   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3387   if (*slot == NULL)
3388     {
3389       *slot = new iv_common_cand ();
3390       (*slot)->base = base;
3391       (*slot)->step = step;
3392       (*slot)->uses.create (8);
3393       (*slot)->hash = ent.hash;
3394       data->iv_common_cands.safe_push ((*slot));
3395     }
3396 
3397   gcc_assert (use != NULL);
3398   (*slot)->uses.safe_push (use);
3399   return;
3400 }
3401 
3402 /* Comparison function used to sort common candidates.  */
3403 
3404 static int
3405 common_cand_cmp (const void *p1, const void *p2)
3406 {
3407   unsigned n1, n2;
3408   const struct iv_common_cand *const *const ccand1
3409     = (const struct iv_common_cand *const *)p1;
3410   const struct iv_common_cand *const *const ccand2
3411     = (const struct iv_common_cand *const *)p2;
3412 
3413   n1 = (*ccand1)->uses.length ();
3414   n2 = (*ccand2)->uses.length ();
3415   return n2 - n1;
3416 }
3417 
/* Adds IV candidates based on the common candidates recorded.  */
3419 
3420 static void
3421 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3422 {
3423   unsigned i, j;
3424   struct iv_cand *cand_1, *cand_2;
3425 
3426   data->iv_common_cands.qsort (common_cand_cmp);
3427   for (i = 0; i < data->iv_common_cands.length (); i++)
3428     {
3429       struct iv_common_cand *ptr = data->iv_common_cands[i];
3430 
3431       /* Only add IV candidate if it's derived from multiple uses.  */
3432       if (ptr->uses.length () <= 1)
3433 	break;
3434 
3435       cand_1 = NULL;
3436       cand_2 = NULL;
3437       if (ip_normal_pos (data->current_loop))
3438 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3439 				  false, IP_NORMAL, NULL, NULL);
3440 
3441       if (ip_end_pos (data->current_loop)
3442 	  && allow_ip_end_pos_p (data->current_loop))
3443 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3444 				  false, IP_END, NULL, NULL);
3445 
3446       /* Bind deriving uses and the new candidates.  */
3447       for (j = 0; j < ptr->uses.length (); j++)
3448 	{
3449 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3450 	  if (cand_1)
3451 	    bitmap_set_bit (group->related_cands, cand_1->id);
3452 	  if (cand_2)
3453 	    bitmap_set_bit (group->related_cands, cand_2->id);
3454 	}
3455     }
3456 
3457   /* Release data since it is useless from this point.  */
3458   data->iv_common_cand_tab->empty ();
3459   data->iv_common_cands.truncate (0);
3460 }
3461 
3462 /* Adds candidates based on the value of USE's iv.  */
3463 
3464 static void
3465 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3466 {
3467   poly_uint64 offset;
3468   tree base;
3469   tree basetype;
3470   struct iv *iv = use->iv;
3471 
3472   add_candidate (data, iv->base, iv->step, false, use);
3473 
3474   /* Record common candidate for use in case it can be shared by others.  */
3475   record_common_cand (data, iv->base, iv->step, use);
3476 
3477   /* Record common candidate with initial value zero.  */
3478   basetype = TREE_TYPE (iv->base);
3479   if (POINTER_TYPE_P (basetype))
3480     basetype = sizetype;
3481   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3482 
  /* Record a common candidate with the constant offset stripped from the
     base.  As for the use itself, we also add a candidate for it directly.  */
3485   base = strip_offset (iv->base, &offset);
3486   if (maybe_ne (offset, 0U) || base != iv->base)
3487     {
3488       record_common_cand (data, base, iv->step, use);
3489       add_candidate (data, base, iv->step, false, use);
3490     }
3491 
3492   /* Record common candidate with base_object removed in base.  */
3493   base = iv->base;
3494   STRIP_NOPS (base);
3495   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3496     {
3497       tree step = iv->step;
3498 
3499       STRIP_NOPS (step);
3500       base = TREE_OPERAND (base, 1);
3501       step = fold_convert (sizetype, step);
3502       record_common_cand (data, base, step, use);
3503       /* Also record common candidate with offset stripped.  */
3504       base = strip_offset (base, &offset);
3505       if (maybe_ne (offset, 0U))
3506 	record_common_cand (data, base, step, use);
3507     }
3508 
  /* Finally, add auto-increment candidates.  Make such variables
     important since other iv uses with the same base object may be based
     on them.  */
3512   if (use != NULL && address_p (use->type))
3513     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3514 }
3515 
3516 /* Adds candidates based on the uses.  */
3517 
3518 static void
3519 add_iv_candidate_for_groups (struct ivopts_data *data)
3520 {
3521   unsigned i;
3522 
3523   /* Only add candidate for the first use in group.  */
3524   for (i = 0; i < data->vgroups.length (); i++)
3525     {
3526       struct iv_group *group = data->vgroups[i];
3527 
3528       gcc_assert (group->vuses[0] != NULL);
3529       add_iv_candidate_for_use (data, group->vuses[0]);
3530     }
3531   add_iv_candidate_derived_from_uses (data);
3532 }
3533 
3534 /* Record important candidates and add them to related_cands bitmaps.  */
3535 
3536 static void
3537 record_important_candidates (struct ivopts_data *data)
3538 {
3539   unsigned i;
3540   struct iv_group *group;
3541 
3542   for (i = 0; i < data->vcands.length (); i++)
3543     {
3544       struct iv_cand *cand = data->vcands[i];
3545 
3546       if (cand->important)
3547 	bitmap_set_bit (data->important_candidates, i);
3548     }
3549 
3550   data->consider_all_candidates = (data->vcands.length ()
3551 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3552 
3553   /* Add important candidates to groups' related_cands bitmaps.  */
3554   for (i = 0; i < data->vgroups.length (); i++)
3555     {
3556       group = data->vgroups[i];
3557       bitmap_ior_into (group->related_cands, data->important_candidates);
3558     }
3559 }
3560 
/* Allocates the data structure mapping the (group, candidate) pairs to
   costs.  If consider_all_candidates is true, each group gets an array
   indexed by candidate id; otherwise each group gets a small table sized
   to its number of related candidates rounded up to a power of two.  */
3564 
3565 static void
3566 alloc_use_cost_map (struct ivopts_data *data)
3567 {
3568   unsigned i, size, s;
3569 
3570   for (i = 0; i < data->vgroups.length (); i++)
3571     {
3572       struct iv_group *group = data->vgroups[i];
3573 
3574       if (data->consider_all_candidates)
3575 	size = data->vcands.length ();
3576       else
3577 	{
3578 	  s = bitmap_count_bits (group->related_cands);
3579 
3580 	  /* Round up to a power of two, so that computing the modulus is fast.  */
3581 	  size = s ? (1 << ceil_log2 (s)) : 1;
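	  /* E.g., a group with 5 related candidates gets an 8-entry cost map,
	     so set_group_iv_cost can use cand->id & 7 as its hash.  */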
3582 	}
3583 
3584       group->n_map_members = size;
3585       group->cost_map = XCNEWVEC (struct cost_pair, size);
3586     }
3587 }
3588 
3589 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3590    on the invariants INV_VARS and INV_EXPRS, that the value used in expressing
3591    it is VALUE, and that in case of iv elimination the comparison operator is COMP.  */
3592 
3593 static void
3594 set_group_iv_cost (struct ivopts_data *data,
3595 		   struct iv_group *group, struct iv_cand *cand,
3596 		   comp_cost cost, bitmap inv_vars, tree value,
3597 		   enum tree_code comp, bitmap inv_exprs)
3598 {
3599   unsigned i, s;
3600 
3601   if (cost.infinite_cost_p ())
3602     {
3603       BITMAP_FREE (inv_vars);
3604       BITMAP_FREE (inv_exprs);
3605       return;
3606     }
3607 
3608   if (data->consider_all_candidates)
3609     {
3610       group->cost_map[cand->id].cand = cand;
3611       group->cost_map[cand->id].cost = cost;
3612       group->cost_map[cand->id].inv_vars = inv_vars;
3613       group->cost_map[cand->id].inv_exprs = inv_exprs;
3614       group->cost_map[cand->id].value = value;
3615       group->cost_map[cand->id].comp = comp;
3616       return;
3617     }
3618 
3619   /* n_map_members is a power of two, so this computes modulo.  */
3620   s = cand->id & (group->n_map_members - 1);
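  /* E.g., with 8 map members, candidate id 13 starts probing at slot 5
     (13 & 7 == 5, i.e. 13 % 8) and wraps around to the start of the array
     if the remaining slots are occupied.  */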
3621   for (i = s; i < group->n_map_members; i++)
3622     if (!group->cost_map[i].cand)
3623       goto found;
3624   for (i = 0; i < s; i++)
3625     if (!group->cost_map[i].cand)
3626       goto found;
3627 
3628   gcc_unreachable ();
3629 
3630 found:
3631   group->cost_map[i].cand = cand;
3632   group->cost_map[i].cost = cost;
3633   group->cost_map[i].inv_vars = inv_vars;
3634   group->cost_map[i].inv_exprs = inv_exprs;
3635   group->cost_map[i].value = value;
3636   group->cost_map[i].comp = comp;
3637 }
3638 
3639 /* Gets cost of (GROUP, CAND) pair.  */
3640 
3641 static struct cost_pair *
3642 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3643 		   struct iv_cand *cand)
3644 {
3645   unsigned i, s;
3646   struct cost_pair *ret;
3647 
3648   if (!cand)
3649     return NULL;
3650 
3651   if (data->consider_all_candidates)
3652     {
3653       ret = group->cost_map + cand->id;
3654       if (!ret->cand)
3655 	return NULL;
3656 
3657       return ret;
3658     }
3659 
3660   /* n_map_members is a power of two, so this computes modulo.  */
3661   s = cand->id & (group->n_map_members - 1);
3662   for (i = s; i < group->n_map_members; i++)
3663     if (group->cost_map[i].cand == cand)
3664       return group->cost_map + i;
3665     else if (group->cost_map[i].cand == NULL)
3666       return NULL;
3667   for (i = 0; i < s; i++)
3668     if (group->cost_map[i].cand == cand)
3669       return group->cost_map + i;
3670     else if (group->cost_map[i].cand == NULL)
3671       return NULL;
3672 
3673   return NULL;
3674 }
3675 
3676 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
3677 static rtx
3678 produce_memory_decl_rtl (tree obj, int *regno)
3679 {
3680   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3681   machine_mode address_mode = targetm.addr_space.address_mode (as);
3682   rtx x;
3683 
3684   gcc_assert (obj);
3685   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3686     {
3687       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3688       x = gen_rtx_SYMBOL_REF (address_mode, name);
3689       SET_SYMBOL_REF_DECL (x, obj);
3690       x = gen_rtx_MEM (DECL_MODE (obj), x);
3691       set_mem_addr_space (x, as);
3692       targetm.encode_section_info (obj, x, true);
3693     }
3694   else
3695     {
3696       x = gen_raw_REG (address_mode, (*regno)++);
3697       x = gen_rtx_MEM (DECL_MODE (obj), x);
3698       set_mem_addr_space (x, as);
3699     }
3700 
3701   return x;
3702 }
3703 
3704 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
3705    walk_tree.  DATA contains the current fake register number.  */
3706 
3707 static tree
3708 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3709 {
3710   tree obj = NULL_TREE;
3711   rtx x = NULL_RTX;
3712   int *regno = (int *) data;
3713 
3714   switch (TREE_CODE (*expr_p))
3715     {
3716     case ADDR_EXPR:
3717       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3718 	   handled_component_p (*expr_p);
3719 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3720 	continue;
3721       obj = *expr_p;
3722       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3723 	x = produce_memory_decl_rtl (obj, regno);
3724       break;
3725 
3726     case SSA_NAME:
3727       *ws = 0;
3728       obj = SSA_NAME_VAR (*expr_p);
3729       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3730       if (!obj)
3731 	return NULL_TREE;
3732       if (!DECL_RTL_SET_P (obj))
3733 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3734       break;
3735 
3736     case VAR_DECL:
3737     case PARM_DECL:
3738     case RESULT_DECL:
3739       *ws = 0;
3740       obj = *expr_p;
3741 
3742       if (DECL_RTL_SET_P (obj))
3743 	break;
3744 
3745       if (DECL_MODE (obj) == BLKmode)
3746 	x = produce_memory_decl_rtl (obj, regno);
3747       else
3748 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3749 
3750       break;
3751 
3752     default:
3753       break;
3754     }
3755 
3756   if (x)
3757     {
3758       decl_rtl_to_reset.safe_push (obj);
3759       SET_DECL_RTL (obj, x);
3760     }
3761 
3762   return NULL_TREE;
3763 }
3764 
3765 /* Determines cost of the computation of EXPR.  */
3766 
3767 static unsigned
3768 computation_cost (tree expr, bool speed)
3769 {
3770   rtx_insn *seq;
3771   rtx rslt;
3772   tree type = TREE_TYPE (expr);
3773   unsigned cost;
3774   /* Avoid using hard regs in ways which may be unsupported.  */
3775   int regno = LAST_VIRTUAL_REGISTER + 1;
3776   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3777   enum node_frequency real_frequency = node->frequency;
3778 
3779   node->frequency = NODE_FREQUENCY_NORMAL;
3780   crtl->maybe_hot_insn_p = speed;
3781   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3782   start_sequence ();
3783   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3784   seq = get_insns ();
3785   end_sequence ();
3786   default_rtl_profile ();
3787   node->frequency = real_frequency;
3788 
3789   cost = seq_cost (seq, speed);
3790   if (MEM_P (rslt))
3791     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3792 			  TYPE_ADDR_SPACE (type), speed);
3793   else if (!REG_P (rslt))
3794     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3795 
3796   return cost;
3797 }
3798 
3799 /* Returns the variable containing the value of candidate CAND at statement STMT.  */
3800 
3801 static tree
3802 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3803 {
3804   if (stmt_after_increment (loop, cand, stmt))
3805     return cand->var_after;
3806   else
3807     return cand->var_before;
3808 }
3809 
3810 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3811    same precision, which is at least as wide as the precision of TYPE, stores
3812    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3813    type of A and B.  */
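/* For illustration only (the names are hypothetical): if A is (unsigned int) al
   and B is (unsigned int) bl, where al and bl both have type unsigned long, the
   function stores al to A, bl to B and returns unsigned long, so the caller can
   compute the difference of the operands in the wider type.  */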
3814 
3815 static tree
3816 determine_common_wider_type (tree *a, tree *b)
3817 {
3818   tree wider_type = NULL;
3819   tree suba, subb;
3820   tree atype = TREE_TYPE (*a);
3821 
3822   if (CONVERT_EXPR_P (*a))
3823     {
3824       suba = TREE_OPERAND (*a, 0);
3825       wider_type = TREE_TYPE (suba);
3826       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3827 	return atype;
3828     }
3829   else
3830     return atype;
3831 
3832   if (CONVERT_EXPR_P (*b))
3833     {
3834       subb = TREE_OPERAND (*b, 0);
3835       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3836 	return atype;
3837     }
3838   else
3839     return atype;
3840 
3841   *a = suba;
3842   *b = subb;
3843   return wider_type;
3844 }
3845 
3846 /* Determines the expression by which USE is expressed from induction variable
3847    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3848    decomposed form: the invariant part is stored in AFF_INV, while the variant
3849    part is stored in AFF_VAR.  Stores the ratio of USE's step over CAND's step
3850    in PRAT if it is non-null.  Returns false if USE cannot be expressed using CAND.  */
3851 
3852 static bool
3853 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3854 		       struct iv_cand *cand, struct aff_tree *aff_inv,
3855 		       struct aff_tree *aff_var, widest_int *prat = NULL)
3856 {
3857   tree ubase = use->iv->base, ustep = use->iv->step;
3858   tree cbase = cand->iv->base, cstep = cand->iv->step;
3859   tree common_type, uutype, var, cstep_common;
3860   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3861   aff_tree aff_cbase;
3862   widest_int rat;
3863 
3864   /* We must have enough precision to express the values of the use.  */
3865   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3866     return false;
3867 
3868   var = var_at_stmt (loop, cand, at);
3869   uutype = unsigned_type_for (utype);
3870 
3871   /* If the conversion is not a no-op, perform it.  */
3872   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3873     {
3874       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3875 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3876 	{
3877 	  tree inner_base, inner_step, inner_type;
3878 	  inner_base = TREE_OPERAND (cbase, 0);
3879 	  if (CONVERT_EXPR_P (cstep))
3880 	    inner_step = TREE_OPERAND (cstep, 0);
3881 	  else
3882 	    inner_step = cstep;
3883 
3884 	  inner_type = TREE_TYPE (inner_base);
3885 	  /* If the candidate is added from a biv whose type is smaller than
3886 	     ctype, we know that neither the candidate nor the biv will overflow.
3887 	     In this case, it is safe to skip the conversion in the candidate.
3888 	     As an example, (unsigned short)((unsigned long)A) equals
3889 	     (unsigned short)A, if A has a type no wider than short.  */
3890 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3891 	    {
3892 	      cbase = inner_base;
3893 	      cstep = inner_step;
3894 	    }
3895 	}
3896       cbase = fold_convert (uutype, cbase);
3897       cstep = fold_convert (uutype, cstep);
3898       var = fold_convert (uutype, var);
3899     }
3900 
3901   /* Ratio is 1 when computing the value of biv cand by itself.
3902      We can't rely on constant_multiple_of in this case because the
3903      use is created after the original biv is selected.  The call
3904      could fail because of inconsistent fold behavior.  See PR68021
3905      for more information.  */
3906   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3907     {
3908       gcc_assert (is_gimple_assign (use->stmt));
3909       gcc_assert (use->iv->ssa_name == cand->var_after);
3910       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3911       rat = 1;
3912     }
3913   else if (!constant_multiple_of (ustep, cstep, &rat))
3914     return false;
3915 
3916   if (prat)
3917     *prat = rat;
3918 
3919   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3920      type, we achieve better folding by computing their difference in this
3921      wider type and casting the result to UUTYPE.  We do not need to worry
3922      about overflows, as all the arithmetic will in the end be performed in
3923      UUTYPE anyway.  */
3924   common_type = determine_common_wider_type (&ubase, &cbase);
3925 
3926   /* use = ubase - ratio * cbase + ratio * var.  */
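  /* A purely illustrative example: if USE has base B + 4 and step 4 while
     CAND has base B and step 1, the ratio is 4 and the use value is computed
     as (B + 4) - 4 * B + 4 * var.  */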
3927   tree_to_aff_combination (ubase, common_type, aff_inv);
3928   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3929   tree_to_aff_combination (var, uutype, aff_var);
3930 
3931   /* We need to shift the value if we are after the increment.  */
3932   if (stmt_after_increment (loop, cand, at))
3933     {
3934       aff_tree cstep_aff;
3935 
3936       if (common_type != uutype)
3937 	cstep_common = fold_convert (common_type, cstep);
3938       else
3939 	cstep_common = cstep;
3940 
3941       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3942       aff_combination_add (&aff_cbase, &cstep_aff);
3943     }
3944 
3945   aff_combination_scale (&aff_cbase, -rat);
3946   aff_combination_add (aff_inv, &aff_cbase);
3947   if (common_type != uutype)
3948     aff_combination_convert (aff_inv, uutype);
3949 
3950   aff_combination_scale (aff_var, rat);
3951   return true;
3952 }
3953 
3954 /* Determines the expression by which USE is expressed from induction variable
3955    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3956    form in AFF.  Returns false if USE cannot be expressed using CAND.  */
3957 
3958 static bool
3959 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3960 		     struct iv_cand *cand, struct aff_tree *aff)
3961 {
3962   aff_tree aff_var;
3963 
3964   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3965     return false;
3966 
3967   aff_combination_add (aff, &aff_var);
3968   return true;
3969 }
3970 
3971 /* Return the type of USE.  */
3972 
3973 static tree
3974 get_use_type (struct iv_use *use)
3975 {
3976   tree base_type = TREE_TYPE (use->iv->base);
3977   tree type;
3978 
3979   if (use->type == USE_REF_ADDRESS)
3980     {
3981       /* The base_type may be a void pointer.  Create a pointer type based on
3982 	 the mem_ref instead.  */
3983       type = build_pointer_type (TREE_TYPE (*use->op_p));
3984       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3985 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3986     }
3987   else
3988     type = base_type;
3989 
3990   return type;
3991 }
3992 
3993 /* Determines the expression by which USE is expressed from induction variable
3994    CAND at statement AT in LOOP.  The computation is unshared.  */
3995 
3996 static tree
3997 get_computation_at (struct loop *loop, gimple *at,
3998 		    struct iv_use *use, struct iv_cand *cand)
3999 {
4000   aff_tree aff;
4001   tree type = get_use_type (use);
4002 
4003   if (!get_computation_aff (loop, at, use, cand, &aff))
4004     return NULL_TREE;
4005   unshare_aff_combination (&aff);
4006   return fold_convert (type, aff_combination_to_tree (&aff));
4007 }
4008 
4009 /* Adjust the cost COST for being in loop setup rather than loop body.
4010    If we're optimizing for space, the loop setup overhead is constant;
4011    if we're optimizing for speed, amortize it over the per-iteration cost.
4012    If ROUND_UP_P is true, the result is rounded up rather than truncated
4013    toward zero when optimizing for speed.  */
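/* For illustration (not tied to any particular target): with an average of 10
   iterations and COST 7 when optimizing for speed, the amortized cost is
   7 / 10 == 0, or (7 + 9) / 10 == 1 when ROUND_UP_P is true.  */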
4014 static unsigned
4015 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4016 		   bool round_up_p = false)
4017 {
4018   if (cost == INFTY)
4019     return cost;
4020   else if (optimize_loop_for_speed_p (data->current_loop))
4021     {
4022       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4023       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4024     }
4025   else
4026     return cost;
4027 }
4028 
4029 /* Calculate the speed (if SPEED) or size cost of the shiftadd EXPR in MODE.
4030    MULT is the MULT_EXPR operand of EXPR that supplies the shifted value.
4031    COST0 and COST1 are the costs of calculating the operands of EXPR.
4032    Returns true if successful, storing the resulting cost in *COST.  */
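/* For instance, on a target with a cheap shift-and-add instruction, an
   expression like B * 4 + A may be synthesized as (B << 2) + A; the shift
   amount M would then be 2.  */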
4033 
4034 static bool
4035 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4036 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4037 {
4038   comp_cost res;
4039   tree op1 = TREE_OPERAND (expr, 1);
4040   tree cst = TREE_OPERAND (mult, 1);
4041   tree multop = TREE_OPERAND (mult, 0);
4042   int m = exact_log2 (int_cst_value (cst));
4043   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4044   int as_cost, sa_cost;
4045   bool mult_in_op1;
4046 
4047   if (!(m >= 0 && m < maxm))
4048     return false;
4049 
4050   STRIP_NOPS (op1);
4051   mult_in_op1 = operand_equal_p (op1, mult, 0);
4052 
4053   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4054 
4055   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4056      use that in preference to a shift insn followed by an add insn.  */
4057   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4058 	     ? shiftadd_cost (speed, mode, m)
4059 	     : (mult_in_op1
4060 		? shiftsub1_cost (speed, mode, m)
4061 		: shiftsub0_cost (speed, mode, m)));
4062 
4063   res = comp_cost (MIN (as_cost, sa_cost), 0);
4064   res += (mult_in_op1 ? cost0 : cost1);
4065 
4066   STRIP_NOPS (multop);
4067   if (!is_gimple_val (multop))
4068     res += force_expr_to_var_cost (multop, speed);
4069 
4070   *cost = res;
4071   return true;
4072 }
4073 
4074 /* Estimates cost of forcing expression EXPR into a variable.  */
4075 
4076 static comp_cost
4077 force_expr_to_var_cost (tree expr, bool speed)
4078 {
4079   static bool costs_initialized = false;
4080   static unsigned integer_cost [2];
4081   static unsigned symbol_cost [2];
4082   static unsigned address_cost [2];
4083   tree op0, op1;
4084   comp_cost cost0, cost1, cost;
4085   machine_mode mode;
4086   scalar_int_mode int_mode;
4087 
4088   if (!costs_initialized)
4089     {
4090       tree type = build_pointer_type (integer_type_node);
4091       tree var, addr;
4092       rtx x;
4093       int i;
4094 
4095       var = create_tmp_var_raw (integer_type_node, "test_var");
4096       TREE_STATIC (var) = 1;
4097       x = produce_memory_decl_rtl (var, NULL);
4098       SET_DECL_RTL (var, x);
4099 
4100       addr = build1 (ADDR_EXPR, type, var);
4101 
4102 
4103       for (i = 0; i < 2; i++)
4104 	{
4105 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4106 							     2000), i);
4107 
4108 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4109 
4110 	  address_cost[i]
4111 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4112 	  if (dump_file && (dump_flags & TDF_DETAILS))
4113 	    {
4114 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4115 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4116 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4117 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4118 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4119 	      fprintf (dump_file, "\n");
4120 	    }
4121 	}
4122 
4123       costs_initialized = true;
4124     }
4125 
4126   STRIP_NOPS (expr);
4127 
4128   if (SSA_VAR_P (expr))
4129     return no_cost;
4130 
4131   if (is_gimple_min_invariant (expr))
4132     {
4133       if (poly_int_tree_p (expr))
4134 	return comp_cost (integer_cost [speed], 0);
4135 
4136       if (TREE_CODE (expr) == ADDR_EXPR)
4137 	{
4138 	  tree obj = TREE_OPERAND (expr, 0);
4139 
4140 	  if (VAR_P (obj)
4141 	      || TREE_CODE (obj) == PARM_DECL
4142 	      || TREE_CODE (obj) == RESULT_DECL)
4143 	    return comp_cost (symbol_cost [speed], 0);
4144 	}
4145 
4146       return comp_cost (address_cost [speed], 0);
4147     }
4148 
4149   switch (TREE_CODE (expr))
4150     {
4151     case POINTER_PLUS_EXPR:
4152     case PLUS_EXPR:
4153     case MINUS_EXPR:
4154     case MULT_EXPR:
4155     case TRUNC_DIV_EXPR:
4156     case BIT_AND_EXPR:
4157     case BIT_IOR_EXPR:
4158     case LSHIFT_EXPR:
4159     case RSHIFT_EXPR:
4160       op0 = TREE_OPERAND (expr, 0);
4161       op1 = TREE_OPERAND (expr, 1);
4162       STRIP_NOPS (op0);
4163       STRIP_NOPS (op1);
4164       break;
4165 
4166     CASE_CONVERT:
4167     case NEGATE_EXPR:
4168     case BIT_NOT_EXPR:
4169       op0 = TREE_OPERAND (expr, 0);
4170       STRIP_NOPS (op0);
4171       op1 = NULL_TREE;
4172       break;
4173 
4174     default:
4175       /* Just an arbitrary value, FIXME.  */
4176       return comp_cost (target_spill_cost[speed], 0);
4177     }
4178 
4179   if (op0 == NULL_TREE
4180       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4181     cost0 = no_cost;
4182   else
4183     cost0 = force_expr_to_var_cost (op0, speed);
4184 
4185   if (op1 == NULL_TREE
4186       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4187     cost1 = no_cost;
4188   else
4189     cost1 = force_expr_to_var_cost (op1, speed);
4190 
4191   mode = TYPE_MODE (TREE_TYPE (expr));
4192   switch (TREE_CODE (expr))
4193     {
4194     case POINTER_PLUS_EXPR:
4195     case PLUS_EXPR:
4196     case MINUS_EXPR:
4197     case NEGATE_EXPR:
4198       cost = comp_cost (add_cost (speed, mode), 0);
4199       if (TREE_CODE (expr) != NEGATE_EXPR)
4200 	{
4201 	  tree mult = NULL_TREE;
4202 	  comp_cost sa_cost;
4203 	  if (TREE_CODE (op1) == MULT_EXPR)
4204 	    mult = op1;
4205 	  else if (TREE_CODE (op0) == MULT_EXPR)
4206 	    mult = op0;
4207 
4208 	  if (mult != NULL_TREE
4209 	      && is_a <scalar_int_mode> (mode, &int_mode)
4210 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4211 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4212 				    speed, &sa_cost))
4213 	    return sa_cost;
4214 	}
4215       break;
4216 
4217     CASE_CONVERT:
4218       {
4219 	tree inner_mode, outer_mode;
4220 	outer_mode = TREE_TYPE (expr);
4221 	inner_mode = TREE_TYPE (op0);
4222 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4223 				       TYPE_MODE (inner_mode), speed), 0);
4224       }
4225       break;
4226 
4227     case MULT_EXPR:
4228       if (cst_and_fits_in_hwi (op0))
4229 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4230 					     mode, speed), 0);
4231       else if (cst_and_fits_in_hwi (op1))
4232 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4233 					     mode, speed), 0);
4234       else
4235 	return comp_cost (target_spill_cost [speed], 0);
4236       break;
4237 
4238     case TRUNC_DIV_EXPR:
4239       /* Division by power of two is usually cheap, so we allow it.  Forbid
4240 	 anything else.  */
4241       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4242 	cost = comp_cost (add_cost (speed, mode), 0);
4243       else
4244 	cost = comp_cost (target_spill_cost[speed], 0);
4245       break;
4246 
4247     case BIT_AND_EXPR:
4248     case BIT_IOR_EXPR:
4249     case BIT_NOT_EXPR:
4250     case LSHIFT_EXPR:
4251     case RSHIFT_EXPR:
4252       cost = comp_cost (add_cost (speed, mode), 0);
4253       break;
4254 
4255     default:
4256       gcc_unreachable ();
4257     }
4258 
4259   cost += cost0;
4260   cost += cost1;
4261   return cost;
4262 }
4263 
4264 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4265    invariants the computation depends on.  */
4266 
4267 static comp_cost
4268 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4269 {
4270   if (!expr)
4271     return no_cost;
4272 
4273   find_inv_vars (data, &expr, inv_vars);
4274   return force_expr_to_var_cost (expr, data->speed);
4275 }
4276 
4277 /* Returns the cost of an auto-modifying address expression of the shape
4278    base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
4279    is the offset of the address expression.  The address expression has
4280    ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED is
4281    true if we are optimizing for speed.  */
4282 
4283 enum ainc_type
4284 {
4285   AINC_PRE_INC,		/* Pre increment.  */
4286   AINC_PRE_DEC,		/* Pre decrement.  */
4287   AINC_POST_INC,	/* Post increment.  */
4288   AINC_POST_DEC,	/* Post decrement.  */
4289   AINC_NONE		/* Also the number of auto increment types.  */
4290 };
4291 
4292 struct ainc_cost_data
4293 {
4294   unsigned costs[AINC_NONE];
4295 };
4296 
4297 static comp_cost
4298 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4299 		       machine_mode addr_mode, machine_mode mem_mode,
4300 		       addr_space_t as, bool speed)
4301 {
4302   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4303       && !USE_STORE_PRE_DECREMENT (mem_mode)
4304       && !USE_LOAD_POST_DECREMENT (mem_mode)
4305       && !USE_STORE_POST_DECREMENT (mem_mode)
4306       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4307       && !USE_STORE_PRE_INCREMENT (mem_mode)
4308       && !USE_LOAD_POST_INCREMENT (mem_mode)
4309       && !USE_STORE_POST_INCREMENT (mem_mode))
4310     return infinite_cost;
4311 
4312   static vec<ainc_cost_data *> ainc_cost_data_list;
4313   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4314   if (idx >= ainc_cost_data_list.length ())
4315     {
4316       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4317 
4318       gcc_assert (nsize > idx);
4319       ainc_cost_data_list.safe_grow_cleared (nsize);
4320     }
4321 
4322   ainc_cost_data *data = ainc_cost_data_list[idx];
4323   if (data == NULL)
4324     {
4325       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4326 
4327       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4328       data->costs[AINC_PRE_DEC] = INFTY;
4329       data->costs[AINC_POST_DEC] = INFTY;
4330       data->costs[AINC_PRE_INC] = INFTY;
4331       data->costs[AINC_POST_INC] = INFTY;
4332       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4333 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4334 	{
4335 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4336 
4337 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4338 	    data->costs[AINC_PRE_DEC]
4339 	      = address_cost (addr, mem_mode, as, speed);
4340 	}
4341       if (USE_LOAD_POST_DECREMENT (mem_mode)
4342 	  || USE_STORE_POST_DECREMENT (mem_mode))
4343 	{
4344 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4345 
4346 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4347 	    data->costs[AINC_POST_DEC]
4348 	      = address_cost (addr, mem_mode, as, speed);
4349 	}
4350       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4351 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4352 	{
4353 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4354 
4355 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4356 	    data->costs[AINC_PRE_INC]
4357 	      = address_cost (addr, mem_mode, as, speed);
4358 	}
4359       if (USE_LOAD_POST_INCREMENT (mem_mode)
4360 	  || USE_STORE_POST_INCREMENT (mem_mode))
4361 	{
4362 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4363 
4364 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4365 	    data->costs[AINC_POST_INC]
4366 	      = address_cost (addr, mem_mode, as, speed);
4367 	}
4368       ainc_cost_data_list[idx] = data;
4369     }
4370 
4371   poly_int64 msize = GET_MODE_SIZE (mem_mode);
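  /* For example, a 4-byte memory access whose address iv has ainc_step 4 and
     ainc_offset 0 matches post-increment, while ainc_offset 4 matches
     pre-increment.  */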
4372   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4373     return comp_cost (data->costs[AINC_POST_INC], 0);
4374   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4375     return comp_cost (data->costs[AINC_POST_DEC], 0);
4376   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4377     return comp_cost (data->costs[AINC_PRE_INC], 0);
4378   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4379     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4380 
4381   return infinite_cost;
4382 }
4383 
4384 /* Return the cost of computing USE's address expression by using CAND.
4385    AFF_INV and AFF_VAR represent the invariant and variant parts of the
4386    address expression, respectively.  If AFF_INV is simple, store the
4387    loop invariant variables it depends on in INV_VARS; if AFF_INV is
4388    complicated, handle it as a new invariant expression and record it
4389    in INV_EXPR.  RATIO is the ratio of USE's step to CAND's step.  If
4390    CAN_AUTOINC is non-NULL, store a boolean value in it indicating
4391    whether this is an auto-increment address.  */
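/* A hypothetical example: for a use like *(p + 4 * i + 16) rewritten in terms
   of a candidate iv, AFF_VAR holds the iv part, AFF_INV holds the invariant
   p + 16 and RATIO is 4; if the target supports it, the parts below may end up
   as base p, index iv, step 4 and offset 16.  */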
4392 
4393 static comp_cost
4394 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4395 		  struct iv_cand *cand, aff_tree *aff_inv,
4396 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4397 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4398 		  bool *can_autoinc, bool speed)
4399 {
4400   rtx addr;
4401   bool simple_inv = true;
4402   tree comp_inv = NULL_TREE, type = aff_var->type;
4403   comp_cost var_cost = no_cost, cost = no_cost;
4404   struct mem_address parts = {NULL_TREE, integer_one_node,
4405 			      NULL_TREE, NULL_TREE, NULL_TREE};
4406   machine_mode addr_mode = TYPE_MODE (type);
4407   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4408   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4409   /* Only true if ratio != 1.  */
4410   bool ok_with_ratio_p = false;
4411   bool ok_without_ratio_p = false;
4412 
4413   if (!aff_combination_const_p (aff_inv))
4414     {
4415       parts.index = integer_one_node;
4416       /* Addressing mode "base + index".  */
4417       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4418       if (ratio != 1)
4419 	{
4420 	  parts.step = wide_int_to_tree (type, ratio);
4421 	  /* Addressing mode "base + index << scale".  */
4422 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4423 	  if (!ok_with_ratio_p)
4424 	    parts.step = NULL_TREE;
4425 	}
4426       if (ok_with_ratio_p || ok_without_ratio_p)
4427 	{
4428 	  if (maybe_ne (aff_inv->offset, 0))
4429 	    {
4430 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4431 	      /* Addressing mode "base + index [<< scale] + offset".  */
4432 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4433 		parts.offset = NULL_TREE;
4434 	      else
4435 		aff_inv->offset = 0;
4436 	    }
4437 
4438 	  move_fixed_address_to_symbol (&parts, aff_inv);
4439 	  /* The base is a fixed address and has been moved to the symbol part.  */
4440 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4441 	    parts.base = NULL_TREE;
4442 
4443 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4444 	  if (parts.symbol != NULL_TREE
4445 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4446 	    {
4447 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4448 	      parts.symbol = NULL_TREE;
4449 	      /* Reset SIMPLE_INV since the symbol address needs to be computed
4450 		 outside of the address expression in this case.  */
4451 	      simple_inv = false;
4452 	      /* The symbol part is moved back to the base part; it can't be NULL.  */
4453 	      parts.base = integer_one_node;
4454 	    }
4455 	}
4456       else
4457 	parts.index = NULL_TREE;
4458     }
4459   else
4460     {
4461       poly_int64 ainc_step;
4462       if (can_autoinc
4463 	  && ratio == 1
4464 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4465 	{
4466 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4467 
4468 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4469 	    ainc_offset += ainc_step;
4470 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4471 					addr_mode, mem_mode, as, speed);
4472 	  if (!cost.infinite_cost_p ())
4473 	    {
4474 	      *can_autoinc = true;
4475 	      return cost;
4476 	    }
4477 	  cost = no_cost;
4478 	}
4479       if (!aff_combination_zero_p (aff_inv))
4480 	{
4481 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4482 	  /* Addressing mode "base + offset".  */
4483 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4484 	    parts.offset = NULL_TREE;
4485 	  else
4486 	    aff_inv->offset = 0;
4487 	}
4488     }
4489 
4490   if (simple_inv)
4491     simple_inv = (aff_inv == NULL
4492 		  || aff_combination_const_p (aff_inv)
4493 		  || aff_combination_singleton_var_p (aff_inv));
4494   if (!aff_combination_zero_p (aff_inv))
4495     comp_inv = aff_combination_to_tree (aff_inv);
4496   if (comp_inv != NULL_TREE)
4497     cost = force_var_cost (data, comp_inv, inv_vars);
4498   if (ratio != 1 && parts.step == NULL_TREE)
4499     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4500   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4501     var_cost += add_cost (speed, addr_mode);
4502 
4503   if (comp_inv && inv_expr && !simple_inv)
4504     {
4505       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4506       /* Clear depends on.  */
4507       if (*inv_expr != NULL && inv_vars && *inv_vars)
4508 	bitmap_clear (*inv_vars);
4509 
4510       /* The cost of a small invariant expression adjusted against the loop
4511 	 niters is usually zero, which makes it hard to differentiate from a
4512 	 candidate based on loop invariant variables.  Secondly, the
4513 	 generated invariant expression may not be hoisted out of the loop
4514 	 by a following pass.  We penalize the cost by rounding up in order
4515 	 to neutralize such effects.  */
4516       cost.cost = adjust_setup_cost (data, cost.cost, true);
4517       cost.scratch = cost.cost;
4518     }
4519 
4520   cost += var_cost;
4521   addr = addr_for_mem_ref (&parts, as, false);
4522   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4523   cost += address_cost (addr, mem_mode, as, speed);
4524 
4525   if (parts.symbol != NULL_TREE)
4526     cost.complexity += 1;
4527   /* Don't increase the complexity of adding a scaled index if it's
4528      the only kind of index that the target allows.  */
4529   if (parts.step != NULL_TREE && ok_without_ratio_p)
4530     cost.complexity += 1;
4531   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4532     cost.complexity += 1;
4533   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4534     cost.complexity += 1;
4535 
4536   return cost;
4537 }
4538 
4539 /* Scale (multiply) the computed COST (except the scratch part, which should
4540    be hoisted out of the loop) by AT->frequency / header->frequency, which
4541    makes the expected cost more accurate.  */
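/* E.g., a use in a conditional block executed half as often as the loop header
   has the non-scratch part of its cost halved.  */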
4542 
4543 static comp_cost
4544 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4545 {
4546    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4547    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4548    if (loop_freq != 0)
4549      {
4550        gcc_assert (cost.scratch <= cost.cost);
4551        int scaled_cost
4552 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4553 
4554        if (dump_file && (dump_flags & TDF_DETAILS))
4555 	 fprintf (dump_file, "Scaling cost based on bb prob "
4556 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4557 		  1.0f * bb_freq / loop_freq, cost.cost,
4558 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4559 
4560        cost.cost = scaled_cost;
4561      }
4562 
4563   return cost;
4564 }
4565 
4566 /* Determines the cost of the computation by which USE is expressed
4567    from induction variable CAND.  If ADDRESS_P is true, we just need
4568    to create an address from it, otherwise we want to get it into a
4569    register.  The set of invariants we depend on is stored in INV_VARS.
4570    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4571    addressing is likely.  If INV_EXPR is nonnull, record the invariant
4572    expression entry in it.  */
4573 
4574 static comp_cost
4575 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4576 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4577 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4578 {
4579   gimple *at = use->stmt;
4580   tree ubase = use->iv->base, cbase = cand->iv->base;
4581   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4582   tree comp_inv = NULL_TREE;
4583   HOST_WIDE_INT ratio, aratio;
4584   comp_cost cost;
4585   widest_int rat;
4586   aff_tree aff_inv, aff_var;
4587   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4588 
4589   if (inv_vars)
4590     *inv_vars = NULL;
4591   if (can_autoinc)
4592     *can_autoinc = false;
4593   if (inv_expr)
4594     *inv_expr = NULL;
4595 
4596   /* Check if we have enough precision to express the values of use.  */
4597   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4598     return infinite_cost;
4599 
4600   if (address_p
4601       || (use->iv->base_object
4602 	  && cand->iv->base_object
4603 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4604 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4605     {
4606       /* Do not try to express the address of an object with a computation
4607 	 based on the address of a different object.  This may cause problems
4608 	 in RTL-level alias analysis (which does not expect this to happen,
4609 	 as it is invalid in C), and would be unlikely to be useful
4610 	 anyway.  */
4611       if (use->iv->base_object
4612 	  && cand->iv->base_object
4613 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4614 	return infinite_cost;
4615     }
4616 
4617   if (!get_computation_aff_1 (data->current_loop, at, use,
4618 			      cand, &aff_inv, &aff_var, &rat)
4619       || !wi::fits_shwi_p (rat))
4620     return infinite_cost;
4621 
4622   ratio = rat.to_shwi ();
4623   if (address_p)
4624     {
4625       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4626 			       inv_vars, inv_expr, can_autoinc, speed);
4627       return get_scaled_computation_cost_at (data, at, cost);
4628     }
4629 
4630   bool simple_inv = (aff_combination_const_p (&aff_inv)
4631 		     || aff_combination_singleton_var_p (&aff_inv));
4632   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4633   aff_combination_convert (&aff_inv, signed_type);
4634   if (!aff_combination_zero_p (&aff_inv))
4635     comp_inv = aff_combination_to_tree (&aff_inv);
4636 
4637   cost = force_var_cost (data, comp_inv, inv_vars);
4638   if (comp_inv && inv_expr && !simple_inv)
4639     {
4640       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4641       /* Clear depends on.  */
4642       if (*inv_expr != NULL && inv_vars && *inv_vars)
4643 	bitmap_clear (*inv_vars);
4644 
4645       cost.cost = adjust_setup_cost (data, cost.cost);
4646       /* Record setup cost in scratch field.  */
4647       cost.scratch = cost.cost;
4648     }
4649   /* The cost of a constant integer can be covered when adding the invariant
4650      part to the variant part.  */
4651   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4652     cost = no_cost;
4653 
4654   /* Need type narrowing to represent use with cand.  */
4655   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4656     {
4657       machine_mode outer_mode = TYPE_MODE (utype);
4658       machine_mode inner_mode = TYPE_MODE (ctype);
4659       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4660     }
4661 
4662   /* Turn a + i * (-c) into a - i * c.  */
4663   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4664     aratio = -ratio;
4665   else
4666     aratio = ratio;
4667 
4668   if (ratio != 1)
4669     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4670 
4671   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4672      instruction.  */
4673   /* Need to add up the invariant and variant parts.  */
4674   if (comp_inv && !integer_zerop (comp_inv))
4675     cost += add_cost (speed, TYPE_MODE (utype));
4676 
4677   return get_scaled_computation_cost_at (data, at, cost);
4678 }
4679 
4680 /* Determines cost of computing the use in GROUP with CAND in a generic
4681    expression.  */
4682 
4683 static bool
4684 determine_group_iv_cost_generic (struct ivopts_data *data,
4685 				 struct iv_group *group, struct iv_cand *cand)
4686 {
4687   comp_cost cost;
4688   iv_inv_expr_ent *inv_expr = NULL;
4689   bitmap inv_vars = NULL, inv_exprs = NULL;
4690   struct iv_use *use = group->vuses[0];
4691 
4692   /* The simple case first -- if we need to express value of the preserved
4693      original biv, the cost is 0.  This also prevents us from counting the
4694      cost of increment twice -- once at this use and once in the cost of
4695      the candidate.  */
4696   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4697     cost = no_cost;
4698   else
4699     cost = get_computation_cost (data, use, cand, false,
4700 				 &inv_vars, NULL, &inv_expr);
4701 
4702   if (inv_expr)
4703     {
4704       inv_exprs = BITMAP_ALLOC (NULL);
4705       bitmap_set_bit (inv_exprs, inv_expr->id);
4706     }
4707   set_group_iv_cost (data, group, cand, cost, inv_vars,
4708 		     NULL_TREE, ERROR_MARK, inv_exprs);
4709   return !cost.infinite_cost_p ();
4710 }
4711 
4712 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4713 
4714 static bool
4715 determine_group_iv_cost_address (struct ivopts_data *data,
4716 				 struct iv_group *group, struct iv_cand *cand)
4717 {
4718   unsigned i;
4719   bitmap inv_vars = NULL, inv_exprs = NULL;
4720   bool can_autoinc;
4721   iv_inv_expr_ent *inv_expr = NULL;
4722   struct iv_use *use = group->vuses[0];
4723   comp_cost sum_cost = no_cost, cost;
4724 
4725   cost = get_computation_cost (data, use, cand, true,
4726 			       &inv_vars, &can_autoinc, &inv_expr);
4727 
4728   if (inv_expr)
4729     {
4730       inv_exprs = BITMAP_ALLOC (NULL);
4731       bitmap_set_bit (inv_exprs, inv_expr->id);
4732     }
4733   sum_cost = cost;
4734   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4735     {
4736       if (can_autoinc)
4737 	sum_cost -= cand->cost_step;
4738       /* If we generated the candidate solely for exploiting autoincrement
4739 	 opportunities, and it turns out it can't be used, set the cost to
4740 	 infinity to make sure we ignore it.  */
4741       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4742 	sum_cost = infinite_cost;
4743     }
4744 
4745   /* Uses in a group can share setup code, so only add setup cost once.  */
4746   cost -= cost.scratch;
4747   /* Compute and add the costs for the remaining uses of this group.  */
4748   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4749     {
4750       struct iv_use *next = group->vuses[i];
4751 
4752       /* TODO: We could skip computing the cost for a subsequent iv_use when
4753 	 it has the same cost as the first iv_use, but the cost really depends
4754 	 on the offset and on where the iv_use is.  */
4755 	cost = get_computation_cost (data, next, cand, true,
4756 				     NULL, &can_autoinc, &inv_expr);
4757 	if (inv_expr)
4758 	  {
4759 	    if (!inv_exprs)
4760 	      inv_exprs = BITMAP_ALLOC (NULL);
4761 
4762 	    bitmap_set_bit (inv_exprs, inv_expr->id);
4763 	  }
4764       sum_cost += cost;
4765     }
4766   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4767 		     NULL_TREE, ERROR_MARK, inv_exprs);
4768 
4769   return !sum_cost.infinite_cost_p ();
4770 }
4771 
4772 /* Computes value of candidate CAND at position AT in iteration NITER, and
4773    stores it to VAL.  */
4774 
4775 static void
4776 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4777 	       aff_tree *val)
4778 {
4779   aff_tree step, delta, nit;
4780   struct iv *iv = cand->iv;
4781   tree type = TREE_TYPE (iv->base);
4782   tree steptype;
4783   if (POINTER_TYPE_P (type))
4784     steptype = sizetype;
4785   else
4786     steptype = unsigned_type_for (type);
4787 
4788   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4789   aff_combination_convert (&step, steptype);
4790   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4791   aff_combination_convert (&nit, steptype);
4792   aff_combination_mult (&nit, &step, &delta);
4793   if (stmt_after_increment (loop, cand, at))
4794     aff_combination_add (&delta, &step);
4795 
4796   tree_to_aff_combination (iv->base, type, val);
4797   if (!POINTER_TYPE_P (type))
4798     aff_combination_convert (val, steptype);
4799   aff_combination_add (val, &delta);
4800 }
4801 
4802 /* Returns period of induction variable iv.  */
4803 
4804 static tree
4805 iv_period (struct iv *iv)
4806 {
4807   tree step = iv->step, period, type;
4808   tree pow2div;
4809 
4810   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4811 
4812   type = unsigned_type_for (TREE_TYPE (step));
4813   /* The period of the iv is lcm (step, type_range) / step - 1,
4814      i.e., N * type_range / step - 1.  Since the type range is a power
4815      of two, N == step >> num_of_ending_zeros_binary (step), so the
4816      final result is
4817 
4818        (type_range >> num_of_ending_zeros_binary (step)) - 1.
4819 
4820   */
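  /* For example, a 32-bit unsigned iv with step 4 has pow2div == 2 and thus
     a period of (1 << 30) - 1.  */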
4821   pow2div = num_ending_zeros (step);
4822 
4823   period = build_low_bits_mask (type,
4824 				(TYPE_PRECISION (type)
4825 				 - tree_to_uhwi (pow2div)));
4826 
4827   return period;
4828 }
4829 
4830 /* Returns the comparison operator used when eliminating the iv USE.  */
4831 
4832 static enum tree_code
4833 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4834 {
4835   struct loop *loop = data->current_loop;
4836   basic_block ex_bb;
4837   edge exit;
4838 
4839   ex_bb = gimple_bb (use->stmt);
4840   exit = EDGE_SUCC (ex_bb, 0);
4841   if (flow_bb_inside_loop_p (loop, exit->dest))
4842     exit = EDGE_SUCC (ex_bb, 1);
4843 
4844   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4845 }
4846 
4847 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4848    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4849    calculation is performed in a non-wrapping type.
4850 
4851    TODO: More generally, we could test for the situation that
4852 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4853 	 This would require knowing the sign of OFFSET.  */
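/* An illustrative case with hypothetical SSA names: if BASE is defined by
   q_1 = s_2 + off_3 in a signed (non-wrapping) type and OFFSET expands to the
   same value as off_3, then BASE - OFFSET is just s_2 and cannot overflow.  */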
4854 
4855 static bool
4856 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4857 {
4858   enum tree_code code;
4859   tree e1, e2;
4860   aff_tree aff_e1, aff_e2, aff_offset;
4861 
4862   if (!nowrap_type_p (TREE_TYPE (base)))
4863     return false;
4864 
4865   base = expand_simple_operations (base);
4866 
4867   if (TREE_CODE (base) == SSA_NAME)
4868     {
4869       gimple *stmt = SSA_NAME_DEF_STMT (base);
4870 
4871       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4872 	return false;
4873 
4874       code = gimple_assign_rhs_code (stmt);
4875       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4876 	return false;
4877 
4878       e1 = gimple_assign_rhs1 (stmt);
4879       e2 = gimple_assign_rhs2 (stmt);
4880     }
4881   else
4882     {
4883       code = TREE_CODE (base);
4884       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4885 	return false;
4886       e1 = TREE_OPERAND (base, 0);
4887       e2 = TREE_OPERAND (base, 1);
4888     }
4889 
4890   /* Use affine expansion as deeper inspection to prove the equality.  */
4891   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4892 				  &aff_e2, &data->name_expansion_cache);
4893   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4894 				  &aff_offset, &data->name_expansion_cache);
4895   aff_combination_scale (&aff_offset, -1);
4896   switch (code)
4897     {
4898     case PLUS_EXPR:
4899       aff_combination_add (&aff_e2, &aff_offset);
4900       if (aff_combination_zero_p (&aff_e2))
4901 	return true;
4902 
4903       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4904 				      &aff_e1, &data->name_expansion_cache);
4905       aff_combination_add (&aff_e1, &aff_offset);
4906       return aff_combination_zero_p (&aff_e1);
4907 
4908     case POINTER_PLUS_EXPR:
4909       aff_combination_add (&aff_e2, &aff_offset);
4910       return aff_combination_zero_p (&aff_e2);
4911 
4912     default:
4913       return false;
4914     }
4915 }
4916 
4917 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4918    comparison with CAND.  NITER describes the number of iterations of
4919    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4920 
4921    We aim to handle the following situation:
4922 
4923    sometype *base, *p;
4924    int a, b, i;
4925 
4926    i = a;
4927    p = p_0 = base + a;
4928 
4929    do
4930      {
4931        bla (*p);
4932        p++;
4933        i++;
4934      }
4935    while (i < b);
4936 
4937    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4938    We aim to optimize this to
4939 
4940    p = p_0 = base + a;
4941    do
4942      {
4943        bla (*p);
4944        p++;
4945      }
4946    while (p < p_0 - a + b);
4947 
4948    This preserves correctness, since the pointer arithmetic does not
4949    overflow.  More precisely:
4950 
4951    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4952       overflow in computing it or the values of p.
4953    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4954       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4955 
4956 static bool
4957 iv_elimination_compare_lt (struct ivopts_data *data,
4958 			   struct iv_cand *cand, enum tree_code *comp_p,
4959 			   struct tree_niter_desc *niter)
4960 {
4961   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4962   struct aff_tree nit, tmpa, tmpb;
4963   enum tree_code comp;
4964   HOST_WIDE_INT step;
4965 
4966   /* We need to know that the candidate induction variable does not overflow.
4967      While more complex analysis may be used to prove this, for now just
4968      check that the variable appears in the original program and that it
4969      is computed in a type that guarantees no overflows.  */
4970   cand_type = TREE_TYPE (cand->iv->base);
4971   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4972     return false;
4973 
4974   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4975      the calculation of the BOUND could overflow, making the comparison
4976      invalid.  */
4977   if (!data->loop_single_exit_p)
4978     return false;
4979 
4980   /* We need to be able to decide whether candidate is increasing or decreasing
4981      in order to choose the right comparison operator.  */
4982   if (!cst_and_fits_in_hwi (cand->iv->step))
4983     return false;
4984   step = int_cst_value (cand->iv->step);
4985 
4986   /* Check that the number of iterations matches the expected pattern:
4987      a + 1 > b ? 0 : b - a - 1.  */
4988   mbz = niter->may_be_zero;
4989   if (TREE_CODE (mbz) == GT_EXPR)
4990     {
4991       /* Handle a + 1 > b.  */
4992       tree op0 = TREE_OPERAND (mbz, 0);
4993       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4994 	{
4995 	  a = TREE_OPERAND (op0, 0);
4996 	  b = TREE_OPERAND (mbz, 1);
4997 	}
4998       else
4999 	return false;
5000     }
5001   else if (TREE_CODE (mbz) == LT_EXPR)
5002     {
5003       tree op1 = TREE_OPERAND (mbz, 1);
5004 
5005       /* Handle b < a + 1.  */
5006       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5007 	{
5008 	  a = TREE_OPERAND (op1, 0);
5009 	  b = TREE_OPERAND (mbz, 0);
5010 	}
5011       else
5012 	return false;
5013     }
5014   else
5015     return false;
5016 
5017   /* Expected number of iterations is B - A - 1.  Check that it matches
5018      the actual number, i.e., that B - A - NITER = 1.  */
5019   tree_to_aff_combination (niter->niter, nit_type, &nit);
5020   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5021   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5022   aff_combination_scale (&nit, -1);
5023   aff_combination_scale (&tmpa, -1);
5024   aff_combination_add (&tmpb, &tmpa);
5025   aff_combination_add (&tmpb, &nit);
5026   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5027     return false;
5028 
5029   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5030      overflow.  */
5031   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5032 			cand->iv->step,
5033 			fold_convert (TREE_TYPE (cand->iv->step), a));
5034   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5035     return false;
5036 
5037   /* Determine the new comparison operator.  */
5038   comp = step < 0 ? GT_EXPR : LT_EXPR;
5039   if (*comp_p == NE_EXPR)
5040     *comp_p = comp;
5041   else if (*comp_p == EQ_EXPR)
5042     *comp_p = invert_tree_comparison (comp, false);
5043   else
5044     gcc_unreachable ();
5045 
5046   return true;
5047 }
5048 
5049 /* Check whether it is possible to express the condition in USE by comparison
5050    of candidate CAND.  If so, store the value compared with to BOUND, and the
5051    comparison operator to COMP.  */
5052 
5053 static bool
5054 may_eliminate_iv (struct ivopts_data *data,
5055 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5056 		  enum tree_code *comp)
5057 {
5058   basic_block ex_bb;
5059   edge exit;
5060   tree period;
5061   struct loop *loop = data->current_loop;
5062   aff_tree bnd;
5063   struct tree_niter_desc *desc = NULL;
5064 
5065   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5066     return false;
5067 
5068   /* For now this works only for exits that dominate the loop latch.
5069      TODO: extend to other conditions inside the loop body.  */
5070   ex_bb = gimple_bb (use->stmt);
5071   if (use->stmt != last_stmt (ex_bb)
5072       || gimple_code (use->stmt) != GIMPLE_COND
5073       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5074     return false;
5075 
5076   exit = EDGE_SUCC (ex_bb, 0);
5077   if (flow_bb_inside_loop_p (loop, exit->dest))
5078     exit = EDGE_SUCC (ex_bb, 1);
5079   if (flow_bb_inside_loop_p (loop, exit->dest))
5080     return false;
5081 
5082   desc = niter_for_exit (data, exit);
5083   if (!desc)
5084     return false;
5085 
5086   /* Determine whether we can use the variable to test the exit condition.
5087      This is the case iff the period of the induction variable is greater
5088      than the number of iterations for which the exit condition is true.  */
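  /* E.g., an 8-bit iv with step 1 has a period of 255, so it can replace the
     exit test only if the loop iterates at most 255 times (one time fewer if
     the use follows the increment).  */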
5089   period = iv_period (cand->iv);
5090 
5091   /* If the number of iterations is constant, compare against it directly.  */
5092   if (TREE_CODE (desc->niter) == INTEGER_CST)
5093     {
5094       /* See cand_value_at.  */
5095       if (stmt_after_increment (loop, cand, use->stmt))
5096 	{
5097 	  if (!tree_int_cst_lt (desc->niter, period))
5098 	    return false;
5099 	}
5100       else
5101 	{
5102 	  if (tree_int_cst_lt (period, desc->niter))
5103 	    return false;
5104 	}
5105     }
5106 
5107   /* If not, and if this is the only possible exit of the loop, see whether
5108      we can get a conservative estimate on the number of iterations of the
5109      entire loop and compare against that instead.  */
5110   else
5111     {
5112       widest_int period_value, max_niter;
5113 
5114       max_niter = desc->max;
5115       if (stmt_after_increment (loop, cand, use->stmt))
5116 	max_niter += 1;
5117       period_value = wi::to_widest (period);
5118       if (wi::gtu_p (max_niter, period_value))
5119 	{
5120 	  /* See if we can take advantage of inferred loop bound
5121 	     information.  */
5122 	  if (data->loop_single_exit_p)
5123 	    {
5124 	      if (!max_loop_iterations (loop, &max_niter))
5125 		return false;
5126 	      /* The loop bound is already adjusted by adding 1.  */
5127 	      if (wi::gtu_p (max_niter, period_value))
5128 		return false;
5129 	    }
5130 	  else
5131 	    return false;
5132 	}
5133     }
5134 
5135   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5136 
5137   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5138 			 aff_combination_to_tree (&bnd));
5139   *comp = iv_elimination_compare (data, use);
5140 
5141   /* It is unlikely that computing the number of iterations using division
5142      would be more profitable than keeping the original induction variable.  */
5143   if (expression_expensive_p (*bound))
5144     return false;
5145 
5146   /* Sometimes it is possible to handle the situation that the number of
5147      iterations may be zero unless additional assumptions hold, by using <
5148      instead of != in the exit condition.
5149 
5150      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5151 	   base the exit condition on it.  However, that is often too
5152 	   expensive.  */
5153   if (!integer_zerop (desc->may_be_zero))
5154     return iv_elimination_compare_lt (data, cand, comp, desc);
5155 
5156   return true;
5157 }
5158 
5159  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5160     be copied if it is used in the loop body and DATA->body_includes_call is set.  */
5161 
5162 static int
5163 parm_decl_cost (struct ivopts_data *data, tree bound)
5164 {
5165   tree sbound = bound;
5166   STRIP_NOPS (sbound);
5167 
5168   if (TREE_CODE (sbound) == SSA_NAME
5169       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5170       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5171       && data->body_includes_call)
5172     return COSTS_N_INSNS (1);
5173 
5174   return 0;
5175 }
5176 
5177 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5178 
5179 static bool
5180 determine_group_iv_cost_cond (struct ivopts_data *data,
5181 			      struct iv_group *group, struct iv_cand *cand)
5182 {
5183   tree bound = NULL_TREE;
5184   struct iv *cmp_iv;
5185   bitmap inv_exprs = NULL;
5186   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5187   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5188   enum comp_iv_rewrite rewrite_type;
5189   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5190   tree *control_var, *bound_cst;
5191   enum tree_code comp = ERROR_MARK;
5192   struct iv_use *use = group->vuses[0];
5193 
5194   /* Extract condition operands.  */
5195   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5196 					&bound_cst, NULL, &cmp_iv);
5197   gcc_assert (rewrite_type != COMP_IV_NA);
5198 
5199   /* Try iv elimination.  */
5200   if (rewrite_type == COMP_IV_ELIM
5201       && may_eliminate_iv (data, use, cand, &bound, &comp))
5202     {
5203       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5204       if (elim_cost.cost == 0)
5205 	elim_cost.cost = parm_decl_cost (data, bound);
5206       else if (TREE_CODE (bound) == INTEGER_CST)
5207 	elim_cost.cost = 0;
5208       /* If we replace a loop condition 'i < n' with 'p < base + n',
5209 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5210 	 'base' and 'n' will be live during the loop.  More likely,
5211 	 'base + n' will be loop invariant, resulting in only one live value
5212 	 during the loop.  So in that case we clear inv_vars_elim and set
5213 	 inv_expr_elim instead.  */
5214       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5215 	{
5216 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5217 	  bitmap_clear (inv_vars_elim);
5218 	}
5219       /* The bound is a loop invariant, so it will be only computed
5220 	 once.  */
5221       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5222     }
5223 
5224   /* When the condition is a comparison of the candidate IV against
5225      zero, prefer this IV.
5226 
5227      TODO: The constant that we're subtracting from the cost should
5228      be target-dependent.  This information should be added to the
5229      target costs for each backend.  */
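  /* (Illustrative rationale: on many targets a comparison against zero can
     reuse the condition flags set by the candidate's own increment or
     decrement, making such an exit test essentially free.)  */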
5230   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5231       && integer_zerop (*bound_cst)
5232       && (operand_equal_p (*control_var, cand->var_after, 0)
5233 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5234     elim_cost -= 1;
5235 
5236   express_cost = get_computation_cost (data, use, cand, false,
5237 				       &inv_vars_express, NULL,
5238 				       &inv_expr_express);
5239   if (cmp_iv != NULL)
5240     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5241 
5242   /* Count the cost of the original bound as well.  */
5243   bound_cost = force_var_cost (data, *bound_cst, NULL);
5244   if (bound_cost.cost == 0)
5245     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5246   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5247     bound_cost.cost = 0;
5248   express_cost += bound_cost;
5249 
5250   /* Choose the better approach, preferring the eliminated IV. */
5251   if (elim_cost <= express_cost)
5252     {
5253       cost = elim_cost;
5254       inv_vars = inv_vars_elim;
5255       inv_vars_elim = NULL;
5256       inv_expr = inv_expr_elim;
5257     }
5258   else
5259     {
5260       cost = express_cost;
5261       inv_vars = inv_vars_express;
5262       inv_vars_express = NULL;
5263       bound = NULL_TREE;
5264       comp = ERROR_MARK;
5265       inv_expr = inv_expr_express;
5266     }
5267 
5268   if (inv_expr)
5269     {
5270       inv_exprs = BITMAP_ALLOC (NULL);
5271       bitmap_set_bit (inv_exprs, inv_expr->id);
5272     }
5273   set_group_iv_cost (data, group, cand, cost,
5274 		     inv_vars, bound, comp, inv_exprs);
5275 
5276   if (inv_vars_elim)
5277     BITMAP_FREE (inv_vars_elim);
5278   if (inv_vars_express)
5279     BITMAP_FREE (inv_vars_express);
5280 
5281   return !cost.infinite_cost_p ();
5282 }
5283 
5284 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5285    if the use in GROUP cannot be represented with CAND.  */
5286 
5287 static bool
5288 determine_group_iv_cost (struct ivopts_data *data,
5289 			 struct iv_group *group, struct iv_cand *cand)
5290 {
5291   switch (group->type)
5292     {
5293     case USE_NONLINEAR_EXPR:
5294       return determine_group_iv_cost_generic (data, group, cand);
5295 
5296     case USE_REF_ADDRESS:
5297     case USE_PTR_ADDRESS:
5298       return determine_group_iv_cost_address (data, group, cand);
5299 
5300     case USE_COMPARE:
5301       return determine_group_iv_cost_cond (data, group, cand);
5302 
5303     default:
5304       gcc_unreachable ();
5305     }
5306 }
5307 
5308 /* Return true if get_computation_cost indicates that autoincrement is
5309    a possibility for the pair of USE and CAND, false otherwise.  */
5310 
5311 static bool
5312 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5313 			   struct iv_cand *cand)
5314 {
5315   if (!address_p (use->type))
5316     return false;
5317 
5318   bool can_autoinc = false;
5319   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5320   return can_autoinc;
5321 }
5322 
5323 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5324    use that allows autoincrement, and set their AINC_USE if possible.  */
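/* For instance (illustrative only): when the candidate is incremented
   right next to a memory access that uses its value, targets with pre- or
   post-increment addressing modes may be able to fold the increment into
   the access itself.  */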
5325 
5326 static void
5327 set_autoinc_for_original_candidates (struct ivopts_data *data)
5328 {
5329   unsigned i, j;
5330 
5331   for (i = 0; i < data->vcands.length (); i++)
5332     {
5333       struct iv_cand *cand = data->vcands[i];
5334       struct iv_use *closest_before = NULL;
5335       struct iv_use *closest_after = NULL;
5336       if (cand->pos != IP_ORIGINAL)
5337 	continue;
5338 
5339       for (j = 0; j < data->vgroups.length (); j++)
5340 	{
5341 	  struct iv_group *group = data->vgroups[j];
5342 	  struct iv_use *use = group->vuses[0];
5343 	  unsigned uid = gimple_uid (use->stmt);
5344 
5345 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5346 	    continue;
5347 
5348 	  if (uid < gimple_uid (cand->incremented_at)
5349 	      && (closest_before == NULL
5350 		  || uid > gimple_uid (closest_before->stmt)))
5351 	    closest_before = use;
5352 
5353 	  if (uid > gimple_uid (cand->incremented_at)
5354 	      && (closest_after == NULL
5355 		  || uid < gimple_uid (closest_after->stmt)))
5356 	    closest_after = use;
5357 	}
5358 
5359       if (closest_before != NULL
5360 	  && autoinc_possible_for_pair (data, closest_before, cand))
5361 	cand->ainc_use = closest_before;
5362       else if (closest_after != NULL
5363 	       && autoinc_possible_for_pair (data, closest_after, cand))
5364 	cand->ainc_use = closest_after;
5365     }
5366 }
5367 
5368 /* Relate compare use with all candidates.  */
5369 
5370 static void
5371 relate_compare_use_with_all_cands (struct ivopts_data *data)
5372 {
5373   unsigned i, count = data->vcands.length ();
5374   for (i = 0; i < data->vgroups.length (); i++)
5375     {
5376       struct iv_group *group = data->vgroups[i];
5377 
5378       if (group->type == USE_COMPARE)
5379 	bitmap_set_range (group->related_cands, 0, count);
5380     }
5381 }
5382 
5383 /* Finds the candidates for the induction variables.  */
5384 
5385 static void
5386 find_iv_candidates (struct ivopts_data *data)
5387 {
5388   /* Add commonly used ivs.  */
5389   add_standard_iv_candidates (data);
5390 
5391   /* Add old induction variables.  */
5392   add_iv_candidate_for_bivs (data);
5393 
5394   /* Add induction variables derived from uses.  */
5395   add_iv_candidate_for_groups (data);
5396 
5397   set_autoinc_for_original_candidates (data);
5398 
5399   /* Record the important candidates.  */
5400   record_important_candidates (data);
5401 
5402   /* Relate compare iv_use with all candidates.  */
5403   if (!data->consider_all_candidates)
5404     relate_compare_use_with_all_cands (data);
5405 
5406   if (dump_file && (dump_flags & TDF_DETAILS))
5407     {
5408       unsigned i;
5409 
5410       fprintf (dump_file, "\n<Important Candidates>:\t");
5411       for (i = 0; i < data->vcands.length (); i++)
5412 	if (data->vcands[i]->important)
5413 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5414       fprintf (dump_file, "\n");
5415 
5416       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5417       for (i = 0; i < data->vgroups.length (); i++)
5418 	{
5419 	  struct iv_group *group = data->vgroups[i];
5420 
5421 	  if (group->related_cands)
5422 	    {
5423 	      fprintf (dump_file, "  Group %d:\t", group->id);
5424 	      dump_bitmap (dump_file, group->related_cands);
5425 	    }
5426 	}
5427       fprintf (dump_file, "\n");
5428     }
5429 }
5430 
5431 /* Determines costs of computing use of iv with an iv candidate.  */
5432 
5433 static void
5434 determine_group_iv_costs (struct ivopts_data *data)
5435 {
5436   unsigned i, j;
5437   struct iv_cand *cand;
5438   struct iv_group *group;
5439   bitmap to_clear = BITMAP_ALLOC (NULL);
5440 
5441   alloc_use_cost_map (data);
5442 
5443   for (i = 0; i < data->vgroups.length (); i++)
5444     {
5445       group = data->vgroups[i];
5446 
5447       if (data->consider_all_candidates)
5448 	{
5449 	  for (j = 0; j < data->vcands.length (); j++)
5450 	    {
5451 	      cand = data->vcands[j];
5452 	      determine_group_iv_cost (data, group, cand);
5453 	    }
5454 	}
5455       else
5456 	{
5457 	  bitmap_iterator bi;
5458 
5459 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5460 	    {
5461 	      cand = data->vcands[j];
5462 	      if (!determine_group_iv_cost (data, group, cand))
5463 		bitmap_set_bit (to_clear, j);
5464 	    }
5465 
5466 	  /* Remove the candidates for which the cost is infinite from
5467 	     the list of related candidates.  */
5468 	  bitmap_and_compl_into (group->related_cands, to_clear);
5469 	  bitmap_clear (to_clear);
5470 	}
5471     }
5472 
5473   BITMAP_FREE (to_clear);
5474 
5475   if (dump_file && (dump_flags & TDF_DETAILS))
5476     {
5477       bitmap_iterator bi;
5478 
5479       /* Dump invariant variables.  */
5480       fprintf (dump_file, "\n<Invariant Vars>:\n");
5481       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5482 	{
5483 	  struct version_info *info = ver_info (data, i);
5484 	  if (info->inv_id)
5485 	    {
5486 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5487 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5488 	      fprintf (dump_file, "%s\n",
5489 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5490 	    }
5491 	}
5492 
5493       /* Dump invariant expressions.  */
5494       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5495       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5496 
5497       for (hash_table<iv_inv_expr_hasher>::iterator it
5498 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5499 	   ++it)
5500 	list.safe_push (*it);
5501 
5502       list.qsort (sort_iv_inv_expr_ent);
5503 
5504       for (i = 0; i < list.length (); ++i)
5505 	{
5506 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5507 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5508 	  fprintf (dump_file, "\n");
5509 	}
5510 
5511       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5512 
5513       for (i = 0; i < data->vgroups.length (); i++)
5514 	{
5515 	  group = data->vgroups[i];
5516 
5517 	  fprintf (dump_file, "Group %d:\n", i);
5518 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5519 	  for (j = 0; j < group->n_map_members; j++)
5520 	    {
5521 	      if (!group->cost_map[j].cand
5522 		  || group->cost_map[j].cost.infinite_cost_p ())
5523 		continue;
5524 
5525 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5526 		       group->cost_map[j].cand->id,
5527 		       group->cost_map[j].cost.cost,
5528 		       group->cost_map[j].cost.complexity);
5529 	      if (!group->cost_map[j].inv_exprs
5530 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5531 		fprintf (dump_file, "NIL;\t");
5532 	      else
5533 		bitmap_print (dump_file,
5534 			      group->cost_map[j].inv_exprs, "", ";\t");
5535 	      if (!group->cost_map[j].inv_vars
5536 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5537 		fprintf (dump_file, "NIL;\n");
5538 	      else
5539 		bitmap_print (dump_file,
5540 			      group->cost_map[j].inv_vars, "", "\n");
5541 	    }
5542 
5543 	  fprintf (dump_file, "\n");
5544 	}
5545       fprintf (dump_file, "\n");
5546     }
5547 }
5548 
5549 /* Determines cost of the candidate CAND.  */
5550 
5551 static void
5552 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5553 {
5554   comp_cost cost_base;
5555   unsigned cost, cost_step;
5556   tree base;
5557 
5558   gcc_assert (cand->iv != NULL);
5559 
5560   /* There are two costs associated with the candidate -- its increment
5561      and its initialization.  The second is almost negligible for any loop
5562      that rolls enough, so we give it only a small weight.  */
5563 
5564   base = cand->iv->base;
5565   cost_base = force_var_cost (data, base, NULL);
5566   /* It is rare that the iv register happens to be initialized with
5567      the proper value at no cost.  In general, there will at least be a regcopy
5568      or a const set.  */
5569   if (cost_base.cost == 0)
5570     cost_base.cost = COSTS_N_INSNS (1);
5571   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5572 
5573   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5574 
5575   /* Prefer the original ivs unless we may gain something by replacing them;
5576      the reason is to make debugging simpler.  This is not relevant for
5577      artificial ivs created by other optimization passes.  */
5578   if (cand->pos != IP_ORIGINAL
5579       || !SSA_NAME_VAR (cand->var_before)
5580       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5581     cost++;
5582 
5583   /* Prefer not to insert statements into latch unless there are some
5584      already (so that we do not create unnecessary jumps).  */
5585   if (cand->pos == IP_END
5586       && empty_block_p (ip_end_pos (data->current_loop)))
5587     cost++;
5588 
5589   cand->cost = cost;
5590   cand->cost_step = cost_step;
5591 }
5592 
5593 /* Determines costs of computation of the candidates.  */
5594 
5595 static void
5596 determine_iv_costs (struct ivopts_data *data)
5597 {
5598   unsigned i;
5599 
5600   if (dump_file && (dump_flags & TDF_DETAILS))
5601     {
5602       fprintf (dump_file, "<Candidate Costs>:\n");
5603       fprintf (dump_file, "  cand\tcost\n");
5604     }
5605 
5606   for (i = 0; i < data->vcands.length (); i++)
5607     {
5608       struct iv_cand *cand = data->vcands[i];
5609 
5610       determine_iv_cost (data, cand);
5611 
5612       if (dump_file && (dump_flags & TDF_DETAILS))
5613 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5614     }
5615 
5616   if (dump_file && (dump_flags & TDF_DETAILS))
5617     fprintf (dump_file, "\n");
5618 }
5619 
5620 /* Estimate register pressure for a loop having N_INVS invariants and N_CANDS
5621    induction variables.  Note N_INVS includes both invariant variables and
5622    invariant expressions.  */
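/* For example (illustrative numbers only): with 10 available registers,
   3 registers already used by the loop, N_CANDS == 5 and N_INVS == 4, we
   need 12 registers in total.  That exceeds the 10 available, but the 5
   candidates alone still fit, so the third branch below charges
   target_reg_cost for the registers that fit and target_spill_cost for
   the 2 that do not.  */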
5623 
5624 static unsigned
5625 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5626 			      unsigned n_cands)
5627 {
5628   unsigned cost;
5629   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5630   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5631   bool speed = data->speed;
5632 
5633   /* If there is a call in the loop body, the call-clobbered registers
5634      are not available for loop invariants.  */
5635   if (data->body_includes_call)
5636     available_regs = available_regs - target_clobbered_regs;
5637 
5638   /* If we have enough registers.  */
5639   if (regs_needed + target_res_regs < available_regs)
5640     cost = n_new;
5641   /* If close to running out of registers, try to preserve them.  */
5642   else if (regs_needed <= available_regs)
5643     cost = target_reg_cost [speed] * regs_needed;
5644   /* If the registers needed exceed those available but the candidates alone
5645      still fit, we penalize the extra registers using target_spill_cost.  */
5646   else if (n_cands <= available_regs)
5647     cost = target_reg_cost [speed] * available_regs
5648 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5649   /* If the number of candidates exceeds the available registers, we penalize
5650      extra candidate registers using target_spill_cost * 2, because it is
5651      more expensive to spill an induction variable than an invariant.  */
5652   else
5653     cost = target_reg_cost [speed] * available_regs
5654 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5655 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5656 
5657   /* Finally, add the number of candidates, so that we prefer eliminating
5658      induction variables if possible.  */
5659   return cost + n_cands;
5660 }
5661 
5662 /* For each size of the induction variable set determine the penalty.  */
5663 
5664 static void
5665 determine_set_costs (struct ivopts_data *data)
5666 {
5667   unsigned j, n;
5668   gphi *phi;
5669   gphi_iterator psi;
5670   tree op;
5671   struct loop *loop = data->current_loop;
5672   bitmap_iterator bi;
5673 
5674   if (dump_file && (dump_flags & TDF_DETAILS))
5675     {
5676       fprintf (dump_file, "<Global Costs>:\n");
5677       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5678       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5679       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5680       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5681     }
5682 
5683   n = 0;
5684   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5685     {
5686       phi = psi.phi ();
5687       op = PHI_RESULT (phi);
5688 
5689       if (virtual_operand_p (op))
5690 	continue;
5691 
5692       if (get_iv (data, op))
5693 	continue;
5694 
5695       if (!POINTER_TYPE_P (TREE_TYPE (op))
5696 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5697 	continue;
5698 
5699       n++;
5700     }
5701 
5702   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5703     {
5704       struct version_info *info = ver_info (data, j);
5705 
5706       if (info->inv_id && info->has_nonlin_use)
5707 	n++;
5708     }
5709 
5710   data->regs_used = n;
5711   if (dump_file && (dump_flags & TDF_DETAILS))
5712     fprintf (dump_file, "  regs_used %d\n", n);
5713 
5714   if (dump_file && (dump_flags & TDF_DETAILS))
5715     {
5716       fprintf (dump_file, "  cost for size:\n");
5717       fprintf (dump_file, "  ivs\tcost\n");
5718       for (j = 0; j <= 2 * target_avail_regs; j++)
5719 	fprintf (dump_file, "  %d\t%d\n", j,
5720 		 ivopts_estimate_reg_pressure (data, 0, j));
5721       fprintf (dump_file, "\n");
5722     }
5723 }
5724 
5725 /* Returns true if A is a cheaper cost pair than B.  */
5726 
5727 static bool
5728 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5729 {
5730   if (!a)
5731     return false;
5732 
5733   if (!b)
5734     return true;
5735 
5736   if (a->cost < b->cost)
5737     return true;
5738 
5739   if (b->cost < a->cost)
5740     return false;
5741 
5742   /* In case the costs are the same, prefer the cheaper candidate.  */
5743   if (a->cand->cost < b->cand->cost)
5744     return true;
5745 
5746   return false;
5747 }
5748 
5749 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5750    for more expensive, equal and cheaper respectively.  */
5751 
5752 static int
5753 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5754 {
5755   if (cheaper_cost_pair (a, b))
5756     return -1;
5757   if (cheaper_cost_pair (b, a))
5758     return 1;
5759 
5760   return 0;
5761 }
5762 
5763 /* Returns the cost pair describing how GROUP is expressed in IVS.  */
5764 
5765 static struct cost_pair *
5766 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5767 {
5768   return ivs->cand_for_group[group->id];
5769 }
5770 
5771 /* Computes the cost field of IVS structure.  */
5772 
5773 static void
5774 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5775 {
5776   comp_cost cost = ivs->cand_use_cost;
5777 
5778   cost += ivs->cand_cost;
5779   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5780   ivs->cost = cost;
5781 }
5782 
5783 /* Remove use of invariants in set INVS by decreasing counters in N_INV_USES
5784    and IVS.  */
5785 
5786 static void
5787 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5788 {
5789   bitmap_iterator bi;
5790   unsigned iid;
5791 
5792   if (!invs)
5793     return;
5794 
5795   gcc_assert (n_inv_uses != NULL);
5796   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5797     {
5798       n_inv_uses[iid]--;
5799       if (n_inv_uses[iid] == 0)
5800 	ivs->n_invs--;
5801     }
5802 }
5803 
5804 /* Set GROUP not to be expressed by any candidate in IVS.  */
5805 
5806 static void
5807 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5808 		 struct iv_group *group)
5809 {
5810   unsigned gid = group->id, cid;
5811   struct cost_pair *cp;
5812 
5813   cp = ivs->cand_for_group[gid];
5814   if (!cp)
5815     return;
5816   cid = cp->cand->id;
5817 
5818   ivs->bad_groups++;
5819   ivs->cand_for_group[gid] = NULL;
5820   ivs->n_cand_uses[cid]--;
5821 
5822   if (ivs->n_cand_uses[cid] == 0)
5823     {
5824       bitmap_clear_bit (ivs->cands, cid);
5825       ivs->n_cands--;
5826       ivs->cand_cost -= cp->cand->cost;
5827       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5828       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5829     }
5830 
5831   ivs->cand_use_cost -= cp->cost;
5832   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5833   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5834   iv_ca_recount_cost (data, ivs);
5835 }
5836 
5837 /* Add use of invariants in set INVS by increasing counters in N_INV_USES and
5838    IVS.  */
5839 
5840 static void
5841 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5842 {
5843   bitmap_iterator bi;
5844   unsigned iid;
5845 
5846   if (!invs)
5847     return;
5848 
5849   gcc_assert (n_inv_uses != NULL);
5850   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5851     {
5852       n_inv_uses[iid]++;
5853       if (n_inv_uses[iid] == 1)
5854 	ivs->n_invs++;
5855     }
5856 }
5857 
5858 /* Set cost pair for GROUP in set IVS to CP.  */
5859 
5860 static void
5861 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5862 	      struct iv_group *group, struct cost_pair *cp)
5863 {
5864   unsigned gid = group->id, cid;
5865 
5866   if (ivs->cand_for_group[gid] == cp)
5867     return;
5868 
5869   if (ivs->cand_for_group[gid])
5870     iv_ca_set_no_cp (data, ivs, group);
5871 
5872   if (cp)
5873     {
5874       cid = cp->cand->id;
5875 
5876       ivs->bad_groups--;
5877       ivs->cand_for_group[gid] = cp;
5878       ivs->n_cand_uses[cid]++;
5879       if (ivs->n_cand_uses[cid] == 1)
5880 	{
5881 	  bitmap_set_bit (ivs->cands, cid);
5882 	  ivs->n_cands++;
5883 	  ivs->cand_cost += cp->cand->cost;
5884 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5885 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5886 	}
5887 
5888       ivs->cand_use_cost += cp->cost;
5889       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5890       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5891       iv_ca_recount_cost (data, ivs);
5892     }
5893 }
5894 
5895 /* Extend set IVS by expressing GROUP by some of the candidates in it
5896    if possible.  Consider all important candidates if candidates in
5897    set IVS don't give any result.  */
5898 
5899 static void
5900 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5901 	       struct iv_group *group)
5902 {
5903   struct cost_pair *best_cp = NULL, *cp;
5904   bitmap_iterator bi;
5905   unsigned i;
5906   struct iv_cand *cand;
5907 
5908   gcc_assert (ivs->upto >= group->id);
5909   ivs->upto++;
5910   ivs->bad_groups++;
5911 
5912   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5913     {
5914       cand = data->vcands[i];
5915       cp = get_group_iv_cost (data, group, cand);
5916       if (cheaper_cost_pair (cp, best_cp))
5917 	best_cp = cp;
5918     }
5919 
5920   if (best_cp == NULL)
5921     {
5922       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5923 	{
5924 	  cand = data->vcands[i];
5925 	  cp = get_group_iv_cost (data, group, cand);
5926 	  if (cheaper_cost_pair (cp, best_cp))
5927 	    best_cp = cp;
5928 	}
5929     }
5930 
5931   iv_ca_set_cp (data, ivs, group, best_cp);
5932 }
5933 
5934 /* Get cost for assignment IVS.  */
5935 
5936 static comp_cost
5937 iv_ca_cost (struct iv_ca *ivs)
5938 {
5939   /* This was a conditional expression but it triggered a bug in
5940      Sun C 5.5.  */
5941   if (ivs->bad_groups)
5942     return infinite_cost;
5943   else
5944     return ivs->cost;
5945 }
5946 
5947 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5948    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5949    respectively.  */
5950 
5951 static int
5952 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5953 		    struct iv_group *group, struct cost_pair *old_cp,
5954 		    struct cost_pair *new_cp)
5955 {
5956   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5957   unsigned old_n_invs = ivs->n_invs;
5958   iv_ca_set_cp (data, ivs, group, new_cp);
5959   unsigned new_n_invs = ivs->n_invs;
5960   iv_ca_set_cp (data, ivs, group, old_cp);
5961 
5962   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5963 }
5964 
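/* The iv_ca_delta lists below let the search speculatively apply a list of
   changes (iv_ca_delta_commit with FORWARD true), measure the cost of the
   resulting assignment, and then revert exactly the same changes (FORWARD
   false) without copying the whole iv_ca structure.  */
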
5965 /* Creates a change that expresses GROUP by NEW_CP instead of OLD_CP and
5966    chains it before NEXT.  */
5967 
5968 static struct iv_ca_delta *
5969 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5970 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
5971 {
5972   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5973 
5974   change->group = group;
5975   change->old_cp = old_cp;
5976   change->new_cp = new_cp;
5977   change->next = next;
5978 
5979   return change;
5980 }
5981 
5982 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5983    are rewritten.  */
5984 
5985 static struct iv_ca_delta *
5986 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5987 {
5988   struct iv_ca_delta *last;
5989 
5990   if (!l2)
5991     return l1;
5992 
5993   if (!l1)
5994     return l2;
5995 
5996   for (last = l1; last->next; last = last->next)
5997     continue;
5998   last->next = l2;
5999 
6000   return l1;
6001 }
6002 
6003 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6004 
6005 static struct iv_ca_delta *
6006 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6007 {
6008   struct iv_ca_delta *act, *next, *prev = NULL;
6009 
6010   for (act = delta; act; act = next)
6011     {
6012       next = act->next;
6013       act->next = prev;
6014       prev = act;
6015 
6016       std::swap (act->old_cp, act->new_cp);
6017     }
6018 
6019   return prev;
6020 }
6021 
6022 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6023    reverted instead.  */
6024 
6025 static void
6026 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6027 		    struct iv_ca_delta *delta, bool forward)
6028 {
6029   struct cost_pair *from, *to;
6030   struct iv_ca_delta *act;
6031 
6032   if (!forward)
6033     delta = iv_ca_delta_reverse (delta);
6034 
6035   for (act = delta; act; act = act->next)
6036     {
6037       from = act->old_cp;
6038       to = act->new_cp;
6039       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6040       iv_ca_set_cp (data, ivs, act->group, to);
6041     }
6042 
6043   if (!forward)
6044     iv_ca_delta_reverse (delta);
6045 }
6046 
6047 /* Returns true if CAND is used in IVS.  */
6048 
6049 static bool
6050 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6051 {
6052   return ivs->n_cand_uses[cand->id] > 0;
6053 }
6054 
6055 /* Returns number of induction variable candidates in the set IVS.  */
6056 
6057 static unsigned
6058 iv_ca_n_cands (struct iv_ca *ivs)
6059 {
6060   return ivs->n_cands;
6061 }
6062 
6063 /* Free the list of changes DELTA.  */
6064 
6065 static void
6066 iv_ca_delta_free (struct iv_ca_delta **delta)
6067 {
6068   struct iv_ca_delta *act, *next;
6069 
6070   for (act = *delta; act; act = next)
6071     {
6072       next = act->next;
6073       free (act);
6074     }
6075 
6076   *delta = NULL;
6077 }
6078 
6079 /* Allocates a new iv candidate assignment.  */
6080 
6081 static struct iv_ca *
6082 iv_ca_new (struct ivopts_data *data)
6083 {
6084   struct iv_ca *nw = XNEW (struct iv_ca);
6085 
6086   nw->upto = 0;
6087   nw->bad_groups = 0;
6088   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6089 				 data->vgroups.length ());
6090   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6091   nw->cands = BITMAP_ALLOC (NULL);
6092   nw->n_cands = 0;
6093   nw->n_invs = 0;
6094   nw->cand_use_cost = no_cost;
6095   nw->cand_cost = 0;
6096   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6097   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6098   nw->cost = no_cost;
6099 
6100   return nw;
6101 }
6102 
6103 /* Free memory occupied by the set IVS.  */
6104 
6105 static void
6106 iv_ca_free (struct iv_ca **ivs)
6107 {
6108   free ((*ivs)->cand_for_group);
6109   free ((*ivs)->n_cand_uses);
6110   BITMAP_FREE ((*ivs)->cands);
6111   free ((*ivs)->n_inv_var_uses);
6112   free ((*ivs)->n_inv_expr_uses);
6113   free (*ivs);
6114   *ivs = NULL;
6115 }
6116 
6117 /* Dumps IVS to FILE.  */
6118 
6119 static void
6120 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6121 {
6122   unsigned i;
6123   comp_cost cost = iv_ca_cost (ivs);
6124 
6125   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6126 	   cost.complexity);
6127   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6128 	   ivs->cand_cost, ivs->cand_use_cost.cost,
6129 	   ivs->cand_use_cost.complexity);
6130   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6131 
6132   for (i = 0; i < ivs->upto; i++)
6133     {
6134       struct iv_group *group = data->vgroups[i];
6135       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6136       if (cp)
6137         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6138 		 group->id, cp->cand->id, cp->cost.cost,
6139 		 cp->cost.complexity);
6140       else
6141 	fprintf (file, "   group:%d --> ??\n", group->id);
6142     }
6143 
6144   const char *pref = "";
6145   fprintf (file, "  invariant variables: ");
6146   for (i = 1; i <= data->max_inv_var_id; i++)
6147     if (ivs->n_inv_var_uses[i])
6148       {
6149 	fprintf (file, "%s%d", pref, i);
6150 	pref = ", ";
6151       }
6152 
6153   pref = "";
6154   fprintf (file, "\n  invariant expressions: ");
6155   for (i = 1; i <= data->max_inv_expr_id; i++)
6156     if (ivs->n_inv_expr_uses[i])
6157       {
6158 	fprintf (file, "%s%d", pref, i);
6159 	pref = ", ";
6160       }
6161 
6162   fprintf (file, "\n\n");
6163 }
6164 
6165 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6166    new set, and store differences in DELTA.  Number of induction variables
6167    in the new set is stored to N_IVS.  MIN_NCAND is a flag; when it is true,
6168    the function will try to find a solution with minimal iv candidates.  */
6169 
6170 static comp_cost
6171 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6172 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6173 	      unsigned *n_ivs, bool min_ncand)
6174 {
6175   unsigned i;
6176   comp_cost cost;
6177   struct iv_group *group;
6178   struct cost_pair *old_cp, *new_cp;
6179 
6180   *delta = NULL;
6181   for (i = 0; i < ivs->upto; i++)
6182     {
6183       group = data->vgroups[i];
6184       old_cp = iv_ca_cand_for_group (ivs, group);
6185 
6186       if (old_cp
6187 	  && old_cp->cand == cand)
6188 	continue;
6189 
6190       new_cp = get_group_iv_cost (data, group, cand);
6191       if (!new_cp)
6192 	continue;
6193 
6194       if (!min_ncand)
6195 	{
6196 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6197 	  /* Skip if new_cp depends on more invariants.  */
6198 	  if (cmp_invs > 0)
6199 	    continue;
6200 
6201 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6202 	  /* Skip if new_cp is not cheaper.  */
6203 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6204 	    continue;
6205 	}
6206 
6207       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6208     }
6209 
6210   iv_ca_delta_commit (data, ivs, *delta, true);
6211   cost = iv_ca_cost (ivs);
6212   if (n_ivs)
6213     *n_ivs = iv_ca_n_cands (ivs);
6214   iv_ca_delta_commit (data, ivs, *delta, false);
6215 
6216   return cost;
6217 }
6218 
6219 /* Try narrowing set IVS by removing CAND.  Return the cost of
6220    the new set and store the differences in DELTA.  START is
6221    the candidate with which we start narrowing.  */
6222 
6223 static comp_cost
6224 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6225 	      struct iv_cand *cand, struct iv_cand *start,
6226 	      struct iv_ca_delta **delta)
6227 {
6228   unsigned i, ci;
6229   struct iv_group *group;
6230   struct cost_pair *old_cp, *new_cp, *cp;
6231   bitmap_iterator bi;
6232   struct iv_cand *cnd;
6233   comp_cost cost, best_cost, acost;
6234 
6235   *delta = NULL;
6236   for (i = 0; i < data->vgroups.length (); i++)
6237     {
6238       group = data->vgroups[i];
6239 
6240       old_cp = iv_ca_cand_for_group (ivs, group);
6241       if (old_cp->cand != cand)
6242 	continue;
6243 
6244       best_cost = iv_ca_cost (ivs);
6245       /* Start narrowing with START.  */
6246       new_cp = get_group_iv_cost (data, group, start);
6247 
6248       if (data->consider_all_candidates)
6249 	{
6250 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6251 	    {
6252 	      if (ci == cand->id || (start && ci == start->id))
6253 		continue;
6254 
6255 	      cnd = data->vcands[ci];
6256 
6257 	      cp = get_group_iv_cost (data, group, cnd);
6258 	      if (!cp)
6259 		continue;
6260 
6261 	      iv_ca_set_cp (data, ivs, group, cp);
6262 	      acost = iv_ca_cost (ivs);
6263 
6264 	      if (acost < best_cost)
6265 		{
6266 		  best_cost = acost;
6267 		  new_cp = cp;
6268 		}
6269 	    }
6270 	}
6271       else
6272 	{
6273 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6274 	    {
6275 	      if (ci == cand->id || (start && ci == start->id))
6276 		continue;
6277 
6278 	      cnd = data->vcands[ci];
6279 
6280 	      cp = get_group_iv_cost (data, group, cnd);
6281 	      if (!cp)
6282 		continue;
6283 
6284 	      iv_ca_set_cp (data, ivs, group, cp);
6285 	      acost = iv_ca_cost (ivs);
6286 
6287 	      if (acost < best_cost)
6288 		{
6289 		  best_cost = acost;
6290 		  new_cp = cp;
6291 		}
6292 	    }
6293 	}
6294       /* Restore the old cp for this use.  */
6295       iv_ca_set_cp (data, ivs, group, old_cp);
6296 
6297       if (!new_cp)
6298 	{
6299 	  iv_ca_delta_free (delta);
6300 	  return infinite_cost;
6301 	}
6302 
6303       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6304     }
6305 
6306   iv_ca_delta_commit (data, ivs, *delta, true);
6307   cost = iv_ca_cost (ivs);
6308   iv_ca_delta_commit (data, ivs, *delta, false);
6309 
6310   return cost;
6311 }
6312 
6313 /* Try optimizing the set of candidates IVS by removing candidates other
6314    than EXCEPT_CAND from it.  Return cost of the new set, and store
6315    differences in DELTA.  */
6316 
6317 static comp_cost
6318 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6319 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6320 {
6321   bitmap_iterator bi;
6322   struct iv_ca_delta *act_delta, *best_delta;
6323   unsigned i;
6324   comp_cost best_cost, acost;
6325   struct iv_cand *cand;
6326 
6327   best_delta = NULL;
6328   best_cost = iv_ca_cost (ivs);
6329 
6330   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6331     {
6332       cand = data->vcands[i];
6333 
6334       if (cand == except_cand)
6335 	continue;
6336 
6337       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6338 
6339       if (acost < best_cost)
6340 	{
6341 	  best_cost = acost;
6342 	  iv_ca_delta_free (&best_delta);
6343 	  best_delta = act_delta;
6344 	}
6345       else
6346 	iv_ca_delta_free (&act_delta);
6347     }
6348 
6349   if (!best_delta)
6350     {
6351       *delta = NULL;
6352       return best_cost;
6353     }
6354 
6355   /* Recurse to possibly remove other unnecessary ivs.  */
6356   iv_ca_delta_commit (data, ivs, best_delta, true);
6357   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6358   iv_ca_delta_commit (data, ivs, best_delta, false);
6359   *delta = iv_ca_delta_join (best_delta, *delta);
6360   return best_cost;
6361 }
6362 
6363 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6364    cheaper local cost for GROUP than BEST_CP.  If so, return a pointer
6365    to the corresponding cost_pair; otherwise just return BEST_CP.  */
6366 
6367 static struct cost_pair*
6368 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6369 			unsigned int cand_idx, struct iv_cand *old_cand,
6370 			struct cost_pair *best_cp)
6371 {
6372   struct iv_cand *cand;
6373   struct cost_pair *cp;
6374 
6375   gcc_assert (old_cand != NULL && best_cp != NULL);
6376   if (cand_idx == old_cand->id)
6377     return best_cp;
6378 
6379   cand = data->vcands[cand_idx];
6380   cp = get_group_iv_cost (data, group, cand);
6381   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6382     return cp;
6383 
6384   return best_cp;
6385 }
6386 
6387 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6388    which are used by more than one iv use.  For each of those candidates,
6389    this function tries to represent iv uses under that candidate using
6390    other ones with lower local cost, then tries to prune the new set.
6391    If the new set has lower cost, it returns the new cost after recording
6392    candidate replacement in list DELTA.  */
6393 
6394 static comp_cost
6395 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6396 	       struct iv_ca_delta **delta)
6397 {
6398   bitmap_iterator bi, bj;
6399   unsigned int i, j, k;
6400   struct iv_cand *cand;
6401   comp_cost orig_cost, acost;
6402   struct iv_ca_delta *act_delta, *tmp_delta;
6403   struct cost_pair *old_cp, *best_cp = NULL;
6404 
6405   *delta = NULL;
6406   orig_cost = iv_ca_cost (ivs);
6407 
6408   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6409     {
6410       if (ivs->n_cand_uses[i] == 1
6411 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6412 	continue;
6413 
6414       cand = data->vcands[i];
6415 
6416       act_delta = NULL;
6417       /*  Represent uses under current candidate using other ones with
6418 	  lower local cost.  */
6419       for (j = 0; j < ivs->upto; j++)
6420 	{
6421 	  struct iv_group *group = data->vgroups[j];
6422 	  old_cp = iv_ca_cand_for_group (ivs, group);
6423 
6424 	  if (old_cp->cand != cand)
6425 	    continue;
6426 
6427 	  best_cp = old_cp;
6428 	  if (data->consider_all_candidates)
6429 	    for (k = 0; k < data->vcands.length (); k++)
6430 	      best_cp = cheaper_cost_with_cand (data, group, k,
6431 						old_cp->cand, best_cp);
6432 	  else
6433 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6434 	      best_cp = cheaper_cost_with_cand (data, group, k,
6435 						old_cp->cand, best_cp);
6436 
6437 	  if (best_cp == old_cp)
6438 	    continue;
6439 
6440 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6441 	}
6442       /* No need for further pruning.  */
6443       if (!act_delta)
6444 	continue;
6445 
6446       /* Prune the new candidate set.  */
6447       iv_ca_delta_commit (data, ivs, act_delta, true);
6448       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6449       iv_ca_delta_commit (data, ivs, act_delta, false);
6450       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6451 
6452       if (acost < orig_cost)
6453 	{
6454 	  *delta = act_delta;
6455 	  return acost;
6456 	}
6457       else
6458 	iv_ca_delta_free (&act_delta);
6459     }
6460 
6461   return orig_cost;
6462 }
6463 
6464 /* Tries to extend the set IVS in the best possible way in order to
6465    express GROUP.  If ORIGINALP is true, prefer candidates from
6466    the original set of IVs, otherwise favor important candidates not
6467    based on any memory object.  */
6468 
6469 static bool
6470 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6471 		  struct iv_group *group, bool originalp)
6472 {
6473   comp_cost best_cost, act_cost;
6474   unsigned i;
6475   bitmap_iterator bi;
6476   struct iv_cand *cand;
6477   struct iv_ca_delta *best_delta = NULL, *act_delta;
6478   struct cost_pair *cp;
6479 
6480   iv_ca_add_group (data, ivs, group);
6481   best_cost = iv_ca_cost (ivs);
6482   cp = iv_ca_cand_for_group (ivs, group);
6483   if (cp)
6484     {
6485       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6486       iv_ca_set_no_cp (data, ivs, group);
6487     }
6488 
6489   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6490      first try important candidates not based on any memory object.  Only if
6491      this fails, try the specific ones.  Rationale -- in loops with many
6492      variables the best choice often is to use just one generic biv.  If we
6493      added here many ivs specific to the uses, the optimization algorithm later
6494      would be likely to get stuck in a local minimum, thus causing us to create
6495      too many ivs.  The approach from few ivs to more seems more likely to be
6496      successful -- starting from few ivs, replacing an expensive use by a
6497      specific iv should always be a win.  */
6498   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6499     {
6500       cand = data->vcands[i];
6501 
6502       if (originalp && cand->pos != IP_ORIGINAL)
6503 	continue;
6504 
6505       if (!originalp && cand->iv->base_object != NULL_TREE)
6506 	continue;
6507 
6508       if (iv_ca_cand_used_p (ivs, cand))
6509 	continue;
6510 
6511       cp = get_group_iv_cost (data, group, cand);
6512       if (!cp)
6513 	continue;
6514 
6515       iv_ca_set_cp (data, ivs, group, cp);
6516       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6517 			       true);
6518       iv_ca_set_no_cp (data, ivs, group);
6519       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6520 
6521       if (act_cost < best_cost)
6522 	{
6523 	  best_cost = act_cost;
6524 
6525 	  iv_ca_delta_free (&best_delta);
6526 	  best_delta = act_delta;
6527 	}
6528       else
6529 	iv_ca_delta_free (&act_delta);
6530     }
6531 
6532   if (best_cost.infinite_cost_p ())
6533     {
6534       for (i = 0; i < group->n_map_members; i++)
6535 	{
6536 	  cp = group->cost_map + i;
6537 	  cand = cp->cand;
6538 	  if (!cand)
6539 	    continue;
6540 
6541 	  /* Already tried this.  */
6542 	  if (cand->important)
6543 	    {
6544 	      if (originalp && cand->pos == IP_ORIGINAL)
6545 		continue;
6546 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6547 		continue;
6548 	    }
6549 
6550 	  if (iv_ca_cand_used_p (ivs, cand))
6551 	    continue;
6552 
6553 	  act_delta = NULL;
6554 	  iv_ca_set_cp (data, ivs, group, cp);
6555 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6556 	  iv_ca_set_no_cp (data, ivs, group);
6557 	  act_delta = iv_ca_delta_add (group,
6558 				       iv_ca_cand_for_group (ivs, group),
6559 				       cp, act_delta);
6560 
6561 	  if (act_cost < best_cost)
6562 	    {
6563 	      best_cost = act_cost;
6564 
6565 	      if (best_delta)
6566 		iv_ca_delta_free (&best_delta);
6567 	      best_delta = act_delta;
6568 	    }
6569 	  else
6570 	    iv_ca_delta_free (&act_delta);
6571 	}
6572     }
6573 
6574   iv_ca_delta_commit (data, ivs, best_delta, true);
6575   iv_ca_delta_free (&best_delta);
6576 
6577   return !best_cost.infinite_cost_p ();
6578 }
6579 
6580 /* Finds an initial assignment of candidates to uses.  */
6581 
6582 static struct iv_ca *
6583 get_initial_solution (struct ivopts_data *data, bool originalp)
6584 {
6585   unsigned i;
6586   struct iv_ca *ivs = iv_ca_new (data);
6587 
6588   for (i = 0; i < data->vgroups.length (); i++)
6589     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6590       {
6591 	iv_ca_free (&ivs);
6592 	return NULL;
6593       }
6594 
6595   return ivs;
6596 }
6597 
6598 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6599    points to a bool variable; if it is true, this function tries to break
6600    the local optimal fixed-point by replacing candidates in IVS.  */
6601 
6602 static bool
6603 try_improve_iv_set (struct ivopts_data *data,
6604 		    struct iv_ca *ivs, bool *try_replace_p)
6605 {
6606   unsigned i, n_ivs;
6607   comp_cost acost, best_cost = iv_ca_cost (ivs);
6608   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6609   struct iv_cand *cand;
6610 
6611   /* Try extending the set of induction variables by one.  */
6612   for (i = 0; i < data->vcands.length (); i++)
6613     {
6614       cand = data->vcands[i];
6615 
6616       if (iv_ca_cand_used_p (ivs, cand))
6617 	continue;
6618 
6619       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6620       if (!act_delta)
6621 	continue;
6622 
6623       /* If we successfully added the candidate and the set is small enough,
6624 	 try optimizing it by removing other candidates.  */
6625       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6626       	{
6627 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6628 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6629 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6630 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6631 	}
6632 
6633       if (acost < best_cost)
6634 	{
6635 	  best_cost = acost;
6636 	  iv_ca_delta_free (&best_delta);
6637 	  best_delta = act_delta;
6638 	}
6639       else
6640 	iv_ca_delta_free (&act_delta);
6641     }
6642 
6643   if (!best_delta)
6644     {
6645       /* Try removing the candidates from the set instead.  */
6646       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6647 
6648       if (!best_delta && *try_replace_p)
6649 	{
6650 	  *try_replace_p = false;
6651 	  /* So far the candidate selection algorithm tends to choose fewer
6652 	     IVs so that it can handle cases in which loops have many variables
6653 	     but the best choice is often to use only one general biv.  One
6654 	     weakness is that it can't handle the opposite cases, in which
6655 	     different candidates should be chosen with respect to each use.
6656 	     To solve the problem, we replace candidates in a manner described
6657 	     by the comments of iv_ca_replace, thus giving the general algorithm
6658 	     a chance to break the local optimal fixed-point in these cases.  */
6659 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6660 	}
6661 
6662       if (!best_delta)
6663 	return false;
6664     }
6665 
6666   iv_ca_delta_commit (data, ivs, best_delta, true);
6667   gcc_assert (best_cost == iv_ca_cost (ivs));
6668   iv_ca_delta_free (&best_delta);
6669   return true;
6670 }
6671 
6672 /* Attempts to find the optimal set of induction variables.  We use a simple
6673    greedy heuristic -- we try to replace at most one candidate in the selected
6674    solution and remove the unused ivs while this improves the cost.  */
6675 
6676 static struct iv_ca *
6677 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6678 {
6679   struct iv_ca *set;
6680   bool try_replace_p = true;
6681 
6682   /* Get the initial solution.  */
6683   set = get_initial_solution (data, originalp);
6684   if (!set)
6685     {
6686       if (dump_file && (dump_flags & TDF_DETAILS))
6687 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6688       return NULL;
6689     }
6690 
6691   if (dump_file && (dump_flags & TDF_DETAILS))
6692     {
6693       fprintf (dump_file, "Initial set of candidates:\n");
6694       iv_ca_dump (data, dump_file, set);
6695     }
6696 
6697   while (try_improve_iv_set (data, set, &try_replace_p))
6698     {
6699       if (dump_file && (dump_flags & TDF_DETAILS))
6700 	{
6701 	  fprintf (dump_file, "Improved to:\n");
6702 	  iv_ca_dump (data, dump_file, set);
6703 	}
6704     }
6705 
6706   return set;
6707 }
6708 
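/* Attempts to find the optimal set of induction variables by running the
   search twice -- once with a strategy that prefers the original IVs and
   once without it -- and keeping the cheaper of the two resulting sets.  */
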
6709 static struct iv_ca *
6710 find_optimal_iv_set (struct ivopts_data *data)
6711 {
6712   unsigned i;
6713   comp_cost cost, origcost;
6714   struct iv_ca *set, *origset;
6715 
6716   /* Determine the cost based on a strategy that starts with the original
6717      IVs, then try again using a strategy that prefers candidates not based
6718      on any IVs.  */
6719   origset = find_optimal_iv_set_1 (data, true);
6720   set = find_optimal_iv_set_1 (data, false);
6721 
6722   if (!origset && !set)
6723     return NULL;
6724 
6725   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6726   cost = set ? iv_ca_cost (set) : infinite_cost;
6727 
6728   if (dump_file && (dump_flags & TDF_DETAILS))
6729     {
6730       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6731 	       origcost.cost, origcost.complexity);
6732       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6733 	       cost.cost, cost.complexity);
6734     }
6735 
6736   /* Choose the one with the best cost.  */
6737   if (origcost <= cost)
6738     {
6739       if (set)
6740 	iv_ca_free (&set);
6741       set = origset;
6742     }
6743   else if (origset)
6744     iv_ca_free (&origset);
6745 
6746   for (i = 0; i < data->vgroups.length (); i++)
6747     {
6748       struct iv_group *group = data->vgroups[i];
6749       group->selected = iv_ca_cand_for_group (set, group)->cand;
6750     }
6751 
6752   return set;
6753 }
6754 
6755 /* Creates a new induction variable corresponding to CAND.  */
6756 
6757 static void
6758 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6759 {
6760   gimple_stmt_iterator incr_pos;
6761   tree base;
6762   struct iv_use *use;
6763   struct iv_group *group;
6764   bool after = false;
6765 
6766   gcc_assert (cand->iv != NULL);
6767 
6768   switch (cand->pos)
6769     {
6770     case IP_NORMAL:
6771       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6772       break;
6773 
6774     case IP_END:
6775       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6776       after = true;
6777       break;
6778 
6779     case IP_AFTER_USE:
6780       after = true;
6781       /* fall through */
6782     case IP_BEFORE_USE:
6783       incr_pos = gsi_for_stmt (cand->incremented_at);
6784       break;
6785 
6786     case IP_ORIGINAL:
6787       /* Mark that the iv is preserved.  */
6788       name_info (data, cand->var_before)->preserve_biv = true;
6789       name_info (data, cand->var_after)->preserve_biv = true;
6790 
6791       /* Rewrite the increment so that it uses var_before directly.  */
6792       use = find_interesting_uses_op (data, cand->var_after);
6793       group = data->vgroups[use->group_id];
6794       group->selected = cand;
6795       return;
6796     }
6797 
6798   gimple_add_tmp_var (cand->var_before);
6799 
6800   base = unshare_expr (cand->iv->base);
6801 
6802   create_iv (base, unshare_expr (cand->iv->step),
6803 	     cand->var_before, data->current_loop,
6804 	     &incr_pos, after, &cand->var_before, &cand->var_after);
6805 }
6806 
6807 /* Creates new induction variables described in SET.  */
6808 
6809 static void
6810 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6811 {
6812   unsigned i;
6813   struct iv_cand *cand;
6814   bitmap_iterator bi;
6815 
6816   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6817     {
6818       cand = data->vcands[i];
6819       create_new_iv (data, cand);
6820     }
6821 
6822   if (dump_file && (dump_flags & TDF_DETAILS))
6823     {
6824       fprintf (dump_file, "Selected IV set for loop %d",
6825 	       data->current_loop->num);
6826       if (data->loop_loc != UNKNOWN_LOCATION)
6827 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6828 		 LOCATION_LINE (data->loop_loc));
6829       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6830 	       avg_loop_niter (data->current_loop));
6831       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6832       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6833 	{
6834 	  cand = data->vcands[i];
6835 	  dump_cand (dump_file, cand);
6836 	}
6837       fprintf (dump_file, "\n");
6838     }
6839 }
6840 
6841 /* Rewrites USE (definition of iv used in a nonlinear expression)
6842    using candidate CAND.  */
6843 
6844 static void
6845 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6846 			    struct iv_use *use, struct iv_cand *cand)
6847 {
6848   gassign *ass;
6849   gimple_stmt_iterator bsi;
6850   tree comp, type = get_use_type (use), tgt;
6851 
6852   /* An important special case -- if we are asked to express the value
6853      of the original iv by itself, just exit; there is no need to
6854      introduce a new computation (that might also need casting the
6855      variable to unsigned and back).  */
6856   if (cand->pos == IP_ORIGINAL
6857       && cand->incremented_at == use->stmt)
6858     {
6859       tree op = NULL_TREE;
6860       enum tree_code stmt_code;
6861 
6862       gcc_assert (is_gimple_assign (use->stmt));
6863       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6864 
6865       /* Check whether we may leave the computation unchanged.
6866 	 This is the case only if it does not rely on other
6867 	 computations in the loop -- otherwise, the computation
6868 	 we rely upon may be removed in remove_unused_ivs,
6869 	 thus leading to ICE.  */
6870       stmt_code = gimple_assign_rhs_code (use->stmt);
6871       if (stmt_code == PLUS_EXPR
6872 	  || stmt_code == MINUS_EXPR
6873 	  || stmt_code == POINTER_PLUS_EXPR)
6874 	{
6875 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6876 	    op = gimple_assign_rhs2 (use->stmt);
6877 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6878 	    op = gimple_assign_rhs1 (use->stmt);
6879 	}
6880 
6881       if (op != NULL_TREE)
6882 	{
6883 	  if (expr_invariant_in_loop_p (data->current_loop, op))
6884 	    return;
6885 	  if (TREE_CODE (op) == SSA_NAME)
6886 	    {
6887 	      struct iv *iv = get_iv (data, op);
6888 	      if (iv != NULL && integer_zerop (iv->step))
6889 		return;
6890 	    }
6891 	}
6892     }
6893 
6894   switch (gimple_code (use->stmt))
6895     {
6896     case GIMPLE_PHI:
6897       tgt = PHI_RESULT (use->stmt);
6898 
6899       /* If we should keep the biv, do not replace it.  */
6900       if (name_info (data, tgt)->preserve_biv)
6901 	return;
6902 
6903       bsi = gsi_after_labels (gimple_bb (use->stmt));
6904       break;
6905 
6906     case GIMPLE_ASSIGN:
6907       tgt = gimple_assign_lhs (use->stmt);
6908       bsi = gsi_for_stmt (use->stmt);
6909       break;
6910 
6911     default:
6912       gcc_unreachable ();
6913     }
6914 
6915   aff_tree aff_inv, aff_var;
6916   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6917 			      use, cand, &aff_inv, &aff_var))
6918     gcc_unreachable ();
6919 
6920   unshare_aff_combination (&aff_inv);
6921   unshare_aff_combination (&aff_var);
6922   /* Prefer CSE opportunities over loop invariants by adding the offset
6923      last, so that iv_uses which differ only in their offsets can be CSEed.  */
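  /* For instance (illustrative only): two address uses at BASE + IV and
     BASE + IV + 8 then both compute BASE + IV first and differ only in the
     trailing constant, so the common part can be CSEed.  */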
6924   poly_widest_int offset = aff_inv.offset;
6925   aff_inv.offset = 0;
6926 
6927   gimple_seq stmt_list = NULL, seq = NULL;
6928   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6929   tree comp_op2 = aff_combination_to_tree (&aff_var);
6930   gcc_assert (comp_op1 && comp_op2);
6931 
6932   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6933   gimple_seq_add_seq (&stmt_list, seq);
6934   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6935   gimple_seq_add_seq (&stmt_list, seq);
6936 
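  /* Make sure a pointer operand, if any, ends up in comp_op1 so that the
     POINTER_PLUS_EXPR path below is used for it.  */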
6937   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6938     std::swap (comp_op1, comp_op2);
6939 
6940   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6941     {
6942       comp = fold_build_pointer_plus (comp_op1,
6943 				      fold_convert (sizetype, comp_op2));
6944       comp = fold_build_pointer_plus (comp,
6945 				      wide_int_to_tree (sizetype, offset));
6946     }
6947   else
6948     {
6949       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6950 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
6951       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6952 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6953     }
6954 
6955   comp = fold_convert (type, comp);
6956   if (!valid_gimple_rhs_p (comp)
6957       || (gimple_code (use->stmt) != GIMPLE_PHI
6958 	  /* We can't allow re-allocating the stmt as it might be pointed
6959 	     to still.  */
6960 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6961 	      >= gimple_num_ops (gsi_stmt (bsi)))))
6962     {
6963       comp = force_gimple_operand (comp, &seq, true, NULL);
6964       gimple_seq_add_seq (&stmt_list, seq);
6965       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6966 	{
6967 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6968 	  /* As this isn't a plain copy we have to reset alignment
6969 	     information.  */
6970 	  if (SSA_NAME_PTR_INFO (comp))
6971 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6972 	}
6973     }
6974 
6975   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6976   if (gimple_code (use->stmt) == GIMPLE_PHI)
6977     {
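      /* A PHI node cannot hold the computed expression directly; emit an
	 assignment to the PHI result and remove the PHI node.  */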
6978       ass = gimple_build_assign (tgt, comp);
6979       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6980 
6981       bsi = gsi_for_stmt (use->stmt);
6982       remove_phi_node (&bsi, false);
6983     }
6984   else
6985     {
6986       gimple_assign_set_rhs_from_tree (&bsi, comp);
6987       use->stmt = gsi_stmt (bsi);
6988     }
6989 }
6990 
6991 /* Performs a peephole optimization to reorder the iv update statement with
6992    a mem ref to enable instruction combining in later phases. The mem ref uses
6993    the iv value before the update, so the reordering transformation requires
6994    adjustment of the offset. CAND is the selected IV_CAND.
6995 
6996    Example:
6997 
6998    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6999    iv2 = iv1 + 1;
7000 
7001    if (t < val)      (1)
7002      goto L;
7003    goto Head;
7004 
7005 
   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:
7008 
7009 
7010    iv2 = iv1 + 1;
7011    t = MEM_REF (base, iv2, 8, 8);
7012    if (t < val)
7013      goto L;
7014    goto Head;
7015 */
7016 
7017 static void
7018 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7019 {
7020   tree var_after;
7021   gimple *iv_update, *stmt;
7022   basic_block bb;
7023   gimple_stmt_iterator gsi, gsi_iv;
7024 
7025   if (cand->pos != IP_NORMAL)
7026     return;
7027 
7028   var_after = cand->var_after;
7029   iv_update = SSA_NAME_DEF_STMT (var_after);
7030 
7031   bb = gimple_bb (iv_update);
7032   gsi = gsi_last_nondebug_bb (bb);
7033   stmt = gsi_stmt (gsi);
7034 
7035   /* Only handle conditional statement for now.  */
7036   if (gimple_code (stmt) != GIMPLE_COND)
7037     return;
7038 
7039   gsi_prev_nondebug (&gsi);
7040   stmt = gsi_stmt (gsi);
7041   if (stmt != iv_update)
7042     return;
7043 
7044   gsi_prev_nondebug (&gsi);
7045   if (gsi_end_p (gsi))
7046     return;
7047 
7048   stmt = gsi_stmt (gsi);
7049   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7050     return;
7051 
7052   if (stmt != use->stmt)
7053     return;
7054 
7055   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7056     return;
7057 
7058   if (dump_file && (dump_flags & TDF_DETAILS))
7059     {
7060       fprintf (dump_file, "Reordering \n");
7061       print_gimple_stmt (dump_file, iv_update, 0);
7062       print_gimple_stmt (dump_file, use->stmt, 0);
7063       fprintf (dump_file, "\n");
7064     }
7065 
7066   gsi = gsi_for_stmt (use->stmt);
7067   gsi_iv = gsi_for_stmt (iv_update);
7068   gsi_move_before (&gsi_iv, &gsi);
7069 
7070   cand->pos = IP_BEFORE_USE;
7071   cand->incremented_at = use->stmt;
7072 }
7073 
7074 /* Return the alias pointer type that should be used for a MEM_REF
7075    associated with USE, which has type USE_PTR_ADDRESS.  */
7076 
7077 static tree
7078 get_alias_ptr_type_for_ptr_address (iv_use *use)
7079 {
7080   gcall *call = as_a <gcall *> (use->stmt);
7081   switch (gimple_call_internal_fn (call))
7082     {
7083     case IFN_MASK_LOAD:
7084     case IFN_MASK_STORE:
7085       /* The second argument contains the correct alias type.  */
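      /* E.g. (illustrative) for a call such as
	   _1 = .MASK_LOAD (ptr_2, align_3, mask_4)
	 the rewritten operand is argument 0, while the type of argument 1
	 carries the alias pointer type.  */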
      gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7087       return TREE_TYPE (gimple_call_arg (call, 1));
7088 
7089     default:
7090       gcc_unreachable ();
7091     }
7092 }
7093 
7094 
7095 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7096 
7097 static void
7098 rewrite_use_address (struct ivopts_data *data,
7099 		     struct iv_use *use, struct iv_cand *cand)
7100 {
7101   aff_tree aff;
7102   bool ok;
7103 
7104   adjust_iv_update_pos (cand, use);
7105   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7106   gcc_assert (ok);
7107   unshare_aff_combination (&aff);
7108 
7109   /* To avoid undefined overflow problems, all IV candidates use unsigned
7110      integer types.  The drawback is that this makes it impossible for
7111      create_mem_ref to distinguish an IV that is based on a memory object
7112      from one that represents simply an offset.
7113 
7114      To work around this problem, we pass a hint to create_mem_ref that
7115      indicates which variable (if any) in aff is an IV based on a memory
7116      object.  Note that we only consider the candidate.  If this is not
7117      based on an object, the base of the reference is in some subexpression
7118      of the use -- but these will use pointer types, so they are recognized
7119      by the create_mem_ref heuristics anyway.  */
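  /* E.g. (an illustrative case): if the candidate's base object is &a[0],
     the candidate's SSA name at USE->STMT is passed as the hint, so that
     create_mem_ref can treat that variable as the pointer to the accessed
     object when building the memory reference.  */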
7120   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7121   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7122   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7123   tree type = use->mem_type;
7124   tree alias_ptr_type;
7125   if (use->type == USE_PTR_ADDRESS)
7126     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7127   else
7128     {
7129       gcc_assert (type == TREE_TYPE (*use->op_p));
7130       unsigned int align = get_object_alignment (*use->op_p);
7131       if (align != TYPE_ALIGN (type))
7132 	type = build_aligned_type (type, align);
7133       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7134     }
7135   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7136 			     iv, base_hint, data->speed);
7137 
7138   if (use->type == USE_PTR_ADDRESS)
7139     {
7140       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7141       ref = fold_convert (get_use_type (use), ref);
7142       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7143 				      true, GSI_SAME_STMT);
7144     }
7145   else
7146     copy_ref_info (ref, *use->op_p);
7147 
7148   *use->op_p = ref;
7149 }
7150 
/* Rewrites USE (a condition in which one of the operands is an iv) using
   candidate CAND.  */
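/* For instance (an illustrative sketch), an exit test

     if (i_1 < n_2)

   may be rewritten into a test of the selected candidate against a
   precomputed bound, e.g.

     if (ivtmp_3 != bound_4)

   when induction variable elimination succeeds; otherwise the compared
   operand is simply re-expressed in terms of the candidate.  */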
7153 
7154 static void
7155 rewrite_use_compare (struct ivopts_data *data,
7156 		     struct iv_use *use, struct iv_cand *cand)
7157 {
7158   tree comp, op, bound;
7159   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7160   enum tree_code compare;
7161   struct iv_group *group = data->vgroups[use->group_id];
7162   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7163 
7164   bound = cp->value;
7165   if (bound)
7166     {
7167       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7168       tree var_type = TREE_TYPE (var);
7169       gimple_seq stmts;
7170 
7171       if (dump_file && (dump_flags & TDF_DETAILS))
7172 	{
7173 	  fprintf (dump_file, "Replacing exit test: ");
7174 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7175 	}
7176       compare = cp->comp;
7177       bound = unshare_expr (fold_convert (var_type, bound));
7178       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7179       if (stmts)
7180 	gsi_insert_seq_on_edge_immediate (
7181 		loop_preheader_edge (data->current_loop),
7182 		stmts);
7183 
7184       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7185       gimple_cond_set_lhs (cond_stmt, var);
7186       gimple_cond_set_code (cond_stmt, compare);
7187       gimple_cond_set_rhs (cond_stmt, op);
7188       return;
7189     }
7190 
7191   /* The induction variable elimination failed; just express the original
7192      giv.  */
7193   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7194   gcc_assert (comp != NULL_TREE);
7195   gcc_assert (use->op_p != NULL);
7196   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7197 					 SSA_NAME_VAR (*use->op_p),
7198 					 true, GSI_SAME_STMT);
7199 }
7200 
7201 /* Rewrite the groups using the selected induction variables.  */
7202 
7203 static void
7204 rewrite_groups (struct ivopts_data *data)
7205 {
7206   unsigned i, j;
7207 
7208   for (i = 0; i < data->vgroups.length (); i++)
7209     {
7210       struct iv_group *group = data->vgroups[i];
7211       struct iv_cand *cand = group->selected;
7212 
7213       gcc_assert (cand);
7214 
7215       if (group->type == USE_NONLINEAR_EXPR)
7216 	{
7217 	  for (j = 0; j < group->vuses.length (); j++)
7218 	    {
7219 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7220 	      update_stmt (group->vuses[j]->stmt);
7221 	    }
7222 	}
7223       else if (address_p (group->type))
7224 	{
7225 	  for (j = 0; j < group->vuses.length (); j++)
7226 	    {
7227 	      rewrite_use_address (data, group->vuses[j], cand);
7228 	      update_stmt (group->vuses[j]->stmt);
7229 	    }
7230 	}
7231       else
7232 	{
7233 	  gcc_assert (group->type == USE_COMPARE);
7234 
7235 	  for (j = 0; j < group->vuses.length (); j++)
7236 	    {
7237 	      rewrite_use_compare (data, group->vuses[j], cand);
7238 	      update_stmt (group->vuses[j]->stmt);
7239 	    }
7240 	}
7241     }
7242 }
7243 
7244 /* Removes the ivs that are not used after rewriting.  */
7245 
7246 static void
7247 remove_unused_ivs (struct ivopts_data *data)
7248 {
7249   unsigned j;
7250   bitmap_iterator bi;
7251   bitmap toremove = BITMAP_ALLOC (NULL);
7252 
7253   /* Figure out an order in which to release SSA DEFs so that we don't
7254      release something that we'd have to propagate into a debug stmt
7255      afterwards.  */
7256   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7257     {
7258       struct version_info *info;
7259 
7260       info = ver_info (data, j);
7261       if (info->iv
7262 	  && !integer_zerop (info->iv->step)
7263 	  && !info->inv_id
7264 	  && !info->iv->nonlin_use
7265 	  && !info->preserve_biv)
7266 	{
7267 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7268 
7269 	  tree def = info->iv->ssa_name;
7270 
7271 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7272 	    {
7273 	      imm_use_iterator imm_iter;
7274 	      use_operand_p use_p;
7275 	      gimple *stmt;
7276 	      int count = 0;
7277 
7278 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7279 		{
7280 		  if (!gimple_debug_bind_p (stmt))
7281 		    continue;
7282 
7283 		  /* We just want to determine whether to do nothing
7284 		     (count == 0), to substitute the computed
7285 		     expression into a single use of the SSA DEF by
7286 		     itself (count == 1), or to use a debug temp
7287 		     because the SSA DEF is used multiple times or as
7288 		     part of a larger expression (count > 1). */
7289 		  count++;
7290 		  if (gimple_debug_bind_get_value (stmt) != def)
7291 		    count++;
7292 
7293 		  if (count > 1)
7294 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7295 		}
7296 
7297 	      if (!count)
7298 		continue;
7299 
7300 	      struct iv_use dummy_use;
7301 	      struct iv_cand *best_cand = NULL, *cand;
7302 	      unsigned i, best_pref = 0, cand_pref;
7303 
7304 	      memset (&dummy_use, 0, sizeof (dummy_use));
7305 	      dummy_use.iv = info->iv;
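	      /* Score each group's selected candidate: a step matching the
		 dead IV's step counts 4, a base of the same mode counts 2,
		 and a constant base counts 1; the highest-scoring candidate
		 is used to express the value for debug info.  */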
7306 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7307 		{
7308 		  cand = data->vgroups[i]->selected;
7309 		  if (cand == best_cand)
7310 		    continue;
7311 		  cand_pref = operand_equal_p (cand->iv->step,
7312 					       info->iv->step, 0)
7313 		    ? 4 : 0;
7314 		  cand_pref
7315 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7316 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7317 		    ? 2 : 0;
7318 		  cand_pref
7319 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7320 		    ? 1 : 0;
7321 		  if (best_cand == NULL || best_pref < cand_pref)
7322 		    {
7323 		      best_cand = cand;
7324 		      best_pref = cand_pref;
7325 		    }
7326 		}
7327 
7328 	      if (!best_cand)
7329 		continue;
7330 
7331 	      tree comp = get_computation_at (data->current_loop,
7332 					      SSA_NAME_DEF_STMT (def),
7333 					      &dummy_use, best_cand);
7334 	      if (!comp)
7335 		continue;
7336 
7337 	      if (count > 1)
7338 		{
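		  /* The expression is needed in more than one debug stmt, or
		     as part of a larger expression; bind it to a debug
		     temporary and reference that instead of duplicating the
		     whole computation.  */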
7339 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7340 		  DECL_ARTIFICIAL (vexpr) = 1;
7341 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7342 		  if (SSA_NAME_VAR (def))
7343 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7344 		  else
7345 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7346 		  gdebug *def_temp
7347 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7348 		  gimple_stmt_iterator gsi;
7349 
7350 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7351 		    gsi = gsi_after_labels (gimple_bb
7352 					    (SSA_NAME_DEF_STMT (def)));
7353 		  else
7354 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7355 
7356 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7357 		  comp = vexpr;
7358 		}
7359 
7360 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7361 		{
7362 		  if (!gimple_debug_bind_p (stmt))
7363 		    continue;
7364 
7365 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7366 		    SET_USE (use_p, comp);
7367 
7368 		  update_stmt (stmt);
7369 		}
7370 	    }
7371 	}
7372     }
7373 
7374   release_defs_bitset (toremove);
7375 
7376   BITMAP_FREE (toremove);
7377 }
7378 
7379 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7380    for hash_map::traverse.  */
7381 
7382 bool
7383 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7384 {
7385   free (value);
7386   return true;
7387 }
7388 
7389 /* Frees data allocated by the optimization of a single loop.  */
7390 
7391 static void
7392 free_loop_data (struct ivopts_data *data)
7393 {
7394   unsigned i, j;
7395   bitmap_iterator bi;
7396   tree obj;
7397 
7398   if (data->niters)
7399     {
7400       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7401       delete data->niters;
7402       data->niters = NULL;
7403     }
7404 
7405   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7406     {
7407       struct version_info *info;
7408 
7409       info = ver_info (data, i);
7410       info->iv = NULL;
7411       info->has_nonlin_use = false;
7412       info->preserve_biv = false;
7413       info->inv_id = 0;
7414     }
7415   bitmap_clear (data->relevant);
7416   bitmap_clear (data->important_candidates);
7417 
7418   for (i = 0; i < data->vgroups.length (); i++)
7419     {
7420       struct iv_group *group = data->vgroups[i];
7421 
7422       for (j = 0; j < group->vuses.length (); j++)
7423 	free (group->vuses[j]);
7424       group->vuses.release ();
7425 
7426       BITMAP_FREE (group->related_cands);
7427       for (j = 0; j < group->n_map_members; j++)
7428 	{
7429 	  if (group->cost_map[j].inv_vars)
7430 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7431 	  if (group->cost_map[j].inv_exprs)
7432 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7433 	}
7434 
7435       free (group->cost_map);
7436       free (group);
7437     }
7438   data->vgroups.truncate (0);
7439 
7440   for (i = 0; i < data->vcands.length (); i++)
7441     {
7442       struct iv_cand *cand = data->vcands[i];
7443 
7444       if (cand->inv_vars)
7445 	BITMAP_FREE (cand->inv_vars);
7446       if (cand->inv_exprs)
7447 	BITMAP_FREE (cand->inv_exprs);
7448       free (cand);
7449     }
7450   data->vcands.truncate (0);
7451 
7452   if (data->version_info_size < num_ssa_names)
7453     {
7454       data->version_info_size = 2 * num_ssa_names;
7455       free (data->version_info);
7456       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7457     }
7458 
7459   data->max_inv_var_id = 0;
7460   data->max_inv_expr_id = 0;
7461 
7462   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7463     SET_DECL_RTL (obj, NULL_RTX);
7464 
7465   decl_rtl_to_reset.truncate (0);
7466 
7467   data->inv_expr_tab->empty ();
7468 
7469   data->iv_common_cand_tab->empty ();
7470   data->iv_common_cands.truncate (0);
7471 }
7472 
/* Finalizes data structures used by the iv optimization pass.  */
7475 
7476 static void
7477 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7478 {
7479   free_loop_data (data);
7480   free (data->version_info);
7481   BITMAP_FREE (data->relevant);
7482   BITMAP_FREE (data->important_candidates);
7483 
7484   decl_rtl_to_reset.release ();
7485   data->vgroups.release ();
7486   data->vcands.release ();
7487   delete data->inv_expr_tab;
7488   data->inv_expr_tab = NULL;
7489   free_affine_expand_cache (&data->name_expansion_cache);
7490   delete data->iv_common_cand_tab;
7491   data->iv_common_cand_tab = NULL;
7492   data->iv_common_cands.release ();
7493   obstack_free (&data->iv_obstack, NULL);
7494 }
7495 
/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   includes a call that is neither an internal function nor an inexpensive
   builtin.  */
7497 
7498 static bool
7499 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7500 {
7501   gimple_stmt_iterator gsi;
7502   unsigned i;
7503 
7504   for (i = 0; i < num_nodes; i++)
7505     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7506       {
7507 	gimple *stmt = gsi_stmt (gsi);
7508 	if (is_gimple_call (stmt)
7509 	    && !gimple_call_internal_p (stmt)
7510 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7511 	  return true;
7512       }
7513   return false;
7514 }
7515 
7516 /* Optimizes the LOOP.  Returns true if anything changed.  */
7517 
7518 static bool
7519 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7520 {
7521   bool changed = false;
7522   struct iv_ca *iv_ca;
7523   edge exit = single_dom_exit (loop);
7524   basic_block *body;
7525 
7526   gcc_assert (!data->niters);
7527   data->current_loop = loop;
7528   data->loop_loc = find_loop_location (loop);
7529   data->speed = optimize_loop_for_speed_p (loop);
7530 
7531   if (dump_file && (dump_flags & TDF_DETAILS))
7532     {
7533       fprintf (dump_file, "Processing loop %d", loop->num);
7534       if (data->loop_loc != UNKNOWN_LOCATION)
7535 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7536 		 LOCATION_LINE (data->loop_loc));
7537       fprintf (dump_file, "\n");
7538 
7539       if (exit)
7540 	{
7541 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7542 		   exit->src->index, exit->dest->index);
7543 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7544 	  fprintf (dump_file, "\n");
7545 	}
7546 
7547       fprintf (dump_file, "\n");
7548     }
7549 
7550   body = get_loop_body (loop);
7551   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7552   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7553   free (body);
7554 
7555   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7556 
7557   /* For each ssa name determines whether it behaves as an induction variable
7558      in some loop.  */
7559   if (!find_induction_variables (data))
7560     goto finish;
7561 
7562   /* Finds interesting uses (item 1).  */
7563   find_interesting_uses (data);
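  /* Give up on loops with too many use groups, to keep the cost computation
     below from becoming too expensive.  */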
7564   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7565     goto finish;
7566 
7567   /* Finds candidates for the induction variables (item 2).  */
7568   find_iv_candidates (data);
7569 
7570   /* Calculates the costs (item 3, part 1).  */
7571   determine_iv_costs (data);
7572   determine_group_iv_costs (data);
7573   determine_set_costs (data);
7574 
7575   /* Find the optimal set of induction variables (item 3, part 2).  */
7576   iv_ca = find_optimal_iv_set (data);
7577   if (!iv_ca)
7578     goto finish;
7579   changed = true;
7580 
7581   /* Create the new induction variables (item 4, part 1).  */
7582   create_new_ivs (data, iv_ca);
7583   iv_ca_free (&iv_ca);
7584 
7585   /* Rewrite the uses (item 4, part 2).  */
7586   rewrite_groups (data);
7587 
7588   /* Remove the ivs that are unused after rewriting.  */
7589   remove_unused_ivs (data);
7590 
7591   /* We have changed the structure of induction variables; it might happen
7592      that definitions in the scev database refer to some of them that were
7593      eliminated.  */
7594   scev_reset ();
7595 
7596 finish:
7597   free_loop_data (data);
7598 
7599   return changed;
7600 }
7601 
7602 /* Main entry point.  Optimizes induction variables in loops.  */
7603 
7604 void
7605 tree_ssa_iv_optimize (void)
7606 {
7607   struct loop *loop;
7608   struct ivopts_data data;
7609 
7610   tree_ssa_iv_optimize_init (&data);
7611 
7612   /* Optimize the loops starting with the innermost ones.  */
7613   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7614     {
7615       if (dump_file && (dump_flags & TDF_DETAILS))
7616 	flow_loop_dump (loop, dump_file, NULL, 1);
7617 
7618       tree_ssa_iv_optimize_loop (&data, loop);
7619     }
7620 
7621   tree_ssa_iv_optimize_finalize (&data);
7622 }
7623 
7624 #include "gt-tree-ssa-loop-ivopts.h"
7625