1 /* Induction variable optimizations.
2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It proceeds in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only in their constant offsets.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variable's
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.  */
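
/* As a rough illustration of the effect of the steps above (a sketch only,
   not lifted from any particular testcase), consider a loop such as

     int
     sum (int *a, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
	 s += a[i];
       return s;
     }

   The address computation for a[i] is an interesting address type use of i,
   the exit test i < n is a compare type use, and i itself is the original
   biv.  One possible outcome is to strength-reduce the address into a
   pointer induction variable and to eliminate i by rewriting the exit test
   against that pointer, roughly

     int
     sum (int *a, int n)
     {
       int s = 0;
       for (int *p = a; p < a + n; p++)
	 s += *p;
       return s;
     }

   Whether this particular shape is chosen depends on the cost model and
   the target.  */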
68 
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111 
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113    cost of different addressing modes.  This should be moved to a TBD
114    interface between the GIMPLE and RTL worlds.  */
115 
116 /* The infinite cost.  */
117 #define INFTY 10000000
118 
119 /* Returns the expected number of loop iterations for LOOP.
120    The average trip count is computed from profile data if it
121    exists. */
122 
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127   if (niter == -1)
128     {
129       niter = likely_max_stmt_executions_int (loop);
130 
131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133     }
134 
135   return niter;
136 }
137 
138 struct iv_use;
139 
140 /* Representation of the induction variable.  */
141 struct iv
142 {
143   tree base;		/* Initial value of the iv.  */
144   tree base_object;	/* A memory object to which the induction variable points.  */
145   tree step;		/* Step of the iv (constant only).  */
146   tree ssa_name;	/* The ssa name with the value.  */
147   struct iv_use *nonlin_use;	/* The nonlinear use recorded for this iv, if any.  */
148   bool biv_p;		/* Is it a biv?  */
149   bool no_overflow;	/* True if the iv doesn't overflow.  */
150   bool have_address_use;/* For biv, indicate if it's used in any address
151 			   type use.  */
152 };
153 
154 /* Per-ssa version information (induction variable descriptions, etc.).  */
155 struct version_info
156 {
157   tree name;		/* The ssa name.  */
158   struct iv *iv;	/* Induction variable description.  */
159   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
160 			   an expression that is not an induction variable.  */
161   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
162   unsigned inv_id;	/* Id of an invariant.  */
163 };
164 
165 /* Types of uses.  */
166 enum use_type
167 {
168   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
169   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
170 			   reference.  */
171   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
172 			   cases where the expansion of the function
173 			   will turn the argument into a normal address.  */
174   USE_COMPARE		/* Use is a compare.  */
175 };
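
/* For instance (an illustrative mapping, not an exhaustive one): the value
   of an iv stored to memory or used in a multiplication is a
   USE_NONLINEAR_EXPR use, the address computed for a[i] or *p is a
   USE_REF_ADDRESS use, a pointer passed to an internal function such as a
   masked load/store whose expansion dereferences it is a USE_PTR_ADDRESS
   use, and the exit test i < n is a USE_COMPARE use.  */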
176 
177 /* Cost of a computation.  */
178 struct comp_cost
179 {
180   comp_cost (): cost (0), complexity (0), scratch (0)
181   {}
182 
183   comp_cost (int cost, unsigned complexity, int scratch = 0)
184     : cost (cost), complexity (complexity), scratch (scratch)
185   {}
186 
187   /* Returns true if COST is infinite.  */
188   bool infinite_cost_p ();
189 
190   /* Adds costs COST1 and COST2.  */
191   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192 
193   /* Adds COST to the comp_cost.  */
194   comp_cost operator+= (comp_cost cost);
195 
196   /* Adds constant C to this comp_cost.  */
197   comp_cost operator+= (HOST_WIDE_INT c);
198 
199   /* Subtracts constant C from this comp_cost.  */
200   comp_cost operator-= (HOST_WIDE_INT c);
201 
202   /* Divide the comp_cost by constant C.  */
203   comp_cost operator/= (HOST_WIDE_INT c);
204 
205   /* Multiply the comp_cost by constant C.  */
206   comp_cost operator*= (HOST_WIDE_INT c);
207 
208   /* Subtracts costs COST1 and COST2.  */
209   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210 
211   /* Subtracts COST from this comp_cost.  */
212   comp_cost operator-= (comp_cost cost);
213 
214   /* Returns true if COST1 is smaller than COST2.  */
215   friend bool operator< (comp_cost cost1, comp_cost cost2);
216 
217   /* Returns true if COST1 and COST2 are equal.  */
218   friend bool operator== (comp_cost cost1, comp_cost cost2);
219 
220   /* Returns true if COST1 is smaller than or equal to COST2.  */
221   friend bool operator<= (comp_cost cost1, comp_cost cost2);
222 
223   int cost;		/* The runtime cost.  */
224   unsigned complexity;  /* The estimate of the complexity of the code for
225 			   the computation (in no concrete units --
226 			   complexity field should be larger for more
227 			   complex expressions and addressing modes).  */
228   int scratch;		/* Scratch used during cost computation.  */
229 };
230 
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233 
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237   return cost == INFTY;
238 }
239 
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244     return infinite_cost;
245 
246   cost1.cost += cost2.cost;
247   cost1.complexity += cost2.complexity;
248 
249   return cost1;
250 }
251 
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
254 {
255   if (cost1.infinite_cost_p ())
256     return infinite_cost;
257 
258   gcc_assert (!cost2.infinite_cost_p ());
259 
260   cost1.cost -= cost2.cost;
261   cost1.complexity -= cost2.complexity;
262 
263   return cost1;
264 }
265 
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
268 {
269   *this = *this + cost;
270   return *this;
271 }
272 
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
275 {
276   if (infinite_cost_p ())
277     return *this;
278 
279   this->cost += c;
280 
281   return *this;
282 }
283 
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
286 {
287   if (infinite_cost_p ())
288     return *this;
289 
290   this->cost -= c;
291 
292   return *this;
293 }
294 
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
297 {
298   if (infinite_cost_p ())
299     return *this;
300 
301   this->cost /= c;
302 
303   return *this;
304 }
305 
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
308 {
309   if (infinite_cost_p ())
310     return *this;
311 
312   this->cost *= c;
313 
314   return *this;
315 }
316 
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
319 {
320   *this = *this - cost;
321   return *this;
322 }
323 
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
326 {
327   if (cost1.cost == cost2.cost)
328     return cost1.complexity < cost2.complexity;
329 
330   return cost1.cost < cost2.cost;
331 }
332 
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
335 {
336   return cost1.cost == cost2.cost
337     && cost1.complexity == cost2.complexity;
338 }
339 
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
342 {
343   return cost1 < cost2 || cost1 == cost2;
344 }
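
/* For example, costs are ordered primarily by the runtime cost and only
   then by complexity: comp_cost (4, 1) < comp_cost (4, 2) < comp_cost (5, 0),
   while adding infinite_cost to any cost yields infinite_cost again.  */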
345 
346 struct iv_inv_expr_ent;
347 
348 /* The candidate - cost pair.  */
349 struct cost_pair
350 {
351   struct iv_cand *cand;	/* The candidate.  */
352   comp_cost cost;	/* The cost.  */
353   enum tree_code comp;	/* For iv elimination, the comparison.  */
354   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
355 			   preserved when representing iv_use with iv_cand.  */
356   bitmap inv_exprs;	/* The list of newly created invariant expressions
357 			   when representing iv_use with iv_cand.  */
358   tree value;		/* For final value elimination, the expression for
359 			   the final value of the iv.  For iv elimination,
360 			   the new bound to compare with.  */
361 };
362 
363 /* Use.  */
364 struct iv_use
365 {
366   unsigned id;		/* The id of the use.  */
367   unsigned group_id;	/* The group id the use belongs to.  */
368   enum use_type type;	/* Type of the use.  */
369   tree mem_type;	/* The memory type to use when testing whether an
370 			   address is legitimate, and what the address's
371 			   cost is.  */
372   struct iv *iv;	/* The induction variable it is based on.  */
373   gimple *stmt;		/* Statement in that it occurs.  */
374   tree *op_p;		/* The place where it occurs.  */
375 
376   tree addr_base;	/* Base address with const offset stripped.  */
377   poly_uint64_pod addr_offset;
378 			/* Const offset stripped from base address.  */
379 };
380 
381 /* Group of uses.  */
382 struct iv_group
383 {
384   /* The id of the group.  */
385   unsigned id;
386   /* Uses of the group are of the same type.  */
387   enum use_type type;
388   /* The set of "related" IV candidates, plus the important ones.  */
389   bitmap related_cands;
390   /* Number of IV candidates in the cost_map.  */
391   unsigned n_map_members;
392   /* The costs with respect to the iv candidates.  */
393   struct cost_pair *cost_map;
394   /* The selected candidate for the group.  */
395   struct iv_cand *selected;
396   /* Uses in the group.  */
397   vec<struct iv_use *> vuses;
398 };
399 
400 /* The position where the iv is computed.  */
401 enum iv_position
402 {
403   IP_NORMAL,		/* At the end, just before the exit condition.  */
404   IP_END,		/* At the end of the latch block.  */
405   IP_BEFORE_USE,	/* Immediately before a specific use.  */
406   IP_AFTER_USE,		/* Immediately after a specific use.  */
407   IP_ORIGINAL		/* The original biv.  */
408 };
409 
410 /* The induction variable candidate.  */
411 struct iv_cand
412 {
413   unsigned id;		/* The number of the candidate.  */
414   bool important;	/* Whether this is an "important" candidate, i.e. such
415 			   that it should be considered by all uses.  */
416   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
417   gimple *incremented_at;/* For original biv, the statement where it is
418 			   incremented.  */
419   tree var_before;	/* The variable used for it before increment.  */
420   tree var_after;	/* The variable used for it after increment.  */
421   struct iv *iv;	/* The value of the candidate.  NULL for
422 			   "pseudocandidate" used to indicate the possibility
423 			   to replace the final value of an iv by direct
424 			   computation of the value.  */
425   unsigned cost;	/* Cost of the candidate.  */
426   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
427   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 			      where it is incremented.  */
429   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
430 			   iv_cand.  */
431   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
432 			   handle it as a new invariant expression which will
433 			   be hoisted out of loop.  */
434   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
435 			   smaller type.  */
436 };
437 
438 /* Hashtable entry for common candidate derived from iv uses.  */
439 struct iv_common_cand
440 {
441   tree base;
442   tree step;
443   /* IV uses from which this common candidate is derived.  */
444   auto_vec<struct iv_use *> uses;
445   hashval_t hash;
446 };
447 
448 /* Hashtable helpers.  */
449 
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
451 {
452   static inline hashval_t hash (const iv_common_cand *);
453   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
454 };
455 
456 /* Hash function for possible common candidates.  */
457 
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
460 {
461   return ccand->hash;
462 }
463 
464 /* Hash table equality function for common candidates.  */
465 
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 			      const iv_common_cand *ccand2)
469 {
470   return (ccand1->hash == ccand2->hash
471 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
472 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
473 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475 }
476 
477 /* Loop invariant expression hashtable entry.  */
478 
479 struct iv_inv_expr_ent
480 {
481   /* Tree expression of the entry.  */
482   tree expr;
483   /* Unique identifier.  */
484   int id;
485   /* Hash value.  */
486   hashval_t hash;
487 };
488 
489 /* Sort iv_inv_expr_ent pair A and B by id field.  */
490 
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
493 {
494   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496 
497   unsigned id1 = (*e1)->id;
498   unsigned id2 = (*e2)->id;
499 
500   if (id1 < id2)
501     return -1;
502   else if (id1 > id2)
503     return 1;
504   else
505     return 0;
506 }
507 
508 /* Hashtable helpers.  */
509 
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
511 {
512   static inline hashval_t hash (const iv_inv_expr_ent *);
513   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
514 };
515 
516 /* Return true if uses of type TYPE represent some form of address.  */
517 
518 inline bool
519 address_p (use_type type)
520 {
521   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522 }
523 
524 /* Hash function for loop invariant expressions.  */
525 
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
528 {
529   return expr->hash;
530 }
531 
532 /* Hash table equality function for expressions.  */
533 
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 			   const iv_inv_expr_ent *expr2)
537 {
538   return expr1->hash == expr2->hash
539 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
540 }
541 
542 struct ivopts_data
543 {
544   /* The currently optimized loop.  */
545   struct loop *current_loop;
546   location_t loop_loc;
547 
548   /* Numbers of iterations for all exits of the current loop.  */
549   hash_map<edge, tree_niter_desc *> *niters;
550 
551   /* Number of registers used in it.  */
552   unsigned regs_used;
553 
554   /* The size of version_info array allocated.  */
555   unsigned version_info_size;
556 
557   /* The array of information for the ssa names.  */
558   struct version_info *version_info;
559 
560   /* The hashtable of loop invariant expressions created
561      by ivopt.  */
562   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
563 
564   /* The bitmap of indices in version_info whose value was changed.  */
565   bitmap relevant;
566 
567   /* The uses of induction variables.  */
568   vec<iv_group *> vgroups;
569 
570   /* The candidates.  */
571   vec<iv_cand *> vcands;
572 
573   /* A bitmap of important candidates.  */
574   bitmap important_candidates;
575 
576   /* Cache used by tree_to_aff_combination_expand.  */
577   hash_map<tree, name_expansion *> *name_expansion_cache;
578 
579   /* The hashtable of common candidates derived from iv uses.  */
580   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
581 
582   /* The common candidates.  */
583   vec<iv_common_cand *> iv_common_cands;
584 
585   /* The maximum invariant variable id.  */
586   unsigned max_inv_var_id;
587 
588   /* The maximum invariant expression id.  */
589   unsigned max_inv_expr_id;
590 
591   /* Number of no_overflow BIVs which are not used in memory address.  */
592   unsigned bivs_not_used_in_addr;
593 
594   /* Obstack for iv structure.  */
595   struct obstack iv_obstack;
596 
597   /* Whether to consider all candidates when replacing a use; if false, only
598      related and important candidates are considered.  */
599   bool consider_all_candidates;
600 
601   /* Are we optimizing for speed?  */
602   bool speed;
603 
604   /* Whether the loop body includes any function calls.  */
605   bool body_includes_call;
606 
607   /* Whether the loop body can only be exited via a single exit.  */
608   bool loop_single_exit_p;
609 };
610 
611 /* An assignment of iv candidates to uses.  */
612 
613 struct iv_ca
614 {
615   /* The number of uses covered by the assignment.  */
616   unsigned upto;
617 
618   /* Number of uses that cannot be expressed by the candidates in the set.  */
619   unsigned bad_groups;
620 
621   /* Candidate assigned to a use, together with the related costs.  */
622   struct cost_pair **cand_for_group;
623 
624   /* Number of times each candidate is used.  */
625   unsigned *n_cand_uses;
626 
627   /* The candidates used.  */
628   bitmap cands;
629 
630   /* The number of candidates in the set.  */
631   unsigned n_cands;
632 
633   /* The number of invariants needed, including both invariant variables and
634      invariant expressions.  */
635   unsigned n_invs;
636 
637   /* Total cost of expressing uses.  */
638   comp_cost cand_use_cost;
639 
640   /* Total cost of candidates.  */
641   unsigned cand_cost;
642 
643   /* Number of times each invariant variable is used.  */
644   unsigned *n_inv_var_uses;
645 
646   /* Number of times each invariant expression is used.  */
647   unsigned *n_inv_expr_uses;
648 
649   /* Total cost of the assignment.  */
650   comp_cost cost;
651 };
652 
653 /* Difference of two iv candidate assignments.  */
654 
655 struct iv_ca_delta
656 {
657   /* Changed group.  */
658   struct iv_group *group;
659 
660   /* An old assignment (for rollback purposes).  */
661   struct cost_pair *old_cp;
662 
663   /* A new assignment.  */
664   struct cost_pair *new_cp;
665 
666   /* Next change in the list.  */
667   struct iv_ca_delta *next;
668 };
669 
670 /* Bound on number of candidates below which all candidates are considered.  */
671 
672 #define CONSIDER_ALL_CANDIDATES_BOUND \
673   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
674 
675 /* If there are more iv occurrences than this, we just give up (it is quite
676    unlikely that optimizing such a loop would help, and it would take ages).  */
677 
678 #define MAX_CONSIDERED_GROUPS \
679   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
680 
681 /* If there are at most this number of ivs in the set, always try removing
682    unnecessary ivs from the set.  */
683 
684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
685   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
686 
687 /* The list of trees for which the decl_rtl field must be reset is stored
688    here.  */
689 
690 static vec<tree> decl_rtl_to_reset;
691 
692 static comp_cost force_expr_to_var_cost (tree, bool);
693 
694 /* The single loop exit if it dominates the latch, NULL otherwise.  */
695 
696 edge
697 single_dom_exit (struct loop *loop)
698 {
699   edge exit = single_exit (loop);
700 
701   if (!exit)
702     return NULL;
703 
704   if (!just_once_each_iteration_p (loop, exit->src))
705     return NULL;
706 
707   return exit;
708 }
709 
710 /* Dumps information about the induction variable IV to FILE.  Don't dump
711    the variable's name if DUMP_NAME is FALSE.  The information is dumped with
712    preceding spaces indicated by INDENT_LEVEL.  */
713 
714 void
715 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
716 {
717   const char *p;
718   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
719 
720   if (indent_level > 4)
721     indent_level = 4;
722   p = spaces + 8 - (indent_level << 1);
723 
724   fprintf (file, "%sIV struct:\n", p);
725   if (iv->ssa_name && dump_name)
726     {
727       fprintf (file, "%s  SSA_NAME:\t", p);
728       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
729       fprintf (file, "\n");
730     }
731 
732   fprintf (file, "%s  Type:\t", p);
733   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
734   fprintf (file, "\n");
735 
736   fprintf (file, "%s  Base:\t", p);
737   print_generic_expr (file, iv->base, TDF_SLIM);
738   fprintf (file, "\n");
739 
740   fprintf (file, "%s  Step:\t", p);
741   print_generic_expr (file, iv->step, TDF_SLIM);
742   fprintf (file, "\n");
743 
744   if (iv->base_object)
745     {
746       fprintf (file, "%s  Object:\t", p);
747       print_generic_expr (file, iv->base_object, TDF_SLIM);
748       fprintf (file, "\n");
749     }
750 
751   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
752 
753   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
754 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
755 }
756 
757 /* Dumps information about the USE to FILE.  */
758 
759 void
760 dump_use (FILE *file, struct iv_use *use)
761 {
762   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
763   fprintf (file, "    At stmt:\t");
764   print_gimple_stmt (file, use->stmt, 0);
765   fprintf (file, "    At pos:\t");
766   if (use->op_p)
767     print_generic_expr (file, *use->op_p, TDF_SLIM);
768   fprintf (file, "\n");
769   dump_iv (file, use->iv, false, 2);
770 }
771 
772 /* Dumps information about the uses to FILE.  */
773 
774 void
775 dump_groups (FILE *file, struct ivopts_data *data)
776 {
777   unsigned i, j;
778   struct iv_group *group;
779 
780   for (i = 0; i < data->vgroups.length (); i++)
781     {
782       group = data->vgroups[i];
783       fprintf (file, "Group %d:\n", group->id);
784       if (group->type == USE_NONLINEAR_EXPR)
785 	fprintf (file, "  Type:\tGENERIC\n");
786       else if (group->type == USE_REF_ADDRESS)
787 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
788       else if (group->type == USE_PTR_ADDRESS)
789 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
790       else
791 	{
792 	  gcc_assert (group->type == USE_COMPARE);
793 	  fprintf (file, "  Type:\tCOMPARE\n");
794 	}
795       for (j = 0; j < group->vuses.length (); j++)
796 	dump_use (file, group->vuses[j]);
797     }
798 }
799 
800 /* Dumps information about induction variable candidate CAND to FILE.  */
801 
802 void
803 dump_cand (FILE *file, struct iv_cand *cand)
804 {
805   struct iv *iv = cand->iv;
806 
807   fprintf (file, "Candidate %d:\n", cand->id);
808   if (cand->inv_vars)
809     {
810       fprintf (file, "  Depend on inv.vars: ");
811       dump_bitmap (file, cand->inv_vars);
812     }
813   if (cand->inv_exprs)
814     {
815       fprintf (file, "  Depend on inv.exprs: ");
816       dump_bitmap (file, cand->inv_exprs);
817     }
818 
819   if (cand->var_before)
820     {
821       fprintf (file, "  Var befor: ");
822       print_generic_expr (file, cand->var_before, TDF_SLIM);
823       fprintf (file, "\n");
824     }
825   if (cand->var_after)
826     {
827       fprintf (file, "  Var after: ");
828       print_generic_expr (file, cand->var_after, TDF_SLIM);
829       fprintf (file, "\n");
830     }
831 
832   switch (cand->pos)
833     {
834     case IP_NORMAL:
835       fprintf (file, "  Incr POS: before exit test\n");
836       break;
837 
838     case IP_BEFORE_USE:
839       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
840       break;
841 
842     case IP_AFTER_USE:
843       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
844       break;
845 
846     case IP_END:
847       fprintf (file, "  Incr POS: at end\n");
848       break;
849 
850     case IP_ORIGINAL:
851       fprintf (file, "  Incr POS: orig biv\n");
852       break;
853     }
854 
855   dump_iv (file, iv, false, 1);
856 }
857 
858 /* Returns the info for ssa version VER.  */
859 
860 static inline struct version_info *
861 ver_info (struct ivopts_data *data, unsigned ver)
862 {
863   return data->version_info + ver;
864 }
865 
866 /* Returns the info for ssa name NAME.  */
867 
868 static inline struct version_info *
869 name_info (struct ivopts_data *data, tree name)
870 {
871   return ver_info (data, SSA_NAME_VERSION (name));
872 }
873 
874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
875    emitted in LOOP.  */
876 
877 static bool
878 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
879 {
880   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
881 
882   gcc_assert (bb);
883 
884   if (sbb == loop->latch)
885     return true;
886 
887   if (sbb != bb)
888     return false;
889 
890   return stmt == last_stmt (bb);
891 }
892 
893 /* Returns true if STMT is after the place where the original induction
894    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
895    if the positions are identical.  */
896 
897 static bool
898 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
899 {
900   basic_block cand_bb = gimple_bb (cand->incremented_at);
901   basic_block stmt_bb = gimple_bb (stmt);
902 
903   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
904     return false;
905 
906   if (stmt_bb != cand_bb)
907     return true;
908 
909   if (true_if_equal
910       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
911     return true;
912   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
913 }
914 
915 /* Returns true if STMT is after the place where the induction variable
916    CAND is incremented in LOOP.  */
917 
918 static bool
919 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
920 {
921   switch (cand->pos)
922     {
923     case IP_END:
924       return false;
925 
926     case IP_NORMAL:
927       return stmt_after_ip_normal_pos (loop, stmt);
928 
929     case IP_ORIGINAL:
930     case IP_AFTER_USE:
931       return stmt_after_inc_pos (cand, stmt, false);
932 
933     case IP_BEFORE_USE:
934       return stmt_after_inc_pos (cand, stmt, true);
935 
936     default:
937       gcc_unreachable ();
938     }
939 }
940 
941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
942 
943 static bool
944 abnormal_ssa_name_p (tree exp)
945 {
946   if (!exp)
947     return false;
948 
949   if (TREE_CODE (exp) != SSA_NAME)
950     return false;
951 
952   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
953 }
954 
955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956    abnormal phi node.  Callback for for_each_index.  */
957 
958 static bool
959 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
960 				  void *data ATTRIBUTE_UNUSED)
961 {
962   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
963     {
964       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
965 	return false;
966       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
967 	return false;
968     }
969 
970   return !abnormal_ssa_name_p (*index);
971 }
972 
973 /* Returns true if EXPR contains a ssa name that occurs in an
974    abnormal phi node.  */
975 
976 bool
977 contains_abnormal_ssa_name_p (tree expr)
978 {
979   enum tree_code code;
980   enum tree_code_class codeclass;
981 
982   if (!expr)
983     return false;
984 
985   code = TREE_CODE (expr);
986   codeclass = TREE_CODE_CLASS (code);
987 
988   if (code == CALL_EXPR)
989     {
990       tree arg;
991       call_expr_arg_iterator iter;
992       FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
993 	if (contains_abnormal_ssa_name_p (arg))
994 	  return true;
995       return false;
996     }
997 
998   if (code == SSA_NAME)
999     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1000 
1001   if (code == INTEGER_CST
1002       || is_gimple_min_invariant (expr))
1003     return false;
1004 
1005   if (code == ADDR_EXPR)
1006     return !for_each_index (&TREE_OPERAND (expr, 0),
1007 			    idx_contains_abnormal_ssa_name_p,
1008 			    NULL);
1009 
1010   if (code == COND_EXPR)
1011     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1012       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1013       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1014 
1015   switch (codeclass)
1016     {
1017     case tcc_binary:
1018     case tcc_comparison:
1019       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1020 	return true;
1021 
1022       /* Fallthru.  */
1023     case tcc_unary:
1024       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1025 	return true;
1026 
1027       break;
1028 
1029     default:
1030       gcc_unreachable ();
1031     }
1032 
1033   return false;
1034 }
1035 
1036 /*  Returns the structure describing number of iterations determined from
1037     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1038 
1039 static struct tree_niter_desc *
1040 niter_for_exit (struct ivopts_data *data, edge exit)
1041 {
1042   struct tree_niter_desc *desc;
1043   tree_niter_desc **slot;
1044 
1045   if (!data->niters)
1046     {
1047       data->niters = new hash_map<edge, tree_niter_desc *>;
1048       slot = NULL;
1049     }
1050   else
1051     slot = data->niters->get (exit);
1052 
1053   if (!slot)
1054     {
1055       /* Try to determine number of iterations.  We cannot safely work with ssa
1056 	 names that appear in phi nodes on abnormal edges, so that we do not
1057 	 create overlapping life ranges for them (PR 27283).  */
1058       desc = XNEW (struct tree_niter_desc);
1059       if (!number_of_iterations_exit (data->current_loop,
1060 				      exit, desc, true)
1061      	  || contains_abnormal_ssa_name_p (desc->niter))
1062 	{
1063 	  XDELETE (desc);
1064 	  desc = NULL;
1065 	}
1066       data->niters->put (exit, desc);
1067     }
1068   else
1069     desc = *slot;
1070 
1071   return desc;
1072 }
1073 
1074 /* Returns the structure describing number of iterations determined from
1075    single dominating exit of DATA->current_loop, or NULL if something
1076    goes wrong.  */
1077 
1078 static struct tree_niter_desc *
1079 niter_for_single_dom_exit (struct ivopts_data *data)
1080 {
1081   edge exit = single_dom_exit (data->current_loop);
1082 
1083   if (!exit)
1084     return NULL;
1085 
1086   return niter_for_exit (data, exit);
1087 }
1088 
1089 /* Initializes data structures used by the iv optimization pass, stored
1090    in DATA.  */
1091 
1092 static void
1093 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1094 {
1095   data->version_info_size = 2 * num_ssa_names;
1096   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1097   data->relevant = BITMAP_ALLOC (NULL);
1098   data->important_candidates = BITMAP_ALLOC (NULL);
1099   data->max_inv_var_id = 0;
1100   data->max_inv_expr_id = 0;
1101   data->niters = NULL;
1102   data->vgroups.create (20);
1103   data->vcands.create (20);
1104   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1105   data->name_expansion_cache = NULL;
1106   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1107   data->iv_common_cands.create (20);
1108   decl_rtl_to_reset.create (20);
1109   gcc_obstack_init (&data->iv_obstack);
1110 }
1111 
1112 /* Returns a memory object to which EXPR points.  In case we are able to
1113    determine that it does not point to any such object, NULL is returned.  */
1114 
1115 static tree
1116 determine_base_object (tree expr)
1117 {
1118   enum tree_code code = TREE_CODE (expr);
1119   tree base, obj;
1120 
1121   /* If this is a pointer cast to any type, we need to determine
1122      the base object for the pointer; so handle conversions before
1123      throwing away non-pointer expressions.  */
1124   if (CONVERT_EXPR_P (expr))
1125     return determine_base_object (TREE_OPERAND (expr, 0));
1126 
1127   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1128     return NULL_TREE;
1129 
1130   switch (code)
1131     {
1132     case INTEGER_CST:
1133       return NULL_TREE;
1134 
1135     case ADDR_EXPR:
1136       obj = TREE_OPERAND (expr, 0);
1137       base = get_base_address (obj);
1138 
1139       if (!base)
1140 	return expr;
1141 
1142       if (TREE_CODE (base) == MEM_REF)
1143 	return determine_base_object (TREE_OPERAND (base, 0));
1144 
1145       return fold_convert (ptr_type_node,
1146 			   build_fold_addr_expr (base));
1147 
1148     case POINTER_PLUS_EXPR:
1149       return determine_base_object (TREE_OPERAND (expr, 0));
1150 
1151     case PLUS_EXPR:
1152     case MINUS_EXPR:
1153       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1154       gcc_unreachable ();
1155 
1156     default:
1157       if (POLY_INT_CST_P (expr))
1158 	return NULL_TREE;
1159       return fold_convert (ptr_type_node, expr);
1160     }
1161 }
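
/* For example (informally): for &a[i] the base object is &a, for an address
   derived from a pointer ssa name p_1 (say p_1 + 4 * i_2) it is p_1 converted
   to ptr_type_node, and for a non-pointer expression NULL_TREE is returned.  */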
1162 
1163 /* Return true if an address expression with a non-DECL_P operand appears
1164    in EXPR.  */
1165 
1166 static bool
1167 contain_complex_addr_expr (tree expr)
1168 {
1169   bool res = false;
1170 
1171   STRIP_NOPS (expr);
1172   switch (TREE_CODE (expr))
1173     {
1174     case POINTER_PLUS_EXPR:
1175     case PLUS_EXPR:
1176     case MINUS_EXPR:
1177       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1178       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1179       break;
1180 
1181     case ADDR_EXPR:
1182       return (!DECL_P (TREE_OPERAND (expr, 0)));
1183 
1184     default:
1185       return false;
1186     }
1187 
1188   return res;
1189 }
1190 
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1193 
1194 static struct iv *
1195 alloc_iv (struct ivopts_data *data, tree base, tree step,
1196 	  bool no_overflow = false)
1197 {
1198   tree expr = base;
1199   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1200 					      sizeof (struct iv));
1201   gcc_assert (step != NULL_TREE);
1202 
1203   /* Lower address expression in base except ones with DECL_P as operand.
1204      By doing this:
1205        1) More accurate cost can be computed for address expressions;
1206        2) Duplicate candidates won't be created for bases in different
1207 	  forms, like &a[0] and &a.  */
1208   STRIP_NOPS (expr);
1209   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1210       || contain_complex_addr_expr (expr))
1211     {
1212       aff_tree comb;
1213       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1214       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1215     }
1216 
1217   iv->base = base;
1218   iv->base_object = determine_base_object (base);
1219   iv->step = step;
1220   iv->biv_p = false;
1221   iv->nonlin_use = NULL;
1222   iv->ssa_name = NULL_TREE;
1223   if (!no_overflow
1224        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1225 			      base, step))
1226     no_overflow = true;
1227   iv->no_overflow = no_overflow;
1228   iv->have_address_use = false;
1229 
1230   return iv;
1231 }
1232 
1233 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1234    doesn't overflow.  */
1235 
1236 static void
1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1238 	bool no_overflow)
1239 {
1240   struct version_info *info = name_info (data, iv);
1241 
1242   gcc_assert (!info->iv);
1243 
1244   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1245   info->iv = alloc_iv (data, base, step, no_overflow);
1246   info->iv->ssa_name = iv;
1247 }
1248 
1249 /* Finds induction variable declaration for VAR.  */
1250 
1251 static struct iv *
1252 get_iv (struct ivopts_data *data, tree var)
1253 {
1254   basic_block bb;
1255   tree type = TREE_TYPE (var);
1256 
1257   if (!POINTER_TYPE_P (type)
1258       && !INTEGRAL_TYPE_P (type))
1259     return NULL;
1260 
1261   if (!name_info (data, var)->iv)
1262     {
1263       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1264 
1265       if (!bb
1266 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1267 	set_iv (data, var, var, build_int_cst (type, 0), true);
1268     }
1269 
1270   return name_info (data, var)->iv;
1271 }
1272 
1273 /* Return the first non-invariant ssa var found in EXPR.  */
1274 
1275 static tree
1276 extract_single_var_from_expr (tree expr)
1277 {
1278   int i, n;
1279   tree tmp;
1280   enum tree_code code;
1281 
1282   if (!expr || is_gimple_min_invariant (expr))
1283     return NULL;
1284 
1285   code = TREE_CODE (expr);
1286   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1287     {
1288       n = TREE_OPERAND_LENGTH (expr);
1289       for (i = 0; i < n; i++)
1290 	{
1291 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1292 
1293 	  if (tmp)
1294 	    return tmp;
1295 	}
1296     }
1297   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1298 }
1299 
1300 /* Finds basic ivs.  */
1301 
1302 static bool
1303 find_bivs (struct ivopts_data *data)
1304 {
1305   gphi *phi;
1306   affine_iv iv;
1307   tree step, type, base, stop;
1308   bool found = false;
1309   struct loop *loop = data->current_loop;
1310   gphi_iterator psi;
1311 
1312   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1313     {
1314       phi = psi.phi ();
1315 
1316       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1317 	continue;
1318 
1319       if (virtual_operand_p (PHI_RESULT (phi)))
1320 	continue;
1321 
1322       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1323 	continue;
1324 
1325       if (integer_zerop (iv.step))
1326 	continue;
1327 
1328       step = iv.step;
1329       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1330       /* Stop expanding iv base at the first ssa var referred to by iv step.
1331 	 Ideally we should stop at any ssa var, but since that's expensive
1332 	 and such cases rarely happen, we only do it for the first one.
1333 
1334 	 See PR64705 for the rationale.  */
1335       stop = extract_single_var_from_expr (step);
1336       base = expand_simple_operations (base, stop);
1337       if (contains_abnormal_ssa_name_p (base)
1338 	  || contains_abnormal_ssa_name_p (step))
1339 	continue;
1340 
1341       type = TREE_TYPE (PHI_RESULT (phi));
1342       base = fold_convert (type, base);
1343       if (step)
1344 	{
1345 	  if (POINTER_TYPE_P (type))
1346 	    step = convert_to_ptrofftype (step);
1347 	  else
1348 	    step = fold_convert (type, step);
1349 	}
1350 
1351       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1352       found = true;
1353     }
1354 
1355   return found;
1356 }
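
/* As a sketch, the source loop "for (i = 0; i < n; i++)" shows up at this
   point roughly as

     # i_1 = PHI <0(preheader), i_5(latch)>
     ...
     i_5 = i_1 + 1;

   and find_bivs records the phi result i_1 as an induction variable with
   base 0 and step 1; mark_bivs below then sets its biv_p flag.  The ssa
   names and numbering here are made up for illustration.  */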
1357 
1358 /* Marks basic ivs.  */
1359 
1360 static void
1361 mark_bivs (struct ivopts_data *data)
1362 {
1363   gphi *phi;
1364   gimple *def;
1365   tree var;
1366   struct iv *iv, *incr_iv;
1367   struct loop *loop = data->current_loop;
1368   basic_block incr_bb;
1369   gphi_iterator psi;
1370 
1371   data->bivs_not_used_in_addr = 0;
1372   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1373     {
1374       phi = psi.phi ();
1375 
1376       iv = get_iv (data, PHI_RESULT (phi));
1377       if (!iv)
1378 	continue;
1379 
1380       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1381       def = SSA_NAME_DEF_STMT (var);
1382       /* Don't mark iv peeled from other one as biv.  */
1383       if (def
1384 	  && gimple_code (def) == GIMPLE_PHI
1385 	  && gimple_bb (def) == loop->header)
1386 	continue;
1387 
1388       incr_iv = get_iv (data, var);
1389       if (!incr_iv)
1390 	continue;
1391 
1392       /* If the increment is in the subloop, ignore it.  */
1393       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1394       if (incr_bb->loop_father != data->current_loop
1395 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1396 	continue;
1397 
1398       iv->biv_p = true;
1399       incr_iv->biv_p = true;
1400       if (iv->no_overflow)
1401 	data->bivs_not_used_in_addr++;
1402       if (incr_iv->no_overflow)
1403 	data->bivs_not_used_in_addr++;
1404     }
1405 }
1406 
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408    parameters to IV.  */
1409 
1410 static bool
1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1412 {
1413   tree lhs, stop;
1414   struct loop *loop = data->current_loop;
1415 
1416   iv->base = NULL_TREE;
1417   iv->step = NULL_TREE;
1418 
1419   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1420     return false;
1421 
1422   lhs = gimple_assign_lhs (stmt);
1423   if (TREE_CODE (lhs) != SSA_NAME)
1424     return false;
1425 
1426   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1427     return false;
1428 
1429   /* Stop expanding iv base at the first ssa var referred to by iv step.
1430      Ideally we should stop at any ssa var, but since that's expensive
1431      and such cases rarely happen, we only do it for the first one.
1432 
1433      See PR64705 for the rationale.  */
1434   stop = extract_single_var_from_expr (iv->step);
1435   iv->base = expand_simple_operations (iv->base, stop);
1436   if (contains_abnormal_ssa_name_p (iv->base)
1437       || contains_abnormal_ssa_name_p (iv->step))
1438     return false;
1439 
1440   /* If STMT could throw, then do not consider STMT as defining a GIV.
1441      While this will suppress optimizations, we cannot safely delete this
1442      GIV and associated statements, even if it appears it is not used.  */
1443   if (stmt_could_throw_p (cfun, stmt))
1444     return false;
1445 
1446   return true;
1447 }
1448 
1449 /* Finds general ivs in statement STMT.  */
1450 
1451 static void
1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1453 {
1454   affine_iv iv;
1455 
1456   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1457     return;
1458 
1459   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1460 }
1461 
1462 /* Finds general ivs in basic block BB.  */
1463 
1464 static void
1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1466 {
1467   gimple_stmt_iterator bsi;
1468 
1469   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1470     find_givs_in_stmt (data, gsi_stmt (bsi));
1471 }
1472 
1473 /* Finds general ivs.  */
1474 
1475 static void
1476 find_givs (struct ivopts_data *data)
1477 {
1478   struct loop *loop = data->current_loop;
1479   basic_block *body = get_loop_body_in_dom_order (loop);
1480   unsigned i;
1481 
1482   for (i = 0; i < loop->num_nodes; i++)
1483     find_givs_in_bb (data, body[i]);
1484   free (body);
1485 }
1486 
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488    variable and if so, its initial value and step.  */
1489 
1490 static bool
1491 find_induction_variables (struct ivopts_data *data)
1492 {
1493   unsigned i;
1494   bitmap_iterator bi;
1495 
1496   if (!find_bivs (data))
1497     return false;
1498 
1499   find_givs (data);
1500   mark_bivs (data);
1501 
1502   if (dump_file && (dump_flags & TDF_DETAILS))
1503     {
1504       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1505 
1506       if (niter)
1507 	{
1508 	  fprintf (dump_file, "  number of iterations ");
1509 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1510 	  if (!integer_zerop (niter->may_be_zero))
1511 	    {
1512 	      fprintf (dump_file, "; zero if ");
1513 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1514 	    }
1515 	  fprintf (dump_file, "\n");
1516 	};
1517 
1518       fprintf (dump_file, "\n<Induction Vars>:\n");
1519       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1520 	{
1521 	  struct version_info *info = ver_info (data, i);
1522 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1523 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1524 	}
1525     }
1526 
1527   return true;
1528 }
1529 
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532    is the const offset stripped from IV base and MEM_TYPE is the type
1533    of the memory being addressed.  For uses of other types, ADDR_BASE
1534    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1535 
1536 static struct iv_use *
1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1538 	    gimple *stmt, enum use_type type, tree mem_type,
1539 	    tree addr_base, poly_uint64 addr_offset)
1540 {
1541   struct iv_use *use = XCNEW (struct iv_use);
1542 
1543   use->id = group->vuses.length ();
1544   use->group_id = group->id;
1545   use->type = type;
1546   use->mem_type = mem_type;
1547   use->iv = iv;
1548   use->stmt = stmt;
1549   use->op_p = use_p;
1550   use->addr_base = addr_base;
1551   use->addr_offset = addr_offset;
1552 
1553   group->vuses.safe_push (use);
1554   return use;
1555 }
1556 
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558    NONLINEAR_USE is true if the invariant is used in a way we do not
1559    handle specially.  */
1560 
1561 static void
1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1563 {
1564   basic_block bb;
1565   struct version_info *info;
1566 
1567   if (TREE_CODE (op) != SSA_NAME
1568       || virtual_operand_p (op))
1569     return;
1570 
1571   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1572   if (bb
1573       && flow_bb_inside_loop_p (data->current_loop, bb))
1574     return;
1575 
1576   info = name_info (data, op);
1577   info->name = op;
1578   info->has_nonlin_use |= nonlinear_use;
1579   if (!info->inv_id)
1580     info->inv_id = ++data->max_inv_var_id;
1581   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1582 }
1583 
1584 /* Record a group of TYPE.  */
1585 
1586 static struct iv_group *
1587 record_group (struct ivopts_data *data, enum use_type type)
1588 {
1589   struct iv_group *group = XCNEW (struct iv_group);
1590 
1591   group->id = data->vgroups.length ();
1592   group->type = type;
1593   group->related_cands = BITMAP_ALLOC (NULL);
1594   group->vuses.create (1);
1595 
1596   data->vgroups.safe_push (group);
1597   return group;
1598 }
1599 
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601    A new group will be created if there is no existing group for the use.
1602    MEM_TYPE is the type of memory being addressed, or NULL if this
1603    isn't an address reference.  */
1604 
1605 static struct iv_use *
1606 record_group_use (struct ivopts_data *data, tree *use_p,
1607 		  struct iv *iv, gimple *stmt, enum use_type type,
1608 		  tree mem_type)
1609 {
1610   tree addr_base = NULL;
1611   struct iv_group *group = NULL;
1612   poly_uint64 addr_offset = 0;
1613 
1614   /* Address type uses may share an existing group; other uses always get a new group.  */
1615   if (address_p (type))
1616     {
1617       unsigned int i;
1618 
1619       addr_base = strip_offset (iv->base, &addr_offset);
1620       for (i = 0; i < data->vgroups.length (); i++)
1621 	{
1622 	  struct iv_use *use;
1623 
1624 	  group = data->vgroups[i];
1625 	  use = group->vuses[0];
1626 	  if (!address_p (use->type))
1627 	    continue;
1628 
1629 	  /* Check if it has the same stripped base and step.  */
1630 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1631 	      && operand_equal_p (iv->step, use->iv->step, 0)
1632 	      && operand_equal_p (addr_base, use->addr_base, 0))
1633 	    break;
1634 	}
1635       if (i == data->vgroups.length ())
1636 	group = NULL;
1637     }
1638 
1639   if (!group)
1640     group = record_group (data, type);
1641 
1642   return record_use (group, use_p, iv, stmt, type, mem_type,
1643 		     addr_base, addr_offset);
1644 }
1645 
1646 /* Checks whether the use OP is interesting and if so, records it.  */
1647 
1648 static struct iv_use *
1649 find_interesting_uses_op (struct ivopts_data *data, tree op)
1650 {
1651   struct iv *iv;
1652   gimple *stmt;
1653   struct iv_use *use;
1654 
1655   if (TREE_CODE (op) != SSA_NAME)
1656     return NULL;
1657 
1658   iv = get_iv (data, op);
1659   if (!iv)
1660     return NULL;
1661 
1662   if (iv->nonlin_use)
1663     {
1664       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1665       return iv->nonlin_use;
1666     }
1667 
1668   if (integer_zerop (iv->step))
1669     {
1670       record_invariant (data, op, true);
1671       return NULL;
1672     }
1673 
1674   stmt = SSA_NAME_DEF_STMT (op);
1675   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1676 
1677   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1678   iv->nonlin_use = use;
1679   return use;
1680 }
1681 
1682 /* Indicate how compare type iv_use can be handled.  */
1683 enum comp_iv_rewrite
1684 {
1685   COMP_IV_NA,
1686   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1687   COMP_IV_EXPR,
1688   /* We may rewrite compare type iv_uses on both sides of comparison by
1689      expressing value of each iv_use.  */
1690   COMP_IV_EXPR_2,
1691   /* We may rewrite compare type iv_use by expressing value of the iv_use
1692      or by eliminating it with other iv_cand.  */
1693   COMP_IV_ELIM
1694 };
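
/* For instance (names invented for illustration): a test i_1 < n_2 where i_1
   is an iv and n_2 is loop invariant may be rewritten by eliminating the iv
   (COMP_IV_ELIM); i_1 < j_2 where both operands are ivs with nonzero step can
   only be handled by expressing the value of each side (COMP_IV_EXPR_2);
   i_1 < x_3 where x_3 is defined inside the loop but is not an iv can be
   handled by expressing the value of i_1 (COMP_IV_EXPR); and if neither side
   is an iv the compare is not interesting here (COMP_IV_NA).  */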
1695 
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697    of an induction variable and an invariant.  If this is the case,
1698    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1699    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700    induction variable descriptions, and the way the compare can be handled
1701    is returned (see enum comp_iv_rewrite).  Otherwise, CONTROL_VAR and BOUND
1702    are set to the arguments of the condition and COMP_IV_NA is returned.  */
1703 
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1706 		       tree **control_var, tree **bound,
1707 		       struct iv **iv_var, struct iv **iv_bound)
1708 {
1709   /* The objects returned when COND has constant operands.  */
1710   static struct iv const_iv;
1711   static tree zero;
1712   tree *op0 = &zero, *op1 = &zero;
1713   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1714   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1715 
1716   if (gimple_code (stmt) == GIMPLE_COND)
1717     {
1718       gcond *cond_stmt = as_a <gcond *> (stmt);
1719       op0 = gimple_cond_lhs_ptr (cond_stmt);
1720       op1 = gimple_cond_rhs_ptr (cond_stmt);
1721     }
1722   else
1723     {
1724       op0 = gimple_assign_rhs1_ptr (stmt);
1725       op1 = gimple_assign_rhs2_ptr (stmt);
1726     }
1727 
1728   zero = integer_zero_node;
1729   const_iv.step = integer_zero_node;
1730 
1731   if (TREE_CODE (*op0) == SSA_NAME)
1732     iv0 = get_iv (data, *op0);
1733   if (TREE_CODE (*op1) == SSA_NAME)
1734     iv1 = get_iv (data, *op1);
1735 
1736   /* If both sides of the comparison are IVs, we can express ivs on both ends.  */
1737   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1738     {
1739       rewrite_type = COMP_IV_EXPR_2;
1740       goto end;
1741     }
1742 
1743   /* If neither side of the comparison is an IV.  */
1744   if ((!iv0 || integer_zerop (iv0->step))
1745       && (!iv1 || integer_zerop (iv1->step)))
1746     goto end;
1747 
1748   /* Control variable may be on the other side.  */
1749   if (!iv0 || integer_zerop (iv0->step))
1750     {
1751       std::swap (op0, op1);
1752       std::swap (iv0, iv1);
1753     }
1754   /* If one side is an IV and the other side isn't loop invariant.  */
1755   if (!iv1)
1756     rewrite_type = COMP_IV_EXPR;
1757   /* If one side is an IV and the other side is loop invariant.  */
1758   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1759     rewrite_type = COMP_IV_ELIM;
1760 
1761 end:
1762   if (control_var)
1763     *control_var = op0;
1764   if (iv_var)
1765     *iv_var = iv0;
1766   if (bound)
1767     *bound = op1;
1768   if (iv_bound)
1769     *iv_bound = iv1;
1770 
1771   return rewrite_type;
1772 }
1773 
1774 /* Checks whether the condition in STMT is interesting and if so,
1775    records it.  */
1776 
1777 static void
1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1779 {
1780   tree *var_p, *bound_p;
1781   struct iv *var_iv, *bound_iv;
1782   enum comp_iv_rewrite ret;
1783 
1784   ret = extract_cond_operands (data, stmt,
1785 			       &var_p, &bound_p, &var_iv, &bound_iv);
1786   if (ret == COMP_IV_NA)
1787     {
1788       find_interesting_uses_op (data, *var_p);
1789       find_interesting_uses_op (data, *bound_p);
1790       return;
1791     }
1792 
1793   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1794   /* Record compare type iv_use for iv on the other side of comparison.  */
1795   if (ret == COMP_IV_EXPR_2)
1796     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1797 }
1798 
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800    relative to the loop LOOP, i.e. if all its operands are defined
1801    outside of the returned loop.  Returns NULL if EXPR is not
1802    even obviously invariant in LOOP.  */
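
/* For instance, in a nest like

     for (i = 0; i < n; i++)      <- loop 1
       for (j = 0; j < m; j++)    <- loop 2
         ... = a + i;

   with a and n defined before loop 1, the expression a + i is obviously
   invariant in loop 2, so calling this with LOOP == loop 2 returns loop 2;
   calling it with LOOP == loop 1 returns NULL, and for a alone it returns
   the root of the loop tree.  */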
1803 
1804 struct loop *
1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1806 {
1807   basic_block def_bb;
1808   unsigned i, len;
1809 
1810   if (is_gimple_min_invariant (expr))
1811     return current_loops->tree_root;
1812 
1813   if (TREE_CODE (expr) == SSA_NAME)
1814     {
1815       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1816       if (def_bb)
1817 	{
1818 	  if (flow_bb_inside_loop_p (loop, def_bb))
1819 	    return NULL;
1820 	  return superloop_at_depth (loop,
1821 				     loop_depth (def_bb->loop_father) + 1);
1822 	}
1823 
1824       return current_loops->tree_root;
1825     }
1826 
1827   if (!EXPR_P (expr))
1828     return NULL;
1829 
1830   unsigned maxdepth = 0;
1831   len = TREE_OPERAND_LENGTH (expr);
1832   for (i = 0; i < len; i++)
1833     {
1834       struct loop *ivloop;
1835       if (!TREE_OPERAND (expr, i))
1836 	continue;
1837 
1838       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1839       if (!ivloop)
1840 	return NULL;
1841       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1842     }
1843 
1844   return superloop_at_depth (loop, maxdepth);
1845 }
1846 
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848    i.e. if all its operands are defined outside of the LOOP.  LOOP
1849    should not be the function body.  */
1850 
1851 bool
1852 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1853 {
1854   basic_block def_bb;
1855   unsigned i, len;
1856 
1857   gcc_assert (loop_depth (loop) > 0);
1858 
1859   if (is_gimple_min_invariant (expr))
1860     return true;
1861 
1862   if (TREE_CODE (expr) == SSA_NAME)
1863     {
1864       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1865       if (def_bb
1866 	  && flow_bb_inside_loop_p (loop, def_bb))
1867 	return false;
1868 
1869       return true;
1870     }
1871 
1872   if (!EXPR_P (expr))
1873     return false;
1874 
1875   len = TREE_OPERAND_LENGTH (expr);
1876   for (i = 0; i < len; i++)
1877     if (TREE_OPERAND (expr, i)
1878 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1879       return false;
1880 
1881   return true;
1882 }
1883 
1884 /* Given expression EXPR which computes inductive values with respect
1885    to the loop recorded in DATA, this function returns the biv from which
1886    EXPR is derived, by tracing definition chains of SSA names in EXPR.  */
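
/* For example, given (with made-up SSA names)

     i_1 = PHI <0(preheader), i_6(latch)>   <- biv
     t_2 = i_1 * 4;
     p_3 = base_5 + t_2;

   calling this function on p_3 or t_2 traces the definitions back to i_1
   and returns the iv descriptor of that biv.  */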
1887 
1888 static struct iv*
1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1890 {
1891   struct iv *iv;
1892   unsigned i, n;
1893   tree e2, e1;
1894   enum tree_code code;
1895   gimple *stmt;
1896 
1897   if (expr == NULL_TREE)
1898     return NULL;
1899 
1900   if (is_gimple_min_invariant (expr))
1901     return NULL;
1902 
1903   code = TREE_CODE (expr);
1904   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1905     {
1906       n = TREE_OPERAND_LENGTH (expr);
1907       for (i = 0; i < n; i++)
1908 	{
1909 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1910 	  if (iv)
1911 	    return iv;
1912 	}
1913     }
1914 
1915   /* Stop if it's not an SSA name.  */
1916   if (code != SSA_NAME)
1917     return NULL;
1918 
1919   iv = get_iv (data, expr);
1920   if (!iv || integer_zerop (iv->step))
1921     return NULL;
1922   else if (iv->biv_p)
1923     return iv;
1924 
1925   stmt = SSA_NAME_DEF_STMT (expr);
1926   if (gphi *phi = dyn_cast <gphi *> (stmt))
1927     {
1928       ssa_op_iter iter;
1929       use_operand_p use_p;
1930       basic_block phi_bb = gimple_bb (phi);
1931 
1932       /* Skip loop header PHI that doesn't define biv.  */
1933       if (phi_bb->loop_father == data->current_loop)
1934 	return NULL;
1935 
1936       if (virtual_operand_p (gimple_phi_result (phi)))
1937 	return NULL;
1938 
1939       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1940 	{
1941 	  tree use = USE_FROM_PTR (use_p);
1942 	  iv = find_deriving_biv_for_expr (data, use);
1943 	  if (iv)
1944 	    return iv;
1945 	}
1946       return NULL;
1947     }
1948   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1949     return NULL;
1950 
1951   e1 = gimple_assign_rhs1 (stmt);
1952   code = gimple_assign_rhs_code (stmt);
1953   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1954     return find_deriving_biv_for_expr (data, e1);
1955 
1956   switch (code)
1957     {
1958     case MULT_EXPR:
1959     case PLUS_EXPR:
1960     case MINUS_EXPR:
1961     case POINTER_PLUS_EXPR:
1962       /* Increments, decrements and multiplications by a constant
1963 	 are simple.  */
1964       e2 = gimple_assign_rhs2 (stmt);
1965       iv = find_deriving_biv_for_expr (data, e2);
1966       if (iv)
1967 	return iv;
1968       gcc_fallthrough ();
1969 
1970     CASE_CONVERT:
1971       /* Casts are simple.  */
1972       return find_deriving_biv_for_expr (data, e1);
1973 
1974     default:
1975       break;
1976     }
1977 
1978   return NULL;
1979 }
1980 
1981 /* Record that BIV, together with bivs that are its predecessor or
1982    successor, is used in address type uses.  */
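
/* E.g. two integer bivs { BASE, STEP } and { BASE + STEP, STEP } are each
   other's predecessor/successor in this sense, so when one of them shows
   up in an address use, both get have_address_use set.  */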
1983 
1984 static void
1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1986 {
1987   unsigned i;
1988   tree type, base_1, base_2;
1989   bitmap_iterator bi;
1990 
1991   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1992       || biv->have_address_use || !biv->no_overflow)
1993     return;
1994 
1995   type = TREE_TYPE (biv->base);
1996   if (!INTEGRAL_TYPE_P (type))
1997     return;
1998 
1999   biv->have_address_use = true;
2000   data->bivs_not_used_in_addr--;
2001   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2002   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2003     {
2004       struct iv *iv = ver_info (data, i)->iv;
2005 
2006       if (!iv || !iv->biv_p || integer_zerop (iv->step)
2007 	  || iv->have_address_use || !iv->no_overflow)
2008 	continue;
2009 
2010       if (type != TREE_TYPE (iv->base)
2011 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2012 	continue;
2013 
2014       if (!operand_equal_p (biv->step, iv->step, 0))
2015 	continue;
2016 
2017       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2018       if (operand_equal_p (base_1, iv->base, 0)
2019 	  || operand_equal_p (base_2, biv->base, 0))
2020 	{
2021 	  iv->have_address_use = true;
2022 	  data->bivs_not_used_in_addr--;
2023 	}
2024     }
2025 }
2026 
2027 /* Accumulates the steps of indices into DATA and replaces their values with
2028    the initial ones.  Returns false when the value of an index cannot be
2029    determined.  Callback for for_each_index.  */
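
/* For example, for a reference a[i_1] where a has 4-byte elements and i_1
   is an iv with step 1, the callback below replaces the index with the
   initial value of i_1 and adds 1 * 4 == 4 bytes to the step accumulated
   in DATA.  */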
2030 
2031 struct ifs_ivopts_data
2032 {
2033   struct ivopts_data *ivopts_data;
2034   gimple *stmt;
2035   tree step;
2036 };
2037 
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2040 {
2041   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042   struct iv *iv;
2043   bool use_overflow_semantics = false;
2044   tree step, iv_base, iv_step, lbound, off;
2045   struct loop *loop = dta->ivopts_data->current_loop;
2046 
2047   /* If base is a component ref, require that the offset of the reference
2048      be invariant.  */
2049   if (TREE_CODE (base) == COMPONENT_REF)
2050     {
2051       off = component_ref_field_offset (base);
2052       return expr_invariant_in_loop_p (loop, off);
2053     }
2054 
2055   /* If base is an array, first check whether we will be able to move the
2056      reference out of the loop (in order to take its address in strength
2057      reduction).  For this to work we need both the lower bound and the
2058      step to be loop invariant.  */
2059   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2060     {
2061       /* Moreover, for a range, the size needs to be invariant as well.  */
2062       if (TREE_CODE (base) == ARRAY_RANGE_REF
2063 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064 	return false;
2065 
2066       step = array_ref_element_size (base);
2067       lbound = array_ref_low_bound (base);
2068 
2069       if (!expr_invariant_in_loop_p (loop, step)
2070 	  || !expr_invariant_in_loop_p (loop, lbound))
2071 	return false;
2072     }
2073 
2074   if (TREE_CODE (*idx) != SSA_NAME)
2075     return true;
2076 
2077   iv = get_iv (dta->ivopts_data, *idx);
2078   if (!iv)
2079     return false;
2080 
2081   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2082 	  *&x[0], which is not folded and does not trigger the
2083 	  ARRAY_REF path below.  */
2084   *idx = iv->base;
2085 
2086   if (integer_zerop (iv->step))
2087     return true;
2088 
2089   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2090     {
2091       step = array_ref_element_size (base);
2092 
2093       /* We only handle addresses whose step is an integer constant.  */
2094       if (TREE_CODE (step) != INTEGER_CST)
2095 	return false;
2096     }
2097   else
2098     /* The step for pointer arithmetic is already 1 byte.  */
2099     step = size_one_node;
2100 
2101   iv_base = iv->base;
2102   iv_step = iv->step;
2103   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104     use_overflow_semantics = true;
2105 
2106   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107 			    sizetype, &iv_base, &iv_step, dta->stmt,
2108 			    use_overflow_semantics))
2109     {
2110       /* The index might wrap.  */
2111       return false;
2112     }
2113 
2114   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2116 
2117   if (dta->ivopts_data->bivs_not_used_in_addr)
2118     {
2119       if (!iv->biv_p)
2120 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2121 
2122       record_biv_for_address_use (dta->ivopts_data, iv);
2123     }
2124   return true;
2125 }
2126 
2127 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2128    object is passed to it in DATA.  */
2129 
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132 		void *vdata)
2133 {
2134   struct ivopts_data *data = (struct ivopts_data *) vdata;
2135   find_interesting_uses_op (data, *idx);
2136   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2137     {
2138       find_interesting_uses_op (data, array_ref_element_size (base));
2139       find_interesting_uses_op (data, array_ref_low_bound (base));
2140     }
2141   return true;
2142 }
2143 
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145    store cst to MUL and return true.  Otherwise return false.
2146    The returned value is always sign-extended, regardless of the
2147    signedness of TOP and BOT.  */
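
/* A few concrete cases, as a rough illustration:

     TOP = i_1 * 12,       BOT = i_1  ->  true, *MUL = 12
     TOP = i_1 * 4 - i_1,  BOT = i_1  ->  true, *MUL = 3
     TOP = 24,             BOT = 8    ->  true, *MUL = 3
     TOP = 24,             BOT = 7    ->  false (nonzero remainder).  */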
2148 
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2151 {
2152   tree mby;
2153   enum tree_code code;
2154   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155   widest_int res, p0, p1;
2156 
2157   STRIP_NOPS (top);
2158   STRIP_NOPS (bot);
2159 
2160   if (operand_equal_p (top, bot, 0))
2161     {
2162       *mul = 1;
2163       return true;
2164     }
2165 
2166   code = TREE_CODE (top);
2167   switch (code)
2168     {
2169     case MULT_EXPR:
2170       mby = TREE_OPERAND (top, 1);
2171       if (TREE_CODE (mby) != INTEGER_CST)
2172 	return false;
2173 
2174       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175 	return false;
2176 
2177       *mul = wi::sext (res * wi::to_widest (mby), precision);
2178       return true;
2179 
2180     case PLUS_EXPR:
2181     case MINUS_EXPR:
2182       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184 	return false;
2185 
2186       if (code == MINUS_EXPR)
2187 	p1 = -p1;
2188       *mul = wi::sext (p0 + p1, precision);
2189       return true;
2190 
2191     case INTEGER_CST:
2192       if (TREE_CODE (bot) != INTEGER_CST)
2193 	return false;
2194 
2195       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197       if (p1 == 0)
2198 	return false;
2199       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200       return res == 0;
2201 
2202     default:
2203       if (POLY_INT_CST_P (top)
2204 	  && POLY_INT_CST_P (bot)
2205 	  && constant_multiple_p (wi::to_poly_widest (top),
2206 				  wi::to_poly_widest (bot), mul))
2207 	return true;
2208 
2209       return false;
2210     }
2211 }
2212 
2213 /* Return true if memory reference REF with step STEP may be unaligned.  */
2214 
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2217 {
2218   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219      thus they are not misaligned.  */
2220   if (TREE_CODE (ref) == TARGET_MEM_REF)
2221     return false;
2222 
2223   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2226 
2227   unsigned HOST_WIDE_INT bitpos;
2228   unsigned int ref_align;
2229   get_object_alignment_1 (ref, &ref_align, &bitpos);
2230   if (ref_align < align
2231       || (bitpos % align) != 0
2232       || (bitpos % BITS_PER_UNIT) != 0)
2233     return true;
2234 
2235   unsigned int trailing_zeros = tree_ctz (step);
2236   if (trailing_zeros < HOST_BITS_PER_INT
2237       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238     return true;
2239 
2240   return false;
2241 }
2242 
2243 /* Return true if EXPR may be non-addressable.   */
2244 
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2247 {
2248   switch (TREE_CODE (expr))
2249     {
2250     case VAR_DECL:
2251       /* Check if it's a register variable.  */
2252       return DECL_HARD_REGISTER (expr);
2253 
2254     case TARGET_MEM_REF:
2255       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2256 	 target, thus they are always addressable.  */
2257       return false;
2258 
2259     case MEM_REF:
2260       /* Likewise for MEM_REFs, modulo the storage order.  */
2261       return REF_REVERSE_STORAGE_ORDER (expr);
2262 
2263     case BIT_FIELD_REF:
2264       if (REF_REVERSE_STORAGE_ORDER (expr))
2265 	return true;
2266       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2267 
2268     case COMPONENT_REF:
2269       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2270 	return true;
2271       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2272 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2273 
2274     case ARRAY_REF:
2275     case ARRAY_RANGE_REF:
2276       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2277 	return true;
2278       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279 
2280     case VIEW_CONVERT_EXPR:
2281       /* This kind of view-conversions may wrap non-addressable objects
2282 	 and make them look addressable.  After some processing the
2283 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2284 	 of inappropriate objects to be built.  */
2285       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2286 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2287 	return true;
2288       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2289 
2290     CASE_CONVERT:
2291       return true;
2292 
2293     default:
2294       break;
2295     }
2296 
2297   return false;
2298 }
2299 
2300 /* Finds addresses in *OP_P inside STMT.  */
2301 
2302 static void
2303 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2304 			       tree *op_p)
2305 {
2306   tree base = *op_p, step = size_zero_node;
2307   struct iv *civ;
2308   struct ifs_ivopts_data ifs_ivopts_data;
2309 
2310   /* Do not play with volatile memory references.  A bit too conservative,
2311      perhaps, but safe.  */
2312   if (gimple_has_volatile_ops (stmt))
2313     goto fail;
2314 
2315   /* Ignore bitfields for now.  Not really something terribly complicated
2316      to handle.  TODO.  */
2317   if (TREE_CODE (base) == BIT_FIELD_REF)
2318     goto fail;
2319 
2320   base = unshare_expr (base);
2321 
2322   if (TREE_CODE (base) == TARGET_MEM_REF)
2323     {
2324       tree type = build_pointer_type (TREE_TYPE (base));
2325       tree astep;
2326 
2327       if (TMR_BASE (base)
2328 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2329 	{
2330 	  civ = get_iv (data, TMR_BASE (base));
2331 	  if (!civ)
2332 	    goto fail;
2333 
2334 	  TMR_BASE (base) = civ->base;
2335 	  step = civ->step;
2336 	}
2337       if (TMR_INDEX2 (base)
2338 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2339 	{
2340 	  civ = get_iv (data, TMR_INDEX2 (base));
2341 	  if (!civ)
2342 	    goto fail;
2343 
2344 	  TMR_INDEX2 (base) = civ->base;
2345 	  step = civ->step;
2346 	}
2347       if (TMR_INDEX (base)
2348 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2349 	{
2350 	  civ = get_iv (data, TMR_INDEX (base));
2351 	  if (!civ)
2352 	    goto fail;
2353 
2354 	  TMR_INDEX (base) = civ->base;
2355 	  astep = civ->step;
2356 
2357 	  if (astep)
2358 	    {
2359 	      if (TMR_STEP (base))
2360 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2361 
2362 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2363 	    }
2364 	}
2365 
2366       if (integer_zerop (step))
2367 	goto fail;
2368       base = tree_mem_ref_addr (type, base);
2369     }
2370   else
2371     {
2372       ifs_ivopts_data.ivopts_data = data;
2373       ifs_ivopts_data.stmt = stmt;
2374       ifs_ivopts_data.step = size_zero_node;
2375       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2376 	  || integer_zerop (ifs_ivopts_data.step))
2377 	goto fail;
2378       step = ifs_ivopts_data.step;
2379 
2380       /* Check that the base expression is addressable.  This needs
2381 	 to be done after substituting bases of IVs into it.  */
2382       if (may_be_nonaddressable_p (base))
2383 	goto fail;
2384 
2385       /* Moreover, on strict alignment platforms, check that it is
2386 	 sufficiently aligned.  */
2387       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2388 	goto fail;
2389 
2390       base = build_fold_addr_expr (base);
2391 
2392       /* Substituting bases of IVs into the base expression might
2393 	 have caused folding opportunities.  */
2394       if (TREE_CODE (base) == ADDR_EXPR)
2395 	{
2396 	  tree *ref = &TREE_OPERAND (base, 0);
2397 	  while (handled_component_p (*ref))
2398 	    ref = &TREE_OPERAND (*ref, 0);
2399 	  if (TREE_CODE (*ref) == MEM_REF)
2400 	    {
2401 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2402 				      TREE_OPERAND (*ref, 0),
2403 				      TREE_OPERAND (*ref, 1));
2404 	      if (tem)
2405 		*ref = tem;
2406 	    }
2407 	}
2408     }
2409 
2410   civ = alloc_iv (data, base, step);
2411   /* Fail if base object of this memory reference is unknown.  */
2412   if (civ->base_object == NULL_TREE)
2413     goto fail;
2414 
2415   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2416   return;
2417 
2418 fail:
2419   for_each_index (op_p, idx_record_use, data);
2420 }
2421 
2422 /* Finds and records invariants used in STMT.  */
2423 
2424 static void
2425 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2426 {
2427   ssa_op_iter iter;
2428   use_operand_p use_p;
2429   tree op;
2430 
2431   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2432     {
2433       op = USE_FROM_PTR (use_p);
2434       record_invariant (data, op, false);
2435     }
2436 }
2437 
2438 /* CALL calls an internal function.  If operand *OP_P will become an
2439    address when the call is expanded, return the type of the memory
2440    being addressed, otherwise return null.  */
2441 
2442 static tree
2443 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2444 {
2445   switch (gimple_call_internal_fn (call))
2446     {
2447     case IFN_MASK_LOAD:
2448       if (op_p == gimple_call_arg_ptr (call, 0))
2449 	return TREE_TYPE (gimple_call_lhs (call));
2450       return NULL_TREE;
2451 
2452     case IFN_MASK_STORE:
2453       if (op_p == gimple_call_arg_ptr (call, 0))
2454 	return TREE_TYPE (gimple_call_arg (call, 3));
2455       return NULL_TREE;
2456 
2457     default:
2458       return NULL_TREE;
2459     }
2460 }
2461 
2462 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2463    Return true if the operand will become an address when STMT
2464    is expanded and record the associated address use if so.  */
2465 
2466 static bool
2467 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2468 		       struct iv *iv)
2469 {
2470   /* Fail if base object of this memory reference is unknown.  */
2471   if (iv->base_object == NULL_TREE)
2472     return false;
2473 
2474   tree mem_type = NULL_TREE;
2475   if (gcall *call = dyn_cast <gcall *> (stmt))
2476     if (gimple_call_internal_p (call))
2477       mem_type = get_mem_type_for_internal_fn (call, op_p);
2478   if (mem_type)
2479     {
2480       iv = alloc_iv (data, iv->base, iv->step);
2481       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2482       return true;
2483     }
2484   return false;
2485 }
2486 
2487 /* Finds interesting uses of induction variables in the statement STMT.  */
2488 
2489 static void
2490 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2491 {
2492   struct iv *iv;
2493   tree op, *lhs, *rhs;
2494   ssa_op_iter iter;
2495   use_operand_p use_p;
2496   enum tree_code code;
2497 
2498   find_invariants_stmt (data, stmt);
2499 
2500   if (gimple_code (stmt) == GIMPLE_COND)
2501     {
2502       find_interesting_uses_cond (data, stmt);
2503       return;
2504     }
2505 
2506   if (is_gimple_assign (stmt))
2507     {
2508       lhs = gimple_assign_lhs_ptr (stmt);
2509       rhs = gimple_assign_rhs1_ptr (stmt);
2510 
2511       if (TREE_CODE (*lhs) == SSA_NAME)
2512 	{
2513 	  /* If the statement defines an induction variable, the uses are not
2514 	     interesting by themselves.  */
2515 
2516 	  iv = get_iv (data, *lhs);
2517 
2518 	  if (iv && !integer_zerop (iv->step))
2519 	    return;
2520 	}
2521 
2522       code = gimple_assign_rhs_code (stmt);
2523       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2524 	  && (REFERENCE_CLASS_P (*rhs)
2525 	      || is_gimple_val (*rhs)))
2526 	{
2527 	  if (REFERENCE_CLASS_P (*rhs))
2528 	    find_interesting_uses_address (data, stmt, rhs);
2529 	  else
2530 	    find_interesting_uses_op (data, *rhs);
2531 
2532 	  if (REFERENCE_CLASS_P (*lhs))
2533 	    find_interesting_uses_address (data, stmt, lhs);
2534 	  return;
2535 	}
2536       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2537 	{
2538 	  find_interesting_uses_cond (data, stmt);
2539 	  return;
2540 	}
2541 
2542       /* TODO -- we should also handle address uses of type
2543 
2544 	 memory = call (whatever);
2545 
2546 	 and
2547 
2548 	 call (memory).  */
2549     }
2550 
2551   if (gimple_code (stmt) == GIMPLE_PHI
2552       && gimple_bb (stmt) == data->current_loop->header)
2553     {
2554       iv = get_iv (data, PHI_RESULT (stmt));
2555 
2556       if (iv && !integer_zerop (iv->step))
2557 	return;
2558     }
2559 
2560   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2561     {
2562       op = USE_FROM_PTR (use_p);
2563 
2564       if (TREE_CODE (op) != SSA_NAME)
2565 	continue;
2566 
2567       iv = get_iv (data, op);
2568       if (!iv)
2569 	continue;
2570 
2571       if (!find_address_like_use (data, stmt, use_p->use, iv))
2572 	find_interesting_uses_op (data, op);
2573     }
2574 }
2575 
2576 /* Finds interesting uses of induction variables outside of loops
2577    on loop exit edge EXIT.  */
2578 
2579 static void
2580 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2581 {
2582   gphi *phi;
2583   gphi_iterator psi;
2584   tree def;
2585 
2586   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2587     {
2588       phi = psi.phi ();
2589       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2590       if (!virtual_operand_p (def))
2591 	find_interesting_uses_op (data, def);
2592     }
2593 }
2594 
2595 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2596    mode for memory reference represented by USE.  */
2597 
2598 static GTY (()) vec<rtx, va_gc> *addr_list;
2599 
2600 static bool
2601 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2602 {
2603   rtx reg, addr;
2604   unsigned list_index;
2605   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2606   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2607 
2608   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2609   if (list_index >= vec_safe_length (addr_list))
2610     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2611 
2612   addr = (*addr_list)[list_index];
2613   if (!addr)
2614     {
2615       addr_mode = targetm.addr_space.address_mode (as);
2616       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2617       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2618       (*addr_list)[list_index] = addr;
2619     }
2620   else
2621     addr_mode = GET_MODE (addr);
2622 
2623   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2624   return (memory_address_addr_space_p (mem_mode, addr, as));
2625 }
2626 
2627 /* Comparison function to sort group in ascending order of addr_offset.  */
2628 
2629 static int
2630 group_compare_offset (const void *a, const void *b)
2631 {
2632   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2633   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2634 
2635   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2636 }
2637 
2638 /* Check if small groups should be split.  Return true if no group
2639    contains more than two uses with distinct addr_offsets.  Return
2640    false otherwise.  We want to split such groups because:
2641 
2642      1) Small groups don't have much benefit and may interfere with
2643 	general candidate selection.
2644      2) The size of a problem with only small groups is usually small
2645 	and the general algorithm can handle it well.
2646 
2647    TODO -- The above claim may not hold when we want to merge memory
2648    accesses with consecutive addresses.  */
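
/* E.g. if every group has at most two distinct addr_offsets, say {0, 4},
   this returns true and the groups are split at each distinct offset; a
   single group with offsets {0, 4, 8} is enough to return false, in which
   case groups are only split where the offset does not fit the addressing
   mode.  */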
2649 
2650 static bool
2651 split_small_address_groups_p (struct ivopts_data *data)
2652 {
2653   unsigned int i, j, distinct = 1;
2654   struct iv_use *pre;
2655   struct iv_group *group;
2656 
2657   for (i = 0; i < data->vgroups.length (); i++)
2658     {
2659       group = data->vgroups[i];
2660       if (group->vuses.length () == 1)
2661 	continue;
2662 
2663       gcc_assert (address_p (group->type));
2664       if (group->vuses.length () == 2)
2665 	{
2666 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2667 				      group->vuses[1]->addr_offset) > 0)
2668 	    std::swap (group->vuses[0], group->vuses[1]);
2669 	}
2670       else
2671 	group->vuses.qsort (group_compare_offset);
2672 
2673       if (distinct > 2)
2674 	continue;
2675 
2676       distinct = 1;
2677       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2678 	{
2679 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2680 	    {
2681 	      pre = group->vuses[j];
2682 	      distinct++;
2683 	    }
2684 
2685 	  if (distinct > 2)
2686 	    break;
2687 	}
2688     }
2689 
2690   return (distinct <= 2);
2691 }
2692 
2693 /* For each group of address type uses, this function further groups
2694    these uses according to the maximum offset supported by target's
2695    [base + offset] addressing mode.  */
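
/* Sketch: on a target whose [base + offset] form accepts offsets in, say,
   [-4096, 4095], address uses at base, base + 8 and base + 65536 would end
   up as two groups, {base, base + 8} and {base + 65536}; the actual ranges
   are determined by addr_offset_valid_p.  */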
2696 
2697 static void
2698 split_address_groups (struct ivopts_data *data)
2699 {
2700   unsigned int i, j;
2701   /* Whether groups should be split unconditionally at distinct offsets.  */
2702   bool split_p = split_small_address_groups_p (data);
2703 
2704   for (i = 0; i < data->vgroups.length (); i++)
2705     {
2706       struct iv_group *new_group = NULL;
2707       struct iv_group *group = data->vgroups[i];
2708       struct iv_use *use = group->vuses[0];
2709 
2710       use->id = 0;
2711       use->group_id = group->id;
2712       if (group->vuses.length () == 1)
2713 	continue;
2714 
2715       gcc_assert (address_p (use->type));
2716 
2717       for (j = 1; j < group->vuses.length ();)
2718 	{
2719 	  struct iv_use *next = group->vuses[j];
2720 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2721 
2722 	  /* Split the group if asked to, or if the offset against the first
2723 	     use can't fit in the offset part of the addressing mode.  IV uses
2724 	     having the same offset are still kept in one group.  */
2725 	  if (maybe_ne (offset, 0)
2726 	      && (split_p || !addr_offset_valid_p (use, offset)))
2727 	    {
2728 	      if (!new_group)
2729 		new_group = record_group (data, group->type);
2730 	      group->vuses.ordered_remove (j);
2731 	      new_group->vuses.safe_push (next);
2732 	      continue;
2733 	    }
2734 
2735 	  next->id = j;
2736 	  next->group_id = group->id;
2737 	  j++;
2738 	}
2739     }
2740 }
2741 
2742 /* Finds uses of the induction variables that are interesting.  */
2743 
2744 static void
2745 find_interesting_uses (struct ivopts_data *data)
2746 {
2747   basic_block bb;
2748   gimple_stmt_iterator bsi;
2749   basic_block *body = get_loop_body (data->current_loop);
2750   unsigned i;
2751   edge e;
2752 
2753   for (i = 0; i < data->current_loop->num_nodes; i++)
2754     {
2755       edge_iterator ei;
2756       bb = body[i];
2757 
2758       FOR_EACH_EDGE (e, ei, bb->succs)
2759 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2760 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2761 	  find_interesting_uses_outside (data, e);
2762 
2763       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2764 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2765       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2766 	if (!is_gimple_debug (gsi_stmt (bsi)))
2767 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2768     }
2769   free (body);
2770 
2771   split_address_groups (data);
2772 
2773   if (dump_file && (dump_flags & TDF_DETAILS))
2774     {
2775       fprintf (dump_file, "\n<IV Groups>:\n");
2776       dump_groups (dump_file, data);
2777       fprintf (dump_file, "\n");
2778     }
2779 }
2780 
2781 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2782    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2783    we are at the top-level of the processed address.  */
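
/* As a rough example, for EXPR == &a[i_1 + 3] with 4-byte array elements
   this returns &a[i_1] and stores 12 in *OFFSET; for EXPR == p_2 + 16 it
   returns p_2 and stores 16.  */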
2784 
2785 static tree
2786 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2787 		poly_int64 *offset)
2788 {
2789   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2790   enum tree_code code;
2791   tree type, orig_type = TREE_TYPE (expr);
2792   poly_int64 off0, off1;
2793   HOST_WIDE_INT st;
2794   tree orig_expr = expr;
2795 
2796   STRIP_NOPS (expr);
2797 
2798   type = TREE_TYPE (expr);
2799   code = TREE_CODE (expr);
2800   *offset = 0;
2801 
2802   switch (code)
2803     {
2804     case POINTER_PLUS_EXPR:
2805     case PLUS_EXPR:
2806     case MINUS_EXPR:
2807       op0 = TREE_OPERAND (expr, 0);
2808       op1 = TREE_OPERAND (expr, 1);
2809 
2810       op0 = strip_offset_1 (op0, false, false, &off0);
2811       op1 = strip_offset_1 (op1, false, false, &off1);
2812 
2813       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2814       if (op0 == TREE_OPERAND (expr, 0)
2815 	  && op1 == TREE_OPERAND (expr, 1))
2816 	return orig_expr;
2817 
2818       if (integer_zerop (op1))
2819 	expr = op0;
2820       else if (integer_zerop (op0))
2821 	{
2822 	  if (code == MINUS_EXPR)
2823 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2824 	  else
2825 	    expr = op1;
2826 	}
2827       else
2828 	expr = fold_build2 (code, type, op0, op1);
2829 
2830       return fold_convert (orig_type, expr);
2831 
2832     case MULT_EXPR:
2833       op1 = TREE_OPERAND (expr, 1);
2834       if (!cst_and_fits_in_hwi (op1))
2835 	return orig_expr;
2836 
2837       op0 = TREE_OPERAND (expr, 0);
2838       op0 = strip_offset_1 (op0, false, false, &off0);
2839       if (op0 == TREE_OPERAND (expr, 0))
2840 	return orig_expr;
2841 
2842       *offset = off0 * int_cst_value (op1);
2843       if (integer_zerop (op0))
2844 	expr = op0;
2845       else
2846 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2847 
2848       return fold_convert (orig_type, expr);
2849 
2850     case ARRAY_REF:
2851     case ARRAY_RANGE_REF:
2852       if (!inside_addr)
2853 	return orig_expr;
2854 
2855       step = array_ref_element_size (expr);
2856       if (!cst_and_fits_in_hwi (step))
2857 	break;
2858 
2859       st = int_cst_value (step);
2860       op1 = TREE_OPERAND (expr, 1);
2861       op1 = strip_offset_1 (op1, false, false, &off1);
2862       *offset = off1 * st;
2863 
2864       if (top_compref
2865 	  && integer_zerop (op1))
2866 	{
2867 	  /* Strip the component reference completely.  */
2868 	  op0 = TREE_OPERAND (expr, 0);
2869 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2870 	  *offset += off0;
2871 	  return op0;
2872 	}
2873       break;
2874 
2875     case COMPONENT_REF:
2876       {
2877 	tree field;
2878 
2879 	if (!inside_addr)
2880 	  return orig_expr;
2881 
2882 	tmp = component_ref_field_offset (expr);
2883 	field = TREE_OPERAND (expr, 1);
2884 	if (top_compref
2885 	    && cst_and_fits_in_hwi (tmp)
2886 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2887 	  {
2888 	    HOST_WIDE_INT boffset, abs_off;
2889 
2890 	    /* Strip the component reference completely.  */
2891 	    op0 = TREE_OPERAND (expr, 0);
2892 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2893 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2894 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2895 	    if (boffset < 0)
2896 	      abs_off = -abs_off;
2897 
2898 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2899 	    return op0;
2900 	  }
2901       }
2902       break;
2903 
2904     case ADDR_EXPR:
2905       op0 = TREE_OPERAND (expr, 0);
2906       op0 = strip_offset_1 (op0, true, true, &off0);
2907       *offset += off0;
2908 
2909       if (op0 == TREE_OPERAND (expr, 0))
2910 	return orig_expr;
2911 
2912       expr = build_fold_addr_expr (op0);
2913       return fold_convert (orig_type, expr);
2914 
2915     case MEM_REF:
2916       /* ???  Offset operand?  */
2917       inside_addr = false;
2918       break;
2919 
2920     default:
2921       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2922 	return build_int_cst (orig_type, 0);
2923       return orig_expr;
2924     }
2925 
2926   /* Default handling of expressions for which we want to recurse into
2927      the first operand.  */
2928   op0 = TREE_OPERAND (expr, 0);
2929   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2930   *offset += off0;
2931 
2932   if (op0 == TREE_OPERAND (expr, 0)
2933       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2934     return orig_expr;
2935 
2936   expr = copy_node (expr);
2937   TREE_OPERAND (expr, 0) = op0;
2938   if (op1)
2939     TREE_OPERAND (expr, 1) = op1;
2940 
2941   /* Inside address, we might strip the top level component references,
2942      thus changing type of the expression.  Handling of ADDR_EXPR
2943      will fix that.  */
2944   expr = fold_convert (orig_type, expr);
2945 
2946   return expr;
2947 }
2948 
2949 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2950 
2951 tree
2952 strip_offset (tree expr, poly_uint64_pod *offset)
2953 {
2954   poly_int64 off;
2955   tree core = strip_offset_1 (expr, false, false, &off);
2956   *offset = off;
2957   return core;
2958 }
2959 
2960 /* Returns variant of TYPE that can be used as base for different uses.
2961    We return unsigned type with the same precision, which avoids problems
2962    with overflows.  */
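
/* E.g. int maps to unsigned int, and pointer types map to the unsigned
   integer type of the same precision (via unsigned_type_for).  */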
2963 
2964 static tree
2965 generic_type_for (tree type)
2966 {
2967   if (POINTER_TYPE_P (type))
2968     return unsigned_type_for (type);
2969 
2970   if (TYPE_UNSIGNED (type))
2971     return type;
2972 
2973   return unsigned_type_for (type);
2974 }
2975 
2976 /* Private data for walk_tree.  */
2977 
2978 struct walk_tree_data
2979 {
2980   bitmap *inv_vars;
2981   struct ivopts_data *idata;
2982 };
2983 
2984 /* Callback function for walk_tree; it records invariants and symbol
2985    references in *EXPR_P.  DATA is the structure storing result info.  */
2986 
2987 static tree
2988 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2989 {
2990   tree op = *expr_p;
2991   struct version_info *info;
2992   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2993 
2994   if (TREE_CODE (op) != SSA_NAME)
2995     return NULL_TREE;
2996 
2997   info = name_info (wdata->idata, op);
2998   /* Because we expand simple operations when finding IVs, a loop invariant
2999      variable that isn't referred to by the original loop could be used now.
3000      Record such invariant variables here.  */
3001   if (!info->iv)
3002     {
3003       struct ivopts_data *idata = wdata->idata;
3004       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3005 
3006       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3007 	{
3008 	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3009 	  record_invariant (idata, op, false);
3010 	}
3011     }
3012   if (!info->inv_id || info->has_nonlin_use)
3013     return NULL_TREE;
3014 
3015   if (!*wdata->inv_vars)
3016     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3017   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3018 
3019   return NULL_TREE;
3020 }
3021 
3022 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we should
3023    store them.  */
3024 
3025 static inline void
3026 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3027 {
3028   struct walk_tree_data wdata;
3029 
3030   if (!inv_vars)
3031     return;
3032 
3033   wdata.idata = data;
3034   wdata.inv_vars = inv_vars;
3035   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3036 }
3037 
3038 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3039    will be recorded if it doesn't exist yet.  Given below two exprs:
3040      inv_expr + cst1, inv_expr + cst2
3041    It's hard to decide whether the constant part should be stripped
3042    or not.  We choose not to strip it, based on the facts below:
3043      1) We need to count the ADD cost for the constant part if it's
3044 	stripped, which isn't always trivial where this function is called.
3045      2) Stripping the constant away may conflict with the subsequent loop
3046 	invariant hoisting pass.
3047      3) Not stripping the constant away results in more invariant exprs,
3048 	which usually leads to decisions preferring lower register pressure.  */
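
/* So for the two expressions above, inv_expr + cst1 and inv_expr + cst2
   each get a separate iv_inv_expr_ent with its own id, instead of sharing
   one entry for inv_expr.  */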
3049 
3050 static iv_inv_expr_ent *
3051 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3052 {
3053   STRIP_NOPS (inv_expr);
3054 
3055   if (poly_int_tree_p (inv_expr)
3056       || TREE_CODE (inv_expr) == SSA_NAME)
3057     return NULL;
3058 
3059   /* Don't strip constant part away as we used to.  */
3060 
3061   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3062   struct iv_inv_expr_ent ent;
3063   ent.expr = inv_expr;
3064   ent.hash = iterative_hash_expr (inv_expr, 0);
3065   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3066 
3067   if (!*slot)
3068     {
3069       *slot = XNEW (struct iv_inv_expr_ent);
3070       (*slot)->expr = inv_expr;
3071       (*slot)->hash = ent.hash;
3072       (*slot)->id = ++data->max_inv_expr_id;
3073     }
3074 
3075   return *slot;
3076 }
3077 
3078 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3079    position to POS.  If USE is not NULL, the candidate is set as related to
3080    it.  BASE and STEP must both be non-NULL (see the assertion below).  */
3082 
3083 static struct iv_cand *
3084 add_candidate_1 (struct ivopts_data *data,
3085 		 tree base, tree step, bool important, enum iv_position pos,
3086 		 struct iv_use *use, gimple *incremented_at,
3087 		 struct iv *orig_iv = NULL)
3088 {
3089   unsigned i;
3090   struct iv_cand *cand = NULL;
3091   tree type, orig_type;
3092 
3093   gcc_assert (base && step);
3094 
3095   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3096      live, but the ivopts code may replace a real pointer with one
3097      pointing before or after the memory block that is then adjusted
3098      into the memory block during the loop.  FIXME: It would likely be
3099      better to actually force the pointer live and still use ivopts;
3100      for example, it would be enough to write the pointer into memory
3101      and keep it there until after the loop.  */
3102   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3103     return NULL;
3104 
3105   /* For non-original variables, make sure their values are computed in a type
3106      that does not invoke undefined behavior on overflows (since in general,
3107      we cannot prove that these induction variables are non-wrapping).  */
3108   if (pos != IP_ORIGINAL)
3109     {
3110       orig_type = TREE_TYPE (base);
3111       type = generic_type_for (orig_type);
3112       if (type != orig_type)
3113 	{
3114 	  base = fold_convert (type, base);
3115 	  step = fold_convert (type, step);
3116 	}
3117     }
3118 
3119   for (i = 0; i < data->vcands.length (); i++)
3120     {
3121       cand = data->vcands[i];
3122 
3123       if (cand->pos != pos)
3124 	continue;
3125 
3126       if (cand->incremented_at != incremented_at
3127 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3128 	      && cand->ainc_use != use))
3129 	continue;
3130 
3131       if (operand_equal_p (base, cand->iv->base, 0)
3132 	  && operand_equal_p (step, cand->iv->step, 0)
3133 	  && (TYPE_PRECISION (TREE_TYPE (base))
3134 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3135 	break;
3136     }
3137 
3138   if (i == data->vcands.length ())
3139     {
3140       cand = XCNEW (struct iv_cand);
3141       cand->id = i;
3142       cand->iv = alloc_iv (data, base, step);
3143       cand->pos = pos;
3144       if (pos != IP_ORIGINAL)
3145 	{
3146 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3147 	  cand->var_after = cand->var_before;
3148 	}
3149       cand->important = important;
3150       cand->incremented_at = incremented_at;
3151       data->vcands.safe_push (cand);
3152 
3153       if (!poly_int_tree_p (step))
3154 	{
3155 	  find_inv_vars (data, &step, &cand->inv_vars);
3156 
3157 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3158 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3159 	  if (inv_expr != NULL)
3160 	    {
3161 	      cand->inv_exprs = cand->inv_vars;
3162 	      cand->inv_vars = NULL;
3163 	      if (cand->inv_exprs)
3164 		bitmap_clear (cand->inv_exprs);
3165 	      else
3166 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3167 
3168 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3169 	    }
3170 	}
3171 
3172       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3173 	cand->ainc_use = use;
3174       else
3175 	cand->ainc_use = NULL;
3176 
3177       cand->orig_iv = orig_iv;
3178       if (dump_file && (dump_flags & TDF_DETAILS))
3179 	dump_cand (dump_file, cand);
3180     }
3181 
3182   cand->important |= important;
3183 
3184   /* Relate candidate to the group for which it is added.  */
3185   if (use)
3186     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187 
3188   return cand;
3189 }
3190 
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3192    is allowed.
3193 
3194    The purpose is to avoid splitting latch edge with a biv increment, thus
3195    creating a jump, possibly confusing other optimization passes and leaving
3196    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3197    available (so we do not have a better alternative), or if the latch edge
3198    is already nonempty.  */
3199 
3200 static bool
3201 allow_ip_end_pos_p (struct loop *loop)
3202 {
3203   if (!ip_normal_pos (loop))
3204     return true;
3205 
3206   if (!empty_block_p (ip_end_pos (loop)))
3207     return true;
3208 
3209   return false;
3210 }
3211 
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213    Important field is set to IMPORTANT.  */
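
/* Sketch: for an address use *p_1 whose iv steps by the size of the
   accessed mode (e.g. 4 for a 4-byte access), and a target that supports
   pre/post increment or decrement addressing for that mode, this adds
   candidates positioned IP_BEFORE_USE or IP_AFTER_USE at the use
   statement, so the increment can later be combined into an auto-inc/dec
   address.  */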
3214 
3215 static void
3216 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3217 			bool important, struct iv_use *use)
3218 {
3219   basic_block use_bb = gimple_bb (use->stmt);
3220   machine_mode mem_mode;
3221   unsigned HOST_WIDE_INT cstepi;
3222 
3223   /* If we insert the increment in any position other than the standard
3224      ones, we must ensure that it is incremented once per iteration.
3225      It must not be in an inner nested loop, or one side of an if
3226      statement.  */
3227   if (use_bb->loop_father != data->current_loop
3228       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3229       || stmt_can_throw_internal (cfun, use->stmt)
3230       || !cst_and_fits_in_hwi (step))
3231     return;
3232 
3233   cstepi = int_cst_value (step);
3234 
3235   mem_mode = TYPE_MODE (use->mem_type);
3236   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3237 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3238        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3239       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3240 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3241 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3242     {
3243       enum tree_code code = MINUS_EXPR;
3244       tree new_base;
3245       tree new_step = step;
3246 
3247       if (POINTER_TYPE_P (TREE_TYPE (base)))
3248 	{
3249 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3250 	  code = POINTER_PLUS_EXPR;
3251 	}
3252       else
3253 	new_step = fold_convert (TREE_TYPE (base), new_step);
3254       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3255       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3256 		       use->stmt);
3257     }
3258   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3259 	|| USE_STORE_POST_INCREMENT (mem_mode))
3260        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3261       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3262 	   || USE_STORE_POST_DECREMENT (mem_mode))
3263 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3264     {
3265       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3266 		       use->stmt);
3267     }
3268 }
3269 
3270 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3271    position to POS.  If USE is not NULL, the candidate is set as related to
3272    it.  The candidate computation is scheduled before exit condition and at
3273    the end of loop.  */
3274 
3275 static void
3276 add_candidate (struct ivopts_data *data,
3277 	       tree base, tree step, bool important, struct iv_use *use,
3278 	       struct iv *orig_iv = NULL)
3279 {
3280   if (ip_normal_pos (data->current_loop))
3281     add_candidate_1 (data, base, step, important,
3282 		     IP_NORMAL, use, NULL, orig_iv);
3283   if (ip_end_pos (data->current_loop)
3284       && allow_ip_end_pos_p (data->current_loop))
3285     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3286 }
3287 
3288 /* Adds standard iv candidates.  */
3289 
3290 static void
3291 add_standard_iv_candidates (struct ivopts_data *data)
3292 {
3293   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3294 
3295   /* The same for a double-integer type if it is still fast enough.  */
3296   if (TYPE_PRECISION
3297 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3298       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3299     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3300 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3301 
3302   /* The same for a double-integer type if it is still fast enough.  */
3303   if (TYPE_PRECISION
3304 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3305       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3306     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3307 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3308 }
3309 
3310 
3311 /* Adds candidates based on the old induction variable IV.  */
3312 
3313 static void
3314 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3315 {
3316   gimple *phi;
3317   tree def;
3318   struct iv_cand *cand;
3319 
3320   /* Check if this biv is used in an address type use.  */
3321   if (iv->no_overflow  && iv->have_address_use
3322       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3323       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3324     {
3325       tree base = fold_convert (sizetype, iv->base);
3326       tree step = fold_convert (sizetype, iv->step);
3327 
3328       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3329       add_candidate (data, base, step, true, NULL, iv);
3330       /* Add iv cand of the original type only if it has nonlinear use.  */
3331       if (iv->nonlin_use)
3332 	add_candidate (data, iv->base, iv->step, true, NULL);
3333     }
3334   else
3335     add_candidate (data, iv->base, iv->step, true, NULL);
3336 
3337   /* The same, but with initial value zero.  */
3338   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3339     add_candidate (data, size_int (0), iv->step, true, NULL);
3340   else
3341     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3342 		   iv->step, true, NULL);
3343 
3344   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3345   if (gimple_code (phi) == GIMPLE_PHI)
3346     {
3347       /* Additionally record the possibility of leaving the original iv
3348 	 untouched.  */
3349       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3350       /* Don't add candidate if it's from another PHI node because
3351 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3352       phi = SSA_NAME_DEF_STMT (def);
3353       if (gimple_code (phi) != GIMPLE_PHI)
3354 	{
3355 	  cand = add_candidate_1 (data,
3356 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3357 				  SSA_NAME_DEF_STMT (def));
3358 	  if (cand)
3359 	    {
3360 	      cand->var_before = iv->ssa_name;
3361 	      cand->var_after = def;
3362 	    }
3363 	}
3364       else
3365 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3366     }
3367 }
3368 
3369 /* Adds candidates based on the old induction variables.  */
3370 
3371 static void
3372 add_iv_candidate_for_bivs (struct ivopts_data *data)
3373 {
3374   unsigned i;
3375   struct iv *iv;
3376   bitmap_iterator bi;
3377 
3378   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3379     {
3380       iv = ver_info (data, i)->iv;
3381       if (iv && iv->biv_p && !integer_zerop (iv->step))
3382 	add_iv_candidate_for_biv (data, iv);
3383     }
3384 }
3385 
3386 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3387 
3388 static void
3389 record_common_cand (struct ivopts_data *data, tree base,
3390 		    tree step, struct iv_use *use)
3391 {
3392   struct iv_common_cand ent;
3393   struct iv_common_cand **slot;
3394 
3395   ent.base = base;
3396   ent.step = step;
3397   ent.hash = iterative_hash_expr (base, 0);
3398   ent.hash = iterative_hash_expr (step, ent.hash);
3399 
3400   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3401   if (*slot == NULL)
3402     {
3403       *slot = new iv_common_cand ();
3404       (*slot)->base = base;
3405       (*slot)->step = step;
3406       (*slot)->uses.create (8);
3407       (*slot)->hash = ent.hash;
3408       data->iv_common_cands.safe_push ((*slot));
3409     }
3410 
3411   gcc_assert (use != NULL);
3412   (*slot)->uses.safe_push (use);
3413   return;
3414 }
3415 
3416 /* Comparison function used to sort common candidates.  */
3417 
3418 static int
3419 common_cand_cmp (const void *p1, const void *p2)
3420 {
3421   unsigned n1, n2;
3422   const struct iv_common_cand *const *const ccand1
3423     = (const struct iv_common_cand *const *)p1;
3424   const struct iv_common_cand *const *const ccand2
3425     = (const struct iv_common_cand *const *)p2;
3426 
3427   n1 = (*ccand1)->uses.length ();
3428   n2 = (*ccand2)->uses.length ();
3429   return n2 - n1;
3430 }
3431 
3432 /* Adds IV candidates based on the common candidates recorded.  */
3433 
3434 static void
3435 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3436 {
3437   unsigned i, j;
3438   struct iv_cand *cand_1, *cand_2;
3439 
3440   data->iv_common_cands.qsort (common_cand_cmp);
3441   for (i = 0; i < data->iv_common_cands.length (); i++)
3442     {
3443       struct iv_common_cand *ptr = data->iv_common_cands[i];
3444 
3445       /* Only add IV candidate if it's derived from multiple uses.  */
3446       if (ptr->uses.length () <= 1)
3447 	break;
3448 
3449       cand_1 = NULL;
3450       cand_2 = NULL;
3451       if (ip_normal_pos (data->current_loop))
3452 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3453 				  false, IP_NORMAL, NULL, NULL);
3454 
3455       if (ip_end_pos (data->current_loop)
3456 	  && allow_ip_end_pos_p (data->current_loop))
3457 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3458 				  false, IP_END, NULL, NULL);
3459 
3460       /* Bind deriving uses and the new candidates.  */
3461       for (j = 0; j < ptr->uses.length (); j++)
3462 	{
3463 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3464 	  if (cand_1)
3465 	    bitmap_set_bit (group->related_cands, cand_1->id);
3466 	  if (cand_2)
3467 	    bitmap_set_bit (group->related_cands, cand_2->id);
3468 	}
3469     }
3470 
3471   /* Release data since it is useless from this point.  */
3472   data->iv_common_cand_tab->empty ();
3473   data->iv_common_cands.truncate (0);
3474 }
3475 
3476 /* Adds candidates based on the value of USE's iv.  */
3477 
3478 static void
3479 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3480 {
3481   poly_uint64 offset;
3482   tree base;
3483   tree basetype;
3484   struct iv *iv = use->iv;
3485 
3486   add_candidate (data, iv->base, iv->step, false, use);
3487 
3488   /* Record common candidate for use in case it can be shared by others.  */
3489   record_common_cand (data, iv->base, iv->step, use);
3490 
3491   /* Record common candidate with initial value zero.  */
3492   basetype = TREE_TYPE (iv->base);
3493   if (POINTER_TYPE_P (basetype))
3494     basetype = sizetype;
3495   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3496 
3497   /* Record common candidate with the constant offset stripped from the base.
3498      As with the use itself, we also add a candidate for it directly.  */
3499   base = strip_offset (iv->base, &offset);
3500   if (maybe_ne (offset, 0U) || base != iv->base)
3501     {
3502       record_common_cand (data, base, iv->step, use);
3503       add_candidate (data, base, iv->step, false, use);
3504     }
3505 
3506   /* Record common candidate with base_object removed in base.  */
3507   base = iv->base;
3508   STRIP_NOPS (base);
3509   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3510     {
3511       tree step = iv->step;
3512 
3513       STRIP_NOPS (step);
3514       base = TREE_OPERAND (base, 1);
3515       step = fold_convert (sizetype, step);
3516       record_common_cand (data, base, step, use);
3517       /* Also record common candidate with offset stripped.  */
3518       base = strip_offset (base, &offset);
3519       if (maybe_ne (offset, 0U))
3520 	record_common_cand (data, base, step, use);
3521     }
3522 
3523   /* Finally, add auto-increment candidates.  Make such variables
3524      important since other iv uses with the same base object may be
3525      based on them.  */
3526   if (use != NULL && address_p (use->type))
3527     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3528 }
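
/* Illustrative sketch (not part of the pass): for a hypothetical address use
   whose iv is {base = p + 32, step = 4} with base object P, the calls above
   record roughly the following common candidates and candidates:

     {p + 32, 4}   -- the iv itself (also added as a candidate directly)
     {0, 4}        -- the same iv with initial value zero
     {p, 4}        -- the constant offset 32 stripped from the base
     {32, 4}       -- the base object P removed from the base

   plus auto-increment candidates for address type uses.  The exact set depends
   on the shape of the base expression; this is just an example.  */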
3529 
3530 /* Adds candidates based on the uses.  */
3531 
3532 static void
3533 add_iv_candidate_for_groups (struct ivopts_data *data)
3534 {
3535   unsigned i;
3536 
3537   /* Only add candidates for the first use in each group.  */
3538   for (i = 0; i < data->vgroups.length (); i++)
3539     {
3540       struct iv_group *group = data->vgroups[i];
3541 
3542       gcc_assert (group->vuses[0] != NULL);
3543       add_iv_candidate_for_use (data, group->vuses[0]);
3544     }
3545   add_iv_candidate_derived_from_uses (data);
3546 }
3547 
3548 /* Record important candidates and add them to related_cands bitmaps.  */
3549 
3550 static void
3551 record_important_candidates (struct ivopts_data *data)
3552 {
3553   unsigned i;
3554   struct iv_group *group;
3555 
3556   for (i = 0; i < data->vcands.length (); i++)
3557     {
3558       struct iv_cand *cand = data->vcands[i];
3559 
3560       if (cand->important)
3561 	bitmap_set_bit (data->important_candidates, i);
3562     }
3563 
3564   data->consider_all_candidates = (data->vcands.length ()
3565 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3566 
3567   /* Add important candidates to groups' related_cands bitmaps.  */
3568   for (i = 0; i < data->vgroups.length (); i++)
3569     {
3570       group = data->vgroups[i];
3571       bitmap_ior_into (group->related_cands, data->important_candidates);
3572     }
3573 }
3574 
3575 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3576    If consider_all_candidates is true, we use a two-dimensional array indexed by
3577    candidate id; otherwise we allocate a small open-addressed table per group.  */
3578 
3579 static void
3580 alloc_use_cost_map (struct ivopts_data *data)
3581 {
3582   unsigned i, size, s;
3583 
3584   for (i = 0; i < data->vgroups.length (); i++)
3585     {
3586       struct iv_group *group = data->vgroups[i];
3587 
3588       if (data->consider_all_candidates)
3589 	size = data->vcands.length ();
3590       else
3591 	{
3592 	  s = bitmap_count_bits (group->related_cands);
3593 
3594 	  /* Round up to a power of two, so that computing the modulo by it is fast.  */
3595 	  size = s ? (1 << ceil_log2 (s)) : 1;
3596 	}
3597 
3598       group->n_map_members = size;
3599       group->cost_map = XCNEWVEC (struct cost_pair, size);
3600     }
3601 }
3602 
3603 /* Sets the cost of the (GROUP, CAND) pair to COST; records that it depends on
3604    invariants INV_VARS and INV_EXPRS, that the value used in expressing it is
3605    VALUE, and that in case of iv elimination the comparison operator is COMP.  */
3606 
3607 static void
3608 set_group_iv_cost (struct ivopts_data *data,
3609 		   struct iv_group *group, struct iv_cand *cand,
3610 		   comp_cost cost, bitmap inv_vars, tree value,
3611 		   enum tree_code comp, bitmap inv_exprs)
3612 {
3613   unsigned i, s;
3614 
3615   if (cost.infinite_cost_p ())
3616     {
3617       BITMAP_FREE (inv_vars);
3618       BITMAP_FREE (inv_exprs);
3619       return;
3620     }
3621 
3622   if (data->consider_all_candidates)
3623     {
3624       group->cost_map[cand->id].cand = cand;
3625       group->cost_map[cand->id].cost = cost;
3626       group->cost_map[cand->id].inv_vars = inv_vars;
3627       group->cost_map[cand->id].inv_exprs = inv_exprs;
3628       group->cost_map[cand->id].value = value;
3629       group->cost_map[cand->id].comp = comp;
3630       return;
3631     }
3632 
3633   /* n_map_members is a power of two, so this computes modulo.  */
3634   s = cand->id & (group->n_map_members - 1);
3635   for (i = s; i < group->n_map_members; i++)
3636     if (!group->cost_map[i].cand)
3637       goto found;
3638   for (i = 0; i < s; i++)
3639     if (!group->cost_map[i].cand)
3640       goto found;
3641 
3642   gcc_unreachable ();
3643 
3644 found:
3645   group->cost_map[i].cand = cand;
3646   group->cost_map[i].cost = cost;
3647   group->cost_map[i].inv_vars = inv_vars;
3648   group->cost_map[i].inv_exprs = inv_exprs;
3649   group->cost_map[i].value = value;
3650   group->cost_map[i].comp = comp;
3651 }
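
/* Illustration (not part of the pass): with consider_all_candidates false and
   n_map_members == 8, a candidate is hashed into the table by

     s = cand->id & (8 - 1);    e.g. id 3 -> slot 3, id 11 -> slot 3

   and colliding entries are placed by linear probing into the next free slot.
   get_group_iv_cost below walks the same probe sequence and stops either at
   the candidate or at an empty slot.  */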
3652 
3653 /* Gets cost of (GROUP, CAND) pair.  */
3654 
3655 static struct cost_pair *
3656 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3657 		   struct iv_cand *cand)
3658 {
3659   unsigned i, s;
3660   struct cost_pair *ret;
3661 
3662   if (!cand)
3663     return NULL;
3664 
3665   if (data->consider_all_candidates)
3666     {
3667       ret = group->cost_map + cand->id;
3668       if (!ret->cand)
3669 	return NULL;
3670 
3671       return ret;
3672     }
3673 
3674   /* n_map_members is a power of two, so this computes modulo.  */
3675   s = cand->id & (group->n_map_members - 1);
3676   for (i = s; i < group->n_map_members; i++)
3677     if (group->cost_map[i].cand == cand)
3678       return group->cost_map + i;
3679     else if (group->cost_map[i].cand == NULL)
3680       return NULL;
3681   for (i = 0; i < s; i++)
3682     if (group->cost_map[i].cand == cand)
3683       return group->cost_map + i;
3684     else if (group->cost_map[i].cand == NULL)
3685       return NULL;
3686 
3687   return NULL;
3688 }
3689 
3690 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3691 static rtx
3692 produce_memory_decl_rtl (tree obj, int *regno)
3693 {
3694   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3695   machine_mode address_mode = targetm.addr_space.address_mode (as);
3696   rtx x;
3697 
3698   gcc_assert (obj);
3699   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3700     {
3701       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3702       x = gen_rtx_SYMBOL_REF (address_mode, name);
3703       SET_SYMBOL_REF_DECL (x, obj);
3704       x = gen_rtx_MEM (DECL_MODE (obj), x);
3705       set_mem_addr_space (x, as);
3706       targetm.encode_section_info (obj, x, true);
3707     }
3708   else
3709     {
3710       x = gen_raw_REG (address_mode, (*regno)++);
3711       x = gen_rtx_MEM (DECL_MODE (obj), x);
3712       set_mem_addr_space (x, as);
3713     }
3714 
3715   return x;
3716 }
3717 
3718 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3719    walk_tree.  DATA contains the actual fake register number.  */
3720 
3721 static tree
3722 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3723 {
3724   tree obj = NULL_TREE;
3725   rtx x = NULL_RTX;
3726   int *regno = (int *) data;
3727 
3728   switch (TREE_CODE (*expr_p))
3729     {
3730     case ADDR_EXPR:
3731       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3732 	   handled_component_p (*expr_p);
3733 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3734 	continue;
3735       obj = *expr_p;
3736       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3737 	x = produce_memory_decl_rtl (obj, regno);
3738       break;
3739 
3740     case SSA_NAME:
3741       *ws = 0;
3742       obj = SSA_NAME_VAR (*expr_p);
3743       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3744       if (!obj)
3745 	return NULL_TREE;
3746       if (!DECL_RTL_SET_P (obj))
3747 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3748       break;
3749 
3750     case VAR_DECL:
3751     case PARM_DECL:
3752     case RESULT_DECL:
3753       *ws = 0;
3754       obj = *expr_p;
3755 
3756       if (DECL_RTL_SET_P (obj))
3757 	break;
3758 
3759       if (DECL_MODE (obj) == BLKmode)
3760 	x = produce_memory_decl_rtl (obj, regno);
3761       else
3762 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3763 
3764       break;
3765 
3766     default:
3767       break;
3768     }
3769 
3770   if (x)
3771     {
3772       decl_rtl_to_reset.safe_push (obj);
3773       SET_DECL_RTL (obj, x);
3774     }
3775 
3776   return NULL_TREE;
3777 }
3778 
3779 /* Determines cost of the computation of EXPR.  */
3780 
3781 static unsigned
3782 computation_cost (tree expr, bool speed)
3783 {
3784   rtx_insn *seq;
3785   rtx rslt;
3786   tree type = TREE_TYPE (expr);
3787   unsigned cost;
3788   /* Avoid using hard regs in ways which may be unsupported.  */
3789   int regno = LAST_VIRTUAL_REGISTER + 1;
3790   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3791   enum node_frequency real_frequency = node->frequency;
3792 
3793   node->frequency = NODE_FREQUENCY_NORMAL;
3794   crtl->maybe_hot_insn_p = speed;
3795   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3796   start_sequence ();
3797   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3798   seq = get_insns ();
3799   end_sequence ();
3800   default_rtl_profile ();
3801   node->frequency = real_frequency;
3802 
3803   cost = seq_cost (seq, speed);
3804   if (MEM_P (rslt))
3805     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3806 			  TYPE_ADDR_SPACE (type), speed);
3807   else if (!REG_P (rslt))
3808     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3809 
3810   return cost;
3811 }
3812 
3813 /* Returns variable containing the value of candidate CAND at statement AT.  */
3814 
3815 static tree
3816 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3817 {
3818   if (stmt_after_increment (loop, cand, stmt))
3819     return cand->var_after;
3820   else
3821     return cand->var_before;
3822 }
3823 
3824 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3825    same precision that is at least as wide as the precision of TYPE, stores
3826    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3827    type of A and B.  */
3828 
3829 static tree
3830 determine_common_wider_type (tree *a, tree *b)
3831 {
3832   tree wider_type = NULL;
3833   tree suba, subb;
3834   tree atype = TREE_TYPE (*a);
3835 
3836   if (CONVERT_EXPR_P (*a))
3837     {
3838       suba = TREE_OPERAND (*a, 0);
3839       wider_type = TREE_TYPE (suba);
3840       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3841 	return atype;
3842     }
3843   else
3844     return atype;
3845 
3846   if (CONVERT_EXPR_P (*b))
3847     {
3848       subb = TREE_OPERAND (*b, 0);
3849       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3850 	return atype;
3851     }
3852   else
3853     return atype;
3854 
3855   *a = suba;
3856   *b = subb;
3857   return wider_type;
3858 }
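
/* Example (hypothetical, for illustration only): if *A is (unsigned int) X and
   *B is (unsigned int) Y, where X and Y have type unsigned long (whose
   precision is not smaller than that of unsigned int), the conversions are
   stripped, *A = X and *B = Y are stored, and unsigned long is returned, so
   that the difference X - Y can be folded in the wider type before the result
   is narrowed.  */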
3859 
3860 /* Determines the expression by which USE is expressed from induction variable
3861    CAND at statement AT in LOOP.  The expression is stored in decomposed form,
3862    in two parts: the invariant part is stored in AFF_INV, the variant part in
3863    AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if it is
3864    non-null.  Returns false if USE cannot be expressed using CAND.  */
3865 
3866 static bool
3867 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3868 		       struct iv_cand *cand, struct aff_tree *aff_inv,
3869 		       struct aff_tree *aff_var, widest_int *prat = NULL)
3870 {
3871   tree ubase = use->iv->base, ustep = use->iv->step;
3872   tree cbase = cand->iv->base, cstep = cand->iv->step;
3873   tree common_type, uutype, var, cstep_common;
3874   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3875   aff_tree aff_cbase;
3876   widest_int rat;
3877 
3878   /* We must have enough precision to express the values of USE.  */
3879   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3880     return false;
3881 
3882   var = var_at_stmt (loop, cand, at);
3883   uutype = unsigned_type_for (utype);
3884 
3885   /* If the conversion is not a no-op, perform it.  */
3886   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3887     {
3888       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3889 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3890 	{
3891 	  tree inner_base, inner_step, inner_type;
3892 	  inner_base = TREE_OPERAND (cbase, 0);
3893 	  if (CONVERT_EXPR_P (cstep))
3894 	    inner_step = TREE_OPERAND (cstep, 0);
3895 	  else
3896 	    inner_step = cstep;
3897 
3898 	  inner_type = TREE_TYPE (inner_base);
3899 	  /* If the candidate is added from a biv whose type is smaller than
3900 	     ctype, we know both the candidate and the biv won't overflow.
3901 	     In this case, it's safe to skip the conversion in the candidate.
3902 	     As an example, (unsigned short)((unsigned long)A) equals
3903 	     (unsigned short)A, if A has a type no larger than short.  */
3904 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3905 	    {
3906 	      cbase = inner_base;
3907 	      cstep = inner_step;
3908 	    }
3909 	}
3910       cbase = fold_convert (uutype, cbase);
3911       cstep = fold_convert (uutype, cstep);
3912       var = fold_convert (uutype, var);
3913     }
3914 
3915   /* Ratio is 1 when computing the value of biv cand by itself.
3916      We can't rely on constant_multiple_of in this case because the
3917      use is created after the original biv is selected.  The call
3918      could fail because of inconsistent fold behavior.  See PR68021
3919      for more information.  */
3920   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3921     {
3922       gcc_assert (is_gimple_assign (use->stmt));
3923       gcc_assert (use->iv->ssa_name == cand->var_after);
3924       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3925       rat = 1;
3926     }
3927   else if (!constant_multiple_of (ustep, cstep, &rat))
3928     return false;
3929 
3930   if (prat)
3931     *prat = rat;
3932 
3933   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3934      type, we achieve better folding by computing their difference in this
3935      wider type, and cast the result to UUTYPE.  We do not need to worry about
3936      overflows, as all the arithmetic will in the end be performed in UUTYPE
3937      anyway.  */
3938   common_type = determine_common_wider_type (&ubase, &cbase);
3939 
3940   /* use = ubase - ratio * cbase + ratio * var.  */
3941   tree_to_aff_combination (ubase, common_type, aff_inv);
3942   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3943   tree_to_aff_combination (var, uutype, aff_var);
3944 
3945   /* We need to shift the value if we are after the increment.  */
3946   if (stmt_after_increment (loop, cand, at))
3947     {
3948       aff_tree cstep_aff;
3949 
3950       if (common_type != uutype)
3951 	cstep_common = fold_convert (common_type, cstep);
3952       else
3953 	cstep_common = cstep;
3954 
3955       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3956       aff_combination_add (&aff_cbase, &cstep_aff);
3957     }
3958 
3959   aff_combination_scale (&aff_cbase, -rat);
3960   aff_combination_add (aff_inv, &aff_cbase);
3961   if (common_type != uutype)
3962     aff_combination_convert (aff_inv, uutype);
3963 
3964   aff_combination_scale (aff_var, rat);
3965   return true;
3966 }
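
/* Worked example (hypothetical numbers): for a use iv {base = 16, step = 8}
   and a candidate iv {base = 0, step = 4}, constant_multiple_of gives
   rat = 2, so the use value is expressed as

     use = 16 - 2 * 0 + 2 * cand = 16 + 2 * cand,

   with the invariant part 16 ending up in AFF_INV and 2 * cand in AFF_VAR.  */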
3967 
3968 /* Determines the expression by which USE is expressed from induction variable
3969    CAND at statement AT in LOOP.  The expression is stored in decomposed form
3970    in AFF.  Returns false if USE cannot be expressed using CAND.  */
3971 
3972 static bool
3973 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3974 		     struct iv_cand *cand, struct aff_tree *aff)
3975 {
3976   aff_tree aff_var;
3977 
3978   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3979     return false;
3980 
3981   aff_combination_add (aff, &aff_var);
3982   return true;
3983 }
3984 
3985 /* Return the type of USE.  */
3986 
3987 static tree
3988 get_use_type (struct iv_use *use)
3989 {
3990   tree base_type = TREE_TYPE (use->iv->base);
3991   tree type;
3992 
3993   if (use->type == USE_REF_ADDRESS)
3994     {
3995       /* The base_type may be a void pointer.  Create a pointer type based on
3996 	 the mem_ref instead.  */
3997       type = build_pointer_type (TREE_TYPE (*use->op_p));
3998       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3999 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4000     }
4001   else
4002     type = base_type;
4003 
4004   return type;
4005 }
4006 
4007 /* Determines the expression by which USE is expressed from induction variable
4008    CAND at statement AT in LOOP.  The computation is unshared.  */
4009 
4010 static tree
4011 get_computation_at (struct loop *loop, gimple *at,
4012 		    struct iv_use *use, struct iv_cand *cand)
4013 {
4014   aff_tree aff;
4015   tree type = get_use_type (use);
4016 
4017   if (!get_computation_aff (loop, at, use, cand, &aff))
4018     return NULL_TREE;
4019   unshare_aff_combination (&aff);
4020   return fold_convert (type, aff_combination_to_tree (&aff));
4021 }
4022 
4023 /* Adjust the cost COST for being in loop setup rather than loop body.
4024    If we're optimizing for space, the loop setup overhead is constant;
4025    if we're optimizing for speed, amortize it over the expected number of
4026    loop iterations.  If ROUND_UP_P is true, the result is rounded up rather
4027    than truncated toward zero when optimizing for speed.  */
4028 static unsigned
4029 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4030 		   bool round_up_p = false)
4031 {
4032   if (cost == INFTY)
4033     return cost;
4034   else if (optimize_loop_for_speed_p (data->current_loop))
4035     {
4036       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4037       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4038     }
4039   else
4040     return cost;
4041 }
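
/* Worked example (hypothetical numbers): with COST == 10 and an average of
   4 iterations, optimizing for speed yields 10 / 4 == 2, or (10 + 3) / 4 == 3
   when ROUND_UP_P is true; when optimizing for size the full cost of 10 is
   kept, since the setup code is emitted only once.  */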
4042 
4043 /* Calculate the speed or size cost (according to SPEED) of shiftadd EXPR in
4044    MODE.  MULT is the EXPR operand holding the multiplication implemented as a
4045    shift.  COST0 and COST1 are the costs for calculating the operands of EXPR.
4046    Returns true if successful, and returns the cost in COST.  */
4047 
4048 static bool
4049 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4050 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4051 {
4052   comp_cost res;
4053   tree op1 = TREE_OPERAND (expr, 1);
4054   tree cst = TREE_OPERAND (mult, 1);
4055   tree multop = TREE_OPERAND (mult, 0);
4056   int m = exact_log2 (int_cst_value (cst));
4057   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4058   int as_cost, sa_cost;
4059   bool mult_in_op1;
4060 
4061   if (!(m >= 0 && m < maxm))
4062     return false;
4063 
4064   STRIP_NOPS (op1);
4065   mult_in_op1 = operand_equal_p (op1, mult, 0);
4066 
4067   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4068 
4069   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4070      use that in preference to a shift insn followed by an add insn.  */
4071   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4072 	     ? shiftadd_cost (speed, mode, m)
4073 	     : (mult_in_op1
4074 		? shiftsub1_cost (speed, mode, m)
4075 		: shiftsub0_cost (speed, mode, m)));
4076 
4077   res = comp_cost (MIN (as_cost, sa_cost), 0);
4078   res += (mult_in_op1 ? cost0 : cost1);
4079 
4080   STRIP_NOPS (multop);
4081   if (!is_gimple_val (multop))
4082     res += force_expr_to_var_cost (multop, speed);
4083 
4084   *cost = res;
4085   return true;
4086 }
4087 
4088 /* Estimates cost of forcing expression EXPR into a variable.  */
4089 
4090 static comp_cost
4091 force_expr_to_var_cost (tree expr, bool speed)
4092 {
4093   static bool costs_initialized = false;
4094   static unsigned integer_cost [2];
4095   static unsigned symbol_cost [2];
4096   static unsigned address_cost [2];
4097   tree op0, op1;
4098   comp_cost cost0, cost1, cost;
4099   machine_mode mode;
4100   scalar_int_mode int_mode;
4101 
4102   if (!costs_initialized)
4103     {
4104       tree type = build_pointer_type (integer_type_node);
4105       tree var, addr;
4106       rtx x;
4107       int i;
4108 
4109       var = create_tmp_var_raw (integer_type_node, "test_var");
4110       TREE_STATIC (var) = 1;
4111       x = produce_memory_decl_rtl (var, NULL);
4112       SET_DECL_RTL (var, x);
4113 
4114       addr = build1 (ADDR_EXPR, type, var);
4115 
4116 
4117       for (i = 0; i < 2; i++)
4118 	{
4119 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4120 							     2000), i);
4121 
4122 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4123 
4124 	  address_cost[i]
4125 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4126 	  if (dump_file && (dump_flags & TDF_DETAILS))
4127 	    {
4128 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4129 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4130 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4131 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4132 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4133 	      fprintf (dump_file, "\n");
4134 	    }
4135 	}
4136 
4137       costs_initialized = true;
4138     }
4139 
4140   STRIP_NOPS (expr);
4141 
4142   if (SSA_VAR_P (expr))
4143     return no_cost;
4144 
4145   if (is_gimple_min_invariant (expr))
4146     {
4147       if (poly_int_tree_p (expr))
4148 	return comp_cost (integer_cost [speed], 0);
4149 
4150       if (TREE_CODE (expr) == ADDR_EXPR)
4151 	{
4152 	  tree obj = TREE_OPERAND (expr, 0);
4153 
4154 	  if (VAR_P (obj)
4155 	      || TREE_CODE (obj) == PARM_DECL
4156 	      || TREE_CODE (obj) == RESULT_DECL)
4157 	    return comp_cost (symbol_cost [speed], 0);
4158 	}
4159 
4160       return comp_cost (address_cost [speed], 0);
4161     }
4162 
4163   switch (TREE_CODE (expr))
4164     {
4165     case POINTER_PLUS_EXPR:
4166     case PLUS_EXPR:
4167     case MINUS_EXPR:
4168     case MULT_EXPR:
4169     case TRUNC_DIV_EXPR:
4170     case BIT_AND_EXPR:
4171     case BIT_IOR_EXPR:
4172     case LSHIFT_EXPR:
4173     case RSHIFT_EXPR:
4174       op0 = TREE_OPERAND (expr, 0);
4175       op1 = TREE_OPERAND (expr, 1);
4176       STRIP_NOPS (op0);
4177       STRIP_NOPS (op1);
4178       break;
4179 
4180     CASE_CONVERT:
4181     case NEGATE_EXPR:
4182     case BIT_NOT_EXPR:
4183       op0 = TREE_OPERAND (expr, 0);
4184       STRIP_NOPS (op0);
4185       op1 = NULL_TREE;
4186       break;
4187 
4188     default:
4189       /* Just an arbitrary value, FIXME.  */
4190       return comp_cost (target_spill_cost[speed], 0);
4191     }
4192 
4193   if (op0 == NULL_TREE
4194       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4195     cost0 = no_cost;
4196   else
4197     cost0 = force_expr_to_var_cost (op0, speed);
4198 
4199   if (op1 == NULL_TREE
4200       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4201     cost1 = no_cost;
4202   else
4203     cost1 = force_expr_to_var_cost (op1, speed);
4204 
4205   mode = TYPE_MODE (TREE_TYPE (expr));
4206   switch (TREE_CODE (expr))
4207     {
4208     case POINTER_PLUS_EXPR:
4209     case PLUS_EXPR:
4210     case MINUS_EXPR:
4211     case NEGATE_EXPR:
4212       cost = comp_cost (add_cost (speed, mode), 0);
4213       if (TREE_CODE (expr) != NEGATE_EXPR)
4214 	{
4215 	  tree mult = NULL_TREE;
4216 	  comp_cost sa_cost;
4217 	  if (TREE_CODE (op1) == MULT_EXPR)
4218 	    mult = op1;
4219 	  else if (TREE_CODE (op0) == MULT_EXPR)
4220 	    mult = op0;
4221 
4222 	  if (mult != NULL_TREE
4223 	      && is_a <scalar_int_mode> (mode, &int_mode)
4224 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4225 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4226 				    speed, &sa_cost))
4227 	    return sa_cost;
4228 	}
4229       break;
4230 
4231     CASE_CONVERT:
4232       {
4233 	tree inner_mode, outer_mode;
4234 	outer_mode = TREE_TYPE (expr);
4235 	inner_mode = TREE_TYPE (op0);
4236 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4237 				       TYPE_MODE (inner_mode), speed), 0);
4238       }
4239       break;
4240 
4241     case MULT_EXPR:
4242       if (cst_and_fits_in_hwi (op0))
4243 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4244 					     mode, speed), 0);
4245       else if (cst_and_fits_in_hwi (op1))
4246 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4247 					     mode, speed), 0);
4248       else
4249 	return comp_cost (target_spill_cost [speed], 0);
4250       break;
4251 
4252     case TRUNC_DIV_EXPR:
4253       /* Division by power of two is usually cheap, so we allow it.  Forbid
4254 	 anything else.  */
4255       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4256 	cost = comp_cost (add_cost (speed, mode), 0);
4257       else
4258 	cost = comp_cost (target_spill_cost[speed], 0);
4259       break;
4260 
4261     case BIT_AND_EXPR:
4262     case BIT_IOR_EXPR:
4263     case BIT_NOT_EXPR:
4264     case LSHIFT_EXPR:
4265     case RSHIFT_EXPR:
4266       cost = comp_cost (add_cost (speed, mode), 0);
4267       break;
4268 
4269     default:
4270       gcc_unreachable ();
4271     }
4272 
4273   cost += cost0;
4274   cost += cost1;
4275   return cost;
4276 }
4277 
4278 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4279    invariants the computation depends on.  */
4280 
4281 static comp_cost
4282 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4283 {
4284   if (!expr)
4285     return no_cost;
4286 
4287   find_inv_vars (data, &expr, inv_vars);
4288   return force_expr_to_var_cost (expr, data->speed);
4289 }
4290 
4291 /* Returns cost of auto-modifying address expression in shape base + offset.
4292    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4293    address expression.  The address expression has ADDR_MODE in addr space
4294    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4295    speed or size.  */
4296 
4297 enum ainc_type
4298 {
4299   AINC_PRE_INC,		/* Pre increment.  */
4300   AINC_PRE_DEC,		/* Pre decrement.  */
4301   AINC_POST_INC,	/* Post increment.  */
4302   AINC_POST_DEC,	/* Post decrement.  */
4303   AINC_NONE		/* Also the number of auto increment types.  */
4304 };
4305 
4306 struct ainc_cost_data
4307 {
4308   unsigned costs[AINC_NONE];
4309 };
4310 
4311 static comp_cost
4312 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4313 		       machine_mode addr_mode, machine_mode mem_mode,
4314 		       addr_space_t as, bool speed)
4315 {
4316   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4317       && !USE_STORE_PRE_DECREMENT (mem_mode)
4318       && !USE_LOAD_POST_DECREMENT (mem_mode)
4319       && !USE_STORE_POST_DECREMENT (mem_mode)
4320       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4321       && !USE_STORE_PRE_INCREMENT (mem_mode)
4322       && !USE_LOAD_POST_INCREMENT (mem_mode)
4323       && !USE_STORE_POST_INCREMENT (mem_mode))
4324     return infinite_cost;
4325 
4326   static vec<ainc_cost_data *> ainc_cost_data_list;
4327   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4328   if (idx >= ainc_cost_data_list.length ())
4329     {
4330       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4331 
4332       gcc_assert (nsize > idx);
4333       ainc_cost_data_list.safe_grow_cleared (nsize);
4334     }
4335 
4336   ainc_cost_data *data = ainc_cost_data_list[idx];
4337   if (data == NULL)
4338     {
4339       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4340 
4341       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4342       data->costs[AINC_PRE_DEC] = INFTY;
4343       data->costs[AINC_POST_DEC] = INFTY;
4344       data->costs[AINC_PRE_INC] = INFTY;
4345       data->costs[AINC_POST_INC] = INFTY;
4346       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4347 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4348 	{
4349 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4350 
4351 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4352 	    data->costs[AINC_PRE_DEC]
4353 	      = address_cost (addr, mem_mode, as, speed);
4354 	}
4355       if (USE_LOAD_POST_DECREMENT (mem_mode)
4356 	  || USE_STORE_POST_DECREMENT (mem_mode))
4357 	{
4358 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4359 
4360 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4361 	    data->costs[AINC_POST_DEC]
4362 	      = address_cost (addr, mem_mode, as, speed);
4363 	}
4364       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4365 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4366 	{
4367 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4368 
4369 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4370 	    data->costs[AINC_PRE_INC]
4371 	      = address_cost (addr, mem_mode, as, speed);
4372 	}
4373       if (USE_LOAD_POST_INCREMENT (mem_mode)
4374 	  || USE_STORE_POST_INCREMENT (mem_mode))
4375 	{
4376 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4377 
4378 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4379 	    data->costs[AINC_POST_INC]
4380 	      = address_cost (addr, mem_mode, as, speed);
4381 	}
4382       ainc_cost_data_list[idx] = data;
4383     }
4384 
4385   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4386   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4387     return comp_cost (data->costs[AINC_POST_INC], 0);
4388   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4389     return comp_cost (data->costs[AINC_POST_DEC], 0);
4390   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4391     return comp_cost (data->costs[AINC_PRE_INC], 0);
4392   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4393     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4394 
4395   return infinite_cost;
4396 }
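
/* Illustration (hypothetical 4-byte access with |ainc_step| == 4): the checks
   above map the (offset, step) pair to an auto-increment form, roughly

     *p++ = x;    offset 0,  step  4   ->  AINC_POST_INC
     *--p = x;    offset -4, step -4   ->  AINC_PRE_DEC

   and the cached address_cost of the matching form is returned, or
   infinite_cost if no form matches.  */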
4397 
4398 /* Return the cost of computing USE's address expression by using CAND.
4399    AFF_INV and AFF_VAR represent the invariant and variant parts of the
4400    address expression, respectively.  If AFF_INV is simple, store the
4401    loop invariant variables it depends on in INV_VARS; if AFF_INV is
4402    complicated, handle it as a new invariant expression and record it
4403    in INV_EXPR.  RATIO is the ratio between the steps of USE and CAND.
4404    If CAN_AUTOINC is non-NULL, store in it a boolean value indicating
4405    whether this is an auto-increment address.  */
4406 
4407 static comp_cost
4408 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4409 		  struct iv_cand *cand, aff_tree *aff_inv,
4410 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4411 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4412 		  bool *can_autoinc, bool speed)
4413 {
4414   rtx addr;
4415   bool simple_inv = true;
4416   tree comp_inv = NULL_TREE, type = aff_var->type;
4417   comp_cost var_cost = no_cost, cost = no_cost;
4418   struct mem_address parts = {NULL_TREE, integer_one_node,
4419 			      NULL_TREE, NULL_TREE, NULL_TREE};
4420   machine_mode addr_mode = TYPE_MODE (type);
4421   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4422   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4423   /* Only true if ratio != 1.  */
4424   bool ok_with_ratio_p = false;
4425   bool ok_without_ratio_p = false;
4426 
4427   if (!aff_combination_const_p (aff_inv))
4428     {
4429       parts.index = integer_one_node;
4430       /* Addressing mode "base + index".  */
4431       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4432       if (ratio != 1)
4433 	{
4434 	  parts.step = wide_int_to_tree (type, ratio);
4435 	  /* Addressing mode "base + index << scale".  */
4436 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4437 	  if (!ok_with_ratio_p)
4438 	    parts.step = NULL_TREE;
4439 	}
4440       if (ok_with_ratio_p || ok_without_ratio_p)
4441 	{
4442 	  if (maybe_ne (aff_inv->offset, 0))
4443 	    {
4444 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4445 	      /* Addressing mode "base + index [<< scale] + offset".  */
4446 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4447 		parts.offset = NULL_TREE;
4448 	      else
4449 		aff_inv->offset = 0;
4450 	    }
4451 
4452 	  move_fixed_address_to_symbol (&parts, aff_inv);
4453 	  /* Base is fixed address and is moved to symbol part.  */
4454 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4455 	    parts.base = NULL_TREE;
4456 
4457 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4458 	  if (parts.symbol != NULL_TREE
4459 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4460 	    {
4461 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4462 	      parts.symbol = NULL_TREE;
4463 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4464 		 outside of address expression in this case.  */
4465 	      simple_inv = false;
4466 	      /* Symbol part is moved back to base part, it can't be NULL.  */
4467 	      parts.base = integer_one_node;
4468 	    }
4469 	}
4470       else
4471 	parts.index = NULL_TREE;
4472     }
4473   else
4474     {
4475       poly_int64 ainc_step;
4476       if (can_autoinc
4477 	  && ratio == 1
4478 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4479 	{
4480 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4481 
4482 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4483 	    ainc_offset += ainc_step;
4484 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4485 					addr_mode, mem_mode, as, speed);
4486 	  if (!cost.infinite_cost_p ())
4487 	    {
4488 	      *can_autoinc = true;
4489 	      return cost;
4490 	    }
4491 	  cost = no_cost;
4492 	}
4493       if (!aff_combination_zero_p (aff_inv))
4494 	{
4495 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4496 	  /* Addressing mode "base + offset".  */
4497 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4498 	    parts.offset = NULL_TREE;
4499 	  else
4500 	    aff_inv->offset = 0;
4501 	}
4502     }
4503 
4504   if (simple_inv)
4505     simple_inv = (aff_inv == NULL
4506 		  || aff_combination_const_p (aff_inv)
4507 		  || aff_combination_singleton_var_p (aff_inv));
4508   if (!aff_combination_zero_p (aff_inv))
4509     comp_inv = aff_combination_to_tree (aff_inv);
4510   if (comp_inv != NULL_TREE)
4511     cost = force_var_cost (data, comp_inv, inv_vars);
4512   if (ratio != 1 && parts.step == NULL_TREE)
4513     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4514   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4515     var_cost += add_cost (speed, addr_mode);
4516 
4517   if (comp_inv && inv_expr && !simple_inv)
4518     {
4519       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4520       /* Clear depends on.  */
4521       if (*inv_expr != NULL && inv_vars && *inv_vars)
4522 	bitmap_clear (*inv_vars);
4523 
4524       /* The cost of a small invariant expression adjusted against loop niters
4525 	 is usually zero, which makes it hard to differentiate from a candidate
4526 	 based on loop invariant variables.  Secondly, the generated invariant
4527 	 expression may not be hoisted out of the loop by a following pass.  We
4528 	 penalize the cost by rounding up in order to neutralize such
4529 	 effects.  */
4530       cost.cost = adjust_setup_cost (data, cost.cost, true);
4531       cost.scratch = cost.cost;
4532     }
4533 
4534   cost += var_cost;
4535   addr = addr_for_mem_ref (&parts, as, false);
4536   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4537   cost += address_cost (addr, mem_mode, as, speed);
4538 
4539   if (parts.symbol != NULL_TREE)
4540     cost.complexity += 1;
4541   /* Don't increase the complexity of adding a scaled index if it's
4542      the only kind of index that the target allows.  */
4543   if (parts.step != NULL_TREE && ok_without_ratio_p)
4544     cost.complexity += 1;
4545   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4546     cost.complexity += 1;
4547   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4548     cost.complexity += 1;
4549 
4550   return cost;
4551 }
4552 
4553 /* Scale (multiply) the computed COST (except the scratch part, which should be
4554    hoisted out of the loop) by AT's block frequency / the loop header's
4555    frequency, which makes the expected cost more accurate.  */
4556 
4557 static comp_cost
4558 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4559 {
4560    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4561    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4562    if (loop_freq != 0)
4563      {
4564        gcc_assert (cost.scratch <= cost.cost);
4565        int scaled_cost
4566 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4567 
4568        if (dump_file && (dump_flags & TDF_DETAILS))
4569 	 fprintf (dump_file, "Scaling cost based on bb prob "
4570 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4571 		  1.0f * bb_freq / loop_freq, cost.cost,
4572 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4573 
4574        cost.cost = scaled_cost;
4575      }
4576 
4577   return cost;
4578 }
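
/* Worked example (hypothetical numbers): with cost 12, scratch 4, a block
   frequency of 50 and a loop header frequency of 100, the scaled cost is
   4 + (12 - 4) * 50 / 100 == 8; the scratch part is not scaled because it is
   expected to be hoisted out of the loop.  */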
4579 
4580 /* Determines the cost of the computation by that USE is expressed
4581    from induction variable CAND.  If ADDRESS_P is true, we just need
4582    to create an address from it, otherwise we want to get it into
4583    register.  A set of invariants we depend on is stored in INV_VARS.
4584    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4585    addressing is likely.  If INV_EXPR is nonnull, record invariant
4586    expr entry in it.  */
4587 
4588 static comp_cost
4589 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4590 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4591 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4592 {
4593   gimple *at = use->stmt;
4594   tree ubase = use->iv->base, cbase = cand->iv->base;
4595   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4596   tree comp_inv = NULL_TREE;
4597   HOST_WIDE_INT ratio, aratio;
4598   comp_cost cost;
4599   widest_int rat;
4600   aff_tree aff_inv, aff_var;
4601   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4602 
4603   if (inv_vars)
4604     *inv_vars = NULL;
4605   if (can_autoinc)
4606     *can_autoinc = false;
4607   if (inv_expr)
4608     *inv_expr = NULL;
4609 
4610   /* Check if we have enough precision to express the values of use.  */
4611   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4612     return infinite_cost;
4613 
4614   if (address_p
4615       || (use->iv->base_object
4616 	  && cand->iv->base_object
4617 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4618 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4619     {
4620       /* Do not try to express address of an object with computation based
4621 	 on address of a different object.  This may cause problems in rtl
4622 	 level alias analysis (that does not expect this to be happening,
4623 	 as this is illegal in C), and would be unlikely to be useful
4624 	 anyway.  */
4625       if (use->iv->base_object
4626 	  && cand->iv->base_object
4627 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4628 	return infinite_cost;
4629     }
4630 
4631   if (!get_computation_aff_1 (data->current_loop, at, use,
4632 			      cand, &aff_inv, &aff_var, &rat)
4633       || !wi::fits_shwi_p (rat))
4634     return infinite_cost;
4635 
4636   ratio = rat.to_shwi ();
4637   if (address_p)
4638     {
4639       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4640 			       inv_vars, inv_expr, can_autoinc, speed);
4641       return get_scaled_computation_cost_at (data, at, cost);
4642     }
4643 
4644   bool simple_inv = (aff_combination_const_p (&aff_inv)
4645 		     || aff_combination_singleton_var_p (&aff_inv));
4646   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4647   aff_combination_convert (&aff_inv, signed_type);
4648   if (!aff_combination_zero_p (&aff_inv))
4649     comp_inv = aff_combination_to_tree (&aff_inv);
4650 
4651   cost = force_var_cost (data, comp_inv, inv_vars);
4652   if (comp_inv && inv_expr && !simple_inv)
4653     {
4654       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4655       /* Clear depends on.  */
4656       if (*inv_expr != NULL && inv_vars && *inv_vars)
4657 	bitmap_clear (*inv_vars);
4658 
4659       cost.cost = adjust_setup_cost (data, cost.cost);
4660       /* Record setup cost in scratch field.  */
4661       cost.scratch = cost.cost;
4662     }
4663   /* Cost of constant integer can be covered when adding invariant part to
4664      variant part.  */
4665   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4666     cost = no_cost;
4667 
4668   /* Need type narrowing to represent use with cand.  */
4669   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4670     {
4671       machine_mode outer_mode = TYPE_MODE (utype);
4672       machine_mode inner_mode = TYPE_MODE (ctype);
4673       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4674     }
4675 
4676   /* Turn a + i * (-c) into a - i * c.  */
4677   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4678     aratio = -ratio;
4679   else
4680     aratio = ratio;
4681 
4682   if (ratio != 1)
4683     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4684 
4685   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4686      instruction.  */
4687   /* Need to add up the invariant and variant parts.  */
4688   if (comp_inv && !integer_zerop (comp_inv))
4689     cost += add_cost (speed, TYPE_MODE (utype));
4690 
4691   return get_scaled_computation_cost_at (data, at, cost);
4692 }
4693 
4694 /* Determines cost of computing the use in GROUP with CAND in a generic
4695    expression.  */
4696 
4697 static bool
4698 determine_group_iv_cost_generic (struct ivopts_data *data,
4699 				 struct iv_group *group, struct iv_cand *cand)
4700 {
4701   comp_cost cost;
4702   iv_inv_expr_ent *inv_expr = NULL;
4703   bitmap inv_vars = NULL, inv_exprs = NULL;
4704   struct iv_use *use = group->vuses[0];
4705 
4706   /* The simple case first -- if we need to express value of the preserved
4707      original biv, the cost is 0.  This also prevents us from counting the
4708      cost of increment twice -- once at this use and once in the cost of
4709      the candidate.  */
4710   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4711     cost = no_cost;
4712   else
4713     cost = get_computation_cost (data, use, cand, false,
4714 				 &inv_vars, NULL, &inv_expr);
4715 
4716   if (inv_expr)
4717     {
4718       inv_exprs = BITMAP_ALLOC (NULL);
4719       bitmap_set_bit (inv_exprs, inv_expr->id);
4720     }
4721   set_group_iv_cost (data, group, cand, cost, inv_vars,
4722 		     NULL_TREE, ERROR_MARK, inv_exprs);
4723   return !cost.infinite_cost_p ();
4724 }
4725 
4726 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4727 
4728 static bool
4729 determine_group_iv_cost_address (struct ivopts_data *data,
4730 				 struct iv_group *group, struct iv_cand *cand)
4731 {
4732   unsigned i;
4733   bitmap inv_vars = NULL, inv_exprs = NULL;
4734   bool can_autoinc;
4735   iv_inv_expr_ent *inv_expr = NULL;
4736   struct iv_use *use = group->vuses[0];
4737   comp_cost sum_cost = no_cost, cost;
4738 
4739   cost = get_computation_cost (data, use, cand, true,
4740 			       &inv_vars, &can_autoinc, &inv_expr);
4741 
4742   if (inv_expr)
4743     {
4744       inv_exprs = BITMAP_ALLOC (NULL);
4745       bitmap_set_bit (inv_exprs, inv_expr->id);
4746     }
4747   sum_cost = cost;
4748   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4749     {
4750       if (can_autoinc)
4751 	sum_cost -= cand->cost_step;
4752       /* If we generated the candidate solely for exploiting autoincrement
4753 	 opportunities, and it turns out it can't be used, set the cost to
4754 	 infinity to make sure we ignore it.  */
4755       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4756 	sum_cost = infinite_cost;
4757     }
4758 
4759   /* Uses in a group can share setup code, so only add setup cost once.  */
4760   cost -= cost.scratch;
4761   /* Compute and add the costs for the remaining uses of this group.  */
4762   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4763     {
4764       struct iv_use *next = group->vuses[i];
4765 
4766       /* TODO: We could skip computing cost for sub iv_use when it has the
4767 	 same cost as the first iv_use, but the cost really depends on the
4768 	 offset and where the iv_use is.  */
4769 	cost = get_computation_cost (data, next, cand, true,
4770 				     NULL, &can_autoinc, &inv_expr);
4771 	if (inv_expr)
4772 	  {
4773 	    if (!inv_exprs)
4774 	      inv_exprs = BITMAP_ALLOC (NULL);
4775 
4776 	    bitmap_set_bit (inv_exprs, inv_expr->id);
4777 	  }
4778       sum_cost += cost;
4779     }
4780   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4781 		     NULL_TREE, ERROR_MARK, inv_exprs);
4782 
4783   return !sum_cost.infinite_cost_p ();
4784 }
4785 
4786 /* Computes value of candidate CAND at position AT in iteration NITER, and
4787    stores it to VAL.  */
4788 
4789 static void
4790 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4791 	       aff_tree *val)
4792 {
4793   aff_tree step, delta, nit;
4794   struct iv *iv = cand->iv;
4795   tree type = TREE_TYPE (iv->base);
4796   tree steptype;
4797   if (POINTER_TYPE_P (type))
4798     steptype = sizetype;
4799   else
4800     steptype = unsigned_type_for (type);
4801 
4802   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4803   aff_combination_convert (&step, steptype);
4804   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4805   aff_combination_convert (&nit, steptype);
4806   aff_combination_mult (&nit, &step, &delta);
4807   if (stmt_after_increment (loop, cand, at))
4808     aff_combination_add (&delta, &step);
4809 
4810   tree_to_aff_combination (iv->base, type, val);
4811   if (!POINTER_TYPE_P (type))
4812     aff_combination_convert (val, steptype);
4813   aff_combination_add (val, &delta);
4814 }
4815 
4816 /* Returns period of induction variable iv.  */
4817 
4818 static tree
4819 iv_period (struct iv *iv)
4820 {
4821   tree step = iv->step, period, type;
4822   tree pow2div;
4823 
4824   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4825 
4826   type = unsigned_type_for (TREE_TYPE (step));
4827   /* The period of the iv is lcm (step, type_range)/step - 1,
4828      i.e., N*type_range/step - 1.  Since the type range is a power
4829      of two, N == step >> num_of_ending_zeros_binary (step),
4830      so the final result is
4831 
4832        (type_range >> num_of_ending_zeros_binary (step)) - 1.
4833 
4834   */
4835   pow2div = num_ending_zeros (step);
4836 
4837   period = build_low_bits_mask (type,
4838 				(TYPE_PRECISION (type)
4839 				 - tree_to_uhwi (pow2div)));
4840 
4841   return period;
4842 }
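
/* Worked example (hypothetical numbers): for an 8-bit unsigned iv with
   step 4, num_ending_zeros (step) is 2, so the period is
   (256 >> 2) - 1 == 63, i.e. the mask of the low 8 - 2 == 6 bits.  */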
4843 
4844 /* Returns the comparison operator used when eliminating the iv USE.  */
4845 
4846 static enum tree_code
4847 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4848 {
4849   struct loop *loop = data->current_loop;
4850   basic_block ex_bb;
4851   edge exit;
4852 
4853   ex_bb = gimple_bb (use->stmt);
4854   exit = EDGE_SUCC (ex_bb, 0);
4855   if (flow_bb_inside_loop_p (loop, exit->dest))
4856     exit = EDGE_SUCC (ex_bb, 1);
4857 
4858   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4859 }
4860 
4861 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4862    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4863    calculation is performed in non-wrapping type.
4864 
4865    TODO: More generally, we could test for the situation that
4866 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4867 	 This would require knowing the sign of OFFSET.  */
4868 
4869 static bool
4870 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4871 {
4872   enum tree_code code;
4873   tree e1, e2;
4874   aff_tree aff_e1, aff_e2, aff_offset;
4875 
4876   if (!nowrap_type_p (TREE_TYPE (base)))
4877     return false;
4878 
4879   base = expand_simple_operations (base);
4880 
4881   if (TREE_CODE (base) == SSA_NAME)
4882     {
4883       gimple *stmt = SSA_NAME_DEF_STMT (base);
4884 
4885       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4886 	return false;
4887 
4888       code = gimple_assign_rhs_code (stmt);
4889       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4890 	return false;
4891 
4892       e1 = gimple_assign_rhs1 (stmt);
4893       e2 = gimple_assign_rhs2 (stmt);
4894     }
4895   else
4896     {
4897       code = TREE_CODE (base);
4898       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4899 	return false;
4900       e1 = TREE_OPERAND (base, 0);
4901       e2 = TREE_OPERAND (base, 1);
4902     }
4903 
4904   /* Use affine expansion as deeper inspection to prove the equality.  */
4905   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4906 				  &aff_e2, &data->name_expansion_cache);
4907   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4908 				  &aff_offset, &data->name_expansion_cache);
4909   aff_combination_scale (&aff_offset, -1);
4910   switch (code)
4911     {
4912     case PLUS_EXPR:
4913       aff_combination_add (&aff_e2, &aff_offset);
4914       if (aff_combination_zero_p (&aff_e2))
4915 	return true;
4916 
4917       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4918 				      &aff_e1, &data->name_expansion_cache);
4919       aff_combination_add (&aff_e1, &aff_offset);
4920       return aff_combination_zero_p (&aff_e1);
4921 
4922     case POINTER_PLUS_EXPR:
4923       aff_combination_add (&aff_e2, &aff_offset);
4924       return aff_combination_zero_p (&aff_e2);
4925 
4926     default:
4927       return false;
4928     }
4929 }
4930 
4931 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4932    comparison with CAND.  NITER describes the number of iterations of
4933    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4934 
4935    We aim to handle the following situation:
4936 
4937    sometype *base, *p;
4938    int a, b, i;
4939 
4940    i = a;
4941    p = p_0 = base + a;
4942 
4943    do
4944      {
4945        bla (*p);
4946        p++;
4947        i++;
4948      }
4949    while (i < b);
4950 
4951    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4952    We aim to optimize this to
4953 
4954    p = p_0 = base + a;
4955    do
4956      {
4957        bla (*p);
4958        p++;
4959      }
4960    while (p < p_0 - a + b);
4961 
4962    This preserves correctness, since the pointer arithmetic does not
4963    overflow.  More precisely:
4964 
4965    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4966       overflow in computing it or the values of p.
4967    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4968       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4969 
4970 static bool
4971 iv_elimination_compare_lt (struct ivopts_data *data,
4972 			   struct iv_cand *cand, enum tree_code *comp_p,
4973 			   struct tree_niter_desc *niter)
4974 {
4975   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4976   struct aff_tree nit, tmpa, tmpb;
4977   enum tree_code comp;
4978   HOST_WIDE_INT step;
4979 
4980   /* We need to know that the candidate induction variable does not overflow.
4981      While more complex analysis may be used to prove this, for now just
4982      check that the variable appears in the original program and that it
4983      is computed in a type that guarantees no overflows.  */
4984   cand_type = TREE_TYPE (cand->iv->base);
4985   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4986     return false;
4987 
4988   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4989      the calculation of the BOUND could overflow, making the comparison
4990      invalid.  */
4991   if (!data->loop_single_exit_p)
4992     return false;
4993 
4994   /* We need to be able to decide whether candidate is increasing or decreasing
4995      in order to choose the right comparison operator.  */
4996   if (!cst_and_fits_in_hwi (cand->iv->step))
4997     return false;
4998   step = int_cst_value (cand->iv->step);
4999 
5000   /* Check that the number of iterations matches the expected pattern:
5001      a + 1 > b ? 0 : b - a - 1.  */
5002   mbz = niter->may_be_zero;
5003   if (TREE_CODE (mbz) == GT_EXPR)
5004     {
5005       /* Handle a + 1 > b.  */
5006       tree op0 = TREE_OPERAND (mbz, 0);
5007       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5008 	{
5009 	  a = TREE_OPERAND (op0, 0);
5010 	  b = TREE_OPERAND (mbz, 1);
5011 	}
5012       else
5013 	return false;
5014     }
5015   else if (TREE_CODE (mbz) == LT_EXPR)
5016     {
5017       tree op1 = TREE_OPERAND (mbz, 1);
5018 
5019       /* Handle b < a + 1.  */
5020       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5021 	{
5022 	  a = TREE_OPERAND (op1, 0);
5023 	  b = TREE_OPERAND (mbz, 0);
5024 	}
5025       else
5026 	return false;
5027     }
5028   else
5029     return false;
5030 
5031   /* Expected number of iterations is B - A - 1.  Check that it matches
5032      the actual number, i.e., that B - A - NITER = 1.  */
5033   tree_to_aff_combination (niter->niter, nit_type, &nit);
5034   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5035   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5036   aff_combination_scale (&nit, -1);
5037   aff_combination_scale (&tmpa, -1);
5038   aff_combination_add (&tmpb, &tmpa);
5039   aff_combination_add (&tmpb, &nit);
5040   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5041     return false;
5042 
5043   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5044      overflow.  */
5045   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5046 			cand->iv->step,
5047 			fold_convert (TREE_TYPE (cand->iv->step), a));
5048   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5049     return false;
5050 
5051   /* Determine the new comparison operator.  */
5052   comp = step < 0 ? GT_EXPR : LT_EXPR;
5053   if (*comp_p == NE_EXPR)
5054     *comp_p = comp;
5055   else if (*comp_p == EQ_EXPR)
5056     *comp_p = invert_tree_comparison (comp, false);
5057   else
5058     gcc_unreachable ();
5059 
5060   return true;
5061 }
5062 
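/* Worked instance of the NITER pattern check in iv_elimination_compare_lt
   above (illustrative values only): for a = 2 and b = 7, MAY_BE_ZERO is
   a + 1 > b and NITER is b - a - 1 = 4.  The affine combination
   B - A - NITER = 7 - 2 - 4 = 1 has no variable part and offset 1, so the
   test tmpb.n == 0 && tmpb.offset == 1 passes.  */
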
5063 /* Check whether it is possible to express the condition in USE by a comparison
5064    with candidate CAND.  If so, store the value to compare against in BOUND and
5065    the comparison operator in COMP.  */
5066 
5067 static bool
5068 may_eliminate_iv (struct ivopts_data *data,
5069 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5070 		  enum tree_code *comp)
5071 {
5072   basic_block ex_bb;
5073   edge exit;
5074   tree period;
5075   struct loop *loop = data->current_loop;
5076   aff_tree bnd;
5077   struct tree_niter_desc *desc = NULL;
5078 
5079   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5080     return false;
5081 
5082   /* For now this works only for exits that dominate the loop latch.
5083      TODO: extend to other conditions inside loop body.  */
5084   ex_bb = gimple_bb (use->stmt);
5085   if (use->stmt != last_stmt (ex_bb)
5086       || gimple_code (use->stmt) != GIMPLE_COND
5087       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5088     return false;
5089 
5090   exit = EDGE_SUCC (ex_bb, 0);
5091   if (flow_bb_inside_loop_p (loop, exit->dest))
5092     exit = EDGE_SUCC (ex_bb, 1);
5093   if (flow_bb_inside_loop_p (loop, exit->dest))
5094     return false;
5095 
5096   desc = niter_for_exit (data, exit);
5097   if (!desc)
5098     return false;
5099 
5100   /* Determine whether we can use the variable to test the exit condition.
5101      This is the case iff the period of the induction variable is greater
5102      than the number of iterations for which the exit condition is true.  */
5103   period = iv_period (cand->iv);
5104 
5105   /* If the number of iterations is constant, compare against it directly.  */
5106   if (TREE_CODE (desc->niter) == INTEGER_CST)
5107     {
5108       /* See cand_value_at.  */
5109       if (stmt_after_increment (loop, cand, use->stmt))
5110 	{
5111 	  if (!tree_int_cst_lt (desc->niter, period))
5112 	    return false;
5113 	}
5114       else
5115 	{
5116 	  if (tree_int_cst_lt (period, desc->niter))
5117 	    return false;
5118 	}
5119     }
5120 
5121   /* If not, and if this is the only possible exit of the loop, see whether
5122      we can get a conservative estimate on the number of iterations of the
5123      entire loop and compare against that instead.  */
5124   else
5125     {
5126       widest_int period_value, max_niter;
5127 
5128       max_niter = desc->max;
5129       if (stmt_after_increment (loop, cand, use->stmt))
5130 	max_niter += 1;
5131       period_value = wi::to_widest (period);
5132       if (wi::gtu_p (max_niter, period_value))
5133 	{
5134 	  /* See if we can take advantage of inferred loop bound
5135 	     information.  */
5136 	  if (data->loop_single_exit_p)
5137 	    {
5138 	      if (!max_loop_iterations (loop, &max_niter))
5139 		return false;
5140 	      /* The loop bound is already adjusted by adding 1.  */
5141 	      if (wi::gtu_p (max_niter, period_value))
5142 		return false;
5143 	    }
5144 	  else
5145 	    return false;
5146 	}
5147     }
5148 
5149   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5150 
5151   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5152 			 aff_combination_to_tree (&bnd));
5153   *comp = iv_elimination_compare (data, use);
5154 
5155   /* It is unlikely that computing the number of iterations using division
5156      would be more profitable than keeping the original induction variable.  */
5157   if (expression_expensive_p (*bound))
5158     return false;
5159 
5160   /* Sometimes, the situation that the number of iterations may be zero
5161      unless additional assumptions hold can be handled by using <
5162      instead of != in the exit condition.
5163 
5164      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5165 	   base the exit condition on it.  However, that is often too
5166 	   expensive.  */
5167   if (!integer_zerop (desc->may_be_zero))
5168     return iv_elimination_compare_lt (data, cand, comp, desc);
5169 
5170   return true;
5171 }
5172 
5173 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5174    be copied if it is used in the loop body and DATA->body_includes_call.  */
5175 
5176 static int
5177 parm_decl_cost (struct ivopts_data *data, tree bound)
5178 {
5179   tree sbound = bound;
5180   STRIP_NOPS (sbound);
5181 
5182   if (TREE_CODE (sbound) == SSA_NAME
5183       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5184       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5185       && data->body_includes_call)
5186     return COSTS_N_INSNS (1);
5187 
5188   return 0;
5189 }
5190 
5191 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5192 
5193 static bool
5194 determine_group_iv_cost_cond (struct ivopts_data *data,
5195 			      struct iv_group *group, struct iv_cand *cand)
5196 {
5197   tree bound = NULL_TREE;
5198   struct iv *cmp_iv;
5199   bitmap inv_exprs = NULL;
5200   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5201   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5202   enum comp_iv_rewrite rewrite_type;
5203   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5204   tree *control_var, *bound_cst;
5205   enum tree_code comp = ERROR_MARK;
5206   struct iv_use *use = group->vuses[0];
5207 
5208   /* Extract condition operands.  */
5209   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5210 					&bound_cst, NULL, &cmp_iv);
5211   gcc_assert (rewrite_type != COMP_IV_NA);
5212 
5213   /* Try iv elimination.  */
5214   if (rewrite_type == COMP_IV_ELIM
5215       && may_eliminate_iv (data, use, cand, &bound, &comp))
5216     {
5217       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5218       if (elim_cost.cost == 0)
5219 	elim_cost.cost = parm_decl_cost (data, bound);
5220       else if (TREE_CODE (bound) == INTEGER_CST)
5221 	elim_cost.cost = 0;
5222       /* If we replace a loop condition 'i < n' with 'p < base + n',
5223 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5224 	 'base' and 'n' will be live during the loop.	 More likely,
5225 	 'base + n' will be loop invariant, resulting in only one live value
5226 	 during the loop.  So in that case we clear inv_vars_elim and set
5227 	 inv_expr_elim instead.  */
5228       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5229 	{
5230 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5231 	  bitmap_clear (inv_vars_elim);
5232 	}
5233       /* The bound is a loop invariant, so it will be only computed
5234 	 once.  */
5235       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5236     }
5237 
5238   /* When the condition is a comparison of the candidate IV against
5239      zero, prefer this IV.
5240 
5241      TODO: The constant that we're subtracting from the cost should
5242      be target-dependent.  This information should be added to the
5243      target costs for each backend.  */
5244   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5245       && integer_zerop (*bound_cst)
5246       && (operand_equal_p (*control_var, cand->var_after, 0)
5247 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5248     elim_cost -= 1;
5249 
5250   express_cost = get_computation_cost (data, use, cand, false,
5251 				       &inv_vars_express, NULL,
5252 				       &inv_expr_express);
5253   if (cmp_iv != NULL)
5254     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5255 
5256   /* Count the cost of the original bound as well.  */
5257   bound_cost = force_var_cost (data, *bound_cst, NULL);
5258   if (bound_cost.cost == 0)
5259     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5260   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5261     bound_cost.cost = 0;
5262   express_cost += bound_cost;
5263 
5264   /* Choose the better approach, preferring the eliminated IV. */
5265   if (elim_cost <= express_cost)
5266     {
5267       cost = elim_cost;
5268       inv_vars = inv_vars_elim;
5269       inv_vars_elim = NULL;
5270       inv_expr = inv_expr_elim;
5271     }
5272   else
5273     {
5274       cost = express_cost;
5275       inv_vars = inv_vars_express;
5276       inv_vars_express = NULL;
5277       bound = NULL_TREE;
5278       comp = ERROR_MARK;
5279       inv_expr = inv_expr_express;
5280     }
5281 
5282   if (inv_expr)
5283     {
5284       inv_exprs = BITMAP_ALLOC (NULL);
5285       bitmap_set_bit (inv_exprs, inv_expr->id);
5286     }
5287   set_group_iv_cost (data, group, cand, cost,
5288 		     inv_vars, bound, comp, inv_exprs);
5289 
5290   if (inv_vars_elim)
5291     BITMAP_FREE (inv_vars_elim);
5292   if (inv_vars_express)
5293     BITMAP_FREE (inv_vars_express);
5294 
5295   return !cost.infinite_cost_p ();
5296 }
5297 
5298 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5299    if the uses in GROUP cannot be represented with CAND.  */
5300 
5301 static bool
5302 determine_group_iv_cost (struct ivopts_data *data,
5303 			 struct iv_group *group, struct iv_cand *cand)
5304 {
5305   switch (group->type)
5306     {
5307     case USE_NONLINEAR_EXPR:
5308       return determine_group_iv_cost_generic (data, group, cand);
5309 
5310     case USE_REF_ADDRESS:
5311     case USE_PTR_ADDRESS:
5312       return determine_group_iv_cost_address (data, group, cand);
5313 
5314     case USE_COMPARE:
5315       return determine_group_iv_cost_cond (data, group, cand);
5316 
5317     default:
5318       gcc_unreachable ();
5319     }
5320 }
5321 
5322 /* Return true if get_computation_cost indicates that autoincrement is
5323    a possibility for the pair of USE and CAND, false otherwise.  */
5324 
5325 static bool
5326 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5327 			   struct iv_cand *cand)
5328 {
5329   if (!address_p (use->type))
5330     return false;
5331 
5332   bool can_autoinc = false;
5333   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5334   return can_autoinc;
5335 }
5336 
5337 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5338    use that allows autoincrement, and set their AINC_USE if possible.  */
5339 
5340 static void
5341 set_autoinc_for_original_candidates (struct ivopts_data *data)
5342 {
5343   unsigned i, j;
5344 
5345   for (i = 0; i < data->vcands.length (); i++)
5346     {
5347       struct iv_cand *cand = data->vcands[i];
5348       struct iv_use *closest_before = NULL;
5349       struct iv_use *closest_after = NULL;
5350       if (cand->pos != IP_ORIGINAL)
5351 	continue;
5352 
5353       for (j = 0; j < data->vgroups.length (); j++)
5354 	{
5355 	  struct iv_group *group = data->vgroups[j];
5356 	  struct iv_use *use = group->vuses[0];
5357 	  unsigned uid = gimple_uid (use->stmt);
5358 
5359 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5360 	    continue;
5361 
5362 	  if (uid < gimple_uid (cand->incremented_at)
5363 	      && (closest_before == NULL
5364 		  || uid > gimple_uid (closest_before->stmt)))
5365 	    closest_before = use;
5366 
5367 	  if (uid > gimple_uid (cand->incremented_at)
5368 	      && (closest_after == NULL
5369 		  || uid < gimple_uid (closest_after->stmt)))
5370 	    closest_after = use;
5371 	}
5372 
5373       if (closest_before != NULL
5374 	  && autoinc_possible_for_pair (data, closest_before, cand))
5375 	cand->ainc_use = closest_before;
5376       else if (closest_after != NULL
5377 	       && autoinc_possible_for_pair (data, closest_after, cand))
5378 	cand->ainc_use = closest_after;
5379     }
5380 }
5381 
5382 /* Relate compare use with all candidates.  */
5383 
5384 static void
5385 relate_compare_use_with_all_cands (struct ivopts_data *data)
5386 {
5387   unsigned i, count = data->vcands.length ();
5388   for (i = 0; i < data->vgroups.length (); i++)
5389     {
5390       struct iv_group *group = data->vgroups[i];
5391 
5392       if (group->type == USE_COMPARE)
5393 	bitmap_set_range (group->related_cands, 0, count);
5394     }
5395 }
5396 
5397 /* Finds the candidates for the induction variables.  */
5398 
5399 static void
5400 find_iv_candidates (struct ivopts_data *data)
5401 {
5402   /* Add commonly used ivs.  */
5403   add_standard_iv_candidates (data);
5404 
5405   /* Add old induction variables.  */
5406   add_iv_candidate_for_bivs (data);
5407 
5408   /* Add induction variables derived from uses.  */
5409   add_iv_candidate_for_groups (data);
5410 
5411   set_autoinc_for_original_candidates (data);
5412 
5413   /* Record the important candidates.  */
5414   record_important_candidates (data);
5415 
5416   /* Relate compare iv_use with all candidates.  */
5417   if (!data->consider_all_candidates)
5418     relate_compare_use_with_all_cands (data);
5419 
5420   if (dump_file && (dump_flags & TDF_DETAILS))
5421     {
5422       unsigned i;
5423 
5424       fprintf (dump_file, "\n<Important Candidates>:\t");
5425       for (i = 0; i < data->vcands.length (); i++)
5426 	if (data->vcands[i]->important)
5427 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5428       fprintf (dump_file, "\n");
5429 
5430       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5431       for (i = 0; i < data->vgroups.length (); i++)
5432 	{
5433 	  struct iv_group *group = data->vgroups[i];
5434 
5435 	  if (group->related_cands)
5436 	    {
5437 	      fprintf (dump_file, "  Group %d:\t", group->id);
5438 	      dump_bitmap (dump_file, group->related_cands);
5439 	    }
5440 	}
5441       fprintf (dump_file, "\n");
5442     }
5443 }
5444 
5445 /* Determines the cost of computing each use with each iv candidate.  */
5446 
5447 static void
5448 determine_group_iv_costs (struct ivopts_data *data)
5449 {
5450   unsigned i, j;
5451   struct iv_cand *cand;
5452   struct iv_group *group;
5453   bitmap to_clear = BITMAP_ALLOC (NULL);
5454 
5455   alloc_use_cost_map (data);
5456 
5457   for (i = 0; i < data->vgroups.length (); i++)
5458     {
5459       group = data->vgroups[i];
5460 
5461       if (data->consider_all_candidates)
5462 	{
5463 	  for (j = 0; j < data->vcands.length (); j++)
5464 	    {
5465 	      cand = data->vcands[j];
5466 	      determine_group_iv_cost (data, group, cand);
5467 	    }
5468 	}
5469       else
5470 	{
5471 	  bitmap_iterator bi;
5472 
5473 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5474 	    {
5475 	      cand = data->vcands[j];
5476 	      if (!determine_group_iv_cost (data, group, cand))
5477 		bitmap_set_bit (to_clear, j);
5478 	    }
5479 
5480 	  /* Remove the candidates for which the cost is infinite from
5481 	     the list of related candidates.  */
5482 	  bitmap_and_compl_into (group->related_cands, to_clear);
5483 	  bitmap_clear (to_clear);
5484 	}
5485     }
5486 
5487   BITMAP_FREE (to_clear);
5488 
5489   if (dump_file && (dump_flags & TDF_DETAILS))
5490     {
5491       bitmap_iterator bi;
5492 
5493       /* Dump invariant variables.  */
5494       fprintf (dump_file, "\n<Invariant Vars>:\n");
5495       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5496 	{
5497 	  struct version_info *info = ver_info (data, i);
5498 	  if (info->inv_id)
5499 	    {
5500 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5501 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5502 	      fprintf (dump_file, "%s\n",
5503 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5504 	    }
5505 	}
5506 
5507       /* Dump invariant expressions.  */
5508       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5509       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5510 
5511       for (hash_table<iv_inv_expr_hasher>::iterator it
5512 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5513 	   ++it)
5514 	list.safe_push (*it);
5515 
5516       list.qsort (sort_iv_inv_expr_ent);
5517 
5518       for (i = 0; i < list.length (); ++i)
5519 	{
5520 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5521 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5522 	  fprintf (dump_file, "\n");
5523 	}
5524 
5525       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5526 
5527       for (i = 0; i < data->vgroups.length (); i++)
5528 	{
5529 	  group = data->vgroups[i];
5530 
5531 	  fprintf (dump_file, "Group %d:\n", i);
5532 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5533 	  for (j = 0; j < group->n_map_members; j++)
5534 	    {
5535 	      if (!group->cost_map[j].cand
5536 		  || group->cost_map[j].cost.infinite_cost_p ())
5537 		continue;
5538 
5539 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5540 		       group->cost_map[j].cand->id,
5541 		       group->cost_map[j].cost.cost,
5542 		       group->cost_map[j].cost.complexity);
5543 	      if (!group->cost_map[j].inv_exprs
5544 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5545 		fprintf (dump_file, "NIL;\t");
5546 	      else
5547 		bitmap_print (dump_file,
5548 			      group->cost_map[j].inv_exprs, "", ";\t");
5549 	      if (!group->cost_map[j].inv_vars
5550 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5551 		fprintf (dump_file, "NIL;\n");
5552 	      else
5553 		bitmap_print (dump_file,
5554 			      group->cost_map[j].inv_vars, "", "\n");
5555 	    }
5556 
5557 	  fprintf (dump_file, "\n");
5558 	}
5559       fprintf (dump_file, "\n");
5560     }
5561 }
5562 
5563 /* Determines cost of the candidate CAND.  */
5564 
5565 static void
5566 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5567 {
5568   comp_cost cost_base;
5569   unsigned cost, cost_step;
5570   tree base;
5571 
5572   gcc_assert (cand->iv != NULL);
5573 
5574   /* There are two costs associated with the candidate -- its increment
5575      and its initialization.  The second is almost negligible for any loop
5576      that rolls enough, so we give it only a small weight.  */
5577 
5578   base = cand->iv->base;
5579   cost_base = force_var_cost (data, base, NULL);
5580   /* It is exceptional for the iv register to happen to be initialized with
5581      the proper value at no cost.  In general, there will be at least a regcopy
5582      or a const set.  */
5583   if (cost_base.cost == 0)
5584     cost_base.cost = COSTS_N_INSNS (1);
5585   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5586 
5587   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5588 
5589   /* Prefer the original iv unless we may gain something by replacing it.
5590      The reason is to make debugging simpler; so this is not relevant for
5591      artificial ivs created by other optimization passes.  */
5592   if (cand->pos != IP_ORIGINAL
5593       || !SSA_NAME_VAR (cand->var_before)
5594       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5595     cost++;
5596 
5597   /* Prefer not to insert statements into latch unless there are some
5598      already (so that we do not create unnecessary jumps).  */
5599   if (cand->pos == IP_END
5600       && empty_block_p (ip_end_pos (data->current_loop)))
5601     cost++;
5602 
5603   cand->cost = cost;
5604   cand->cost_step = cost_step;
5605 }
5606 
5607 /* Determines costs of computation of the candidates.  */
5608 
5609 static void
5610 determine_iv_costs (struct ivopts_data *data)
5611 {
5612   unsigned i;
5613 
5614   if (dump_file && (dump_flags & TDF_DETAILS))
5615     {
5616       fprintf (dump_file, "<Candidate Costs>:\n");
5617       fprintf (dump_file, "  cand\tcost\n");
5618     }
5619 
5620   for (i = 0; i < data->vcands.length (); i++)
5621     {
5622       struct iv_cand *cand = data->vcands[i];
5623 
5624       determine_iv_cost (data, cand);
5625 
5626       if (dump_file && (dump_flags & TDF_DETAILS))
5627 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5628     }
5629 
5630   if (dump_file && (dump_flags & TDF_DETAILS))
5631     fprintf (dump_file, "\n");
5632 }
5633 
5634 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5635    induction variables.  Note N_INVS includes both invariant variables and
5636    invariant expressions.  */
5637 
5638 static unsigned
5639 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5640 			      unsigned n_cands)
5641 {
5642   unsigned cost;
5643   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5644   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5645   bool speed = data->speed;
5646 
5647   /* If there is a call in the loop body, the call-clobbered registers
5648      are not available for loop invariants.  */
5649   if (data->body_includes_call)
5650     available_regs = available_regs - target_clobbered_regs;
5651 
5652   /* If we have enough registers.  */
5653   if (regs_needed + target_res_regs < available_regs)
5654     cost = n_new;
5655   /* If close to running out of registers, try to preserve them.  */
5656   else if (regs_needed <= available_regs)
5657     cost = target_reg_cost [speed] * regs_needed;
5658   /* If the registers needed exceed the available ones but the number of
5659      candidates does not, penalize the extra registers using target_spill_cost.  */
5660   else if (n_cands <= available_regs)
5661     cost = target_reg_cost [speed] * available_regs
5662 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5663   /* If even the number of candidates exceeds the available registers, penalize
5664      the extra candidate registers using target_spill_cost * 2, because it is
5665      more expensive to spill an induction variable than an invariant.  */
5666   else
5667     cost = target_reg_cost [speed] * available_regs
5668 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5669 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5670 
5671   /* Finally, add the number of candidates, so that we prefer eliminating
5672      induction variables if possible.  */
5673   return cost + n_cands;
5674 }
5675 
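/* A sketch of the regimes in ivopts_estimate_reg_pressure above, with assumed
   numbers (available_regs = 16, target_res_regs = 3, regs_used = 2):

     n_invs = 2, n_cands = 3:  regs_needed = 7 and 7 + 3 < 16, so
			       cost = n_new = 5, and 5 + n_cands = 8 is returned.
     n_invs = 10, n_cands = 5: regs_needed = 17 > 16, but n_cands <= 16, so one
			       register worth of spilling is charged:
			       cost = 16 * reg_cost + 1 * spill_cost, plus 5.  */
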
5676 /* For each size of the induction variable set, determine the penalty.  */
5677 
5678 static void
5679 determine_set_costs (struct ivopts_data *data)
5680 {
5681   unsigned j, n;
5682   gphi *phi;
5683   gphi_iterator psi;
5684   tree op;
5685   struct loop *loop = data->current_loop;
5686   bitmap_iterator bi;
5687 
5688   if (dump_file && (dump_flags & TDF_DETAILS))
5689     {
5690       fprintf (dump_file, "<Global Costs>:\n");
5691       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5692       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5693       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5694       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5695     }
5696 
5697   n = 0;
5698   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5699     {
5700       phi = psi.phi ();
5701       op = PHI_RESULT (phi);
5702 
5703       if (virtual_operand_p (op))
5704 	continue;
5705 
5706       if (get_iv (data, op))
5707 	continue;
5708 
5709       if (!POINTER_TYPE_P (TREE_TYPE (op))
5710 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5711 	continue;
5712 
5713       n++;
5714     }
5715 
5716   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5717     {
5718       struct version_info *info = ver_info (data, j);
5719 
5720       if (info->inv_id && info->has_nonlin_use)
5721 	n++;
5722     }
5723 
5724   data->regs_used = n;
5725   if (dump_file && (dump_flags & TDF_DETAILS))
5726     fprintf (dump_file, "  regs_used %d\n", n);
5727 
5728   if (dump_file && (dump_flags & TDF_DETAILS))
5729     {
5730       fprintf (dump_file, "  cost for size:\n");
5731       fprintf (dump_file, "  ivs\tcost\n");
5732       for (j = 0; j <= 2 * target_avail_regs; j++)
5733 	fprintf (dump_file, "  %d\t%d\n", j,
5734 		 ivopts_estimate_reg_pressure (data, 0, j));
5735       fprintf (dump_file, "\n");
5736     }
5737 }
5738 
5739 /* Returns true if A is a cheaper cost pair than B.  */
5740 
5741 static bool
5742 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5743 {
5744   if (!a)
5745     return false;
5746 
5747   if (!b)
5748     return true;
5749 
5750   if (a->cost < b->cost)
5751     return true;
5752 
5753   if (b->cost < a->cost)
5754     return false;
5755 
5756   /* In case the costs are the same, prefer the cheaper candidate.  */
5757   if (a->cand->cost < b->cand->cost)
5758     return true;
5759 
5760   return false;
5761 }
5762 
5763 /* Compare whether A is a more expensive cost pair than B.  Return 1, 0 or -1
5764    for more expensive, equal or cheaper respectively.  */
5765 
5766 static int
5767 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5768 {
5769   if (cheaper_cost_pair (a, b))
5770     return -1;
5771   if (cheaper_cost_pair (b, a))
5772     return 1;
5773 
5774   return 0;
5775 }
5776 
5777 /* Returns the cost pair by which GROUP is expressed in IVS.  */
5778 
5779 static struct cost_pair *
5780 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5781 {
5782   return ivs->cand_for_group[group->id];
5783 }
5784 
5785 /* Computes the cost field of IVS structure.  */
5786 
5787 static void
5788 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5789 {
5790   comp_cost cost = ivs->cand_use_cost;
5791 
5792   cost += ivs->cand_cost;
5793   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5794   ivs->cost = cost;
5795 }
5796 
5797 /* Remove uses of the invariants in set INVS by decreasing the counters in
5798    N_INV_USES and updating IVS.  */
5799 
5800 static void
5801 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5802 {
5803   bitmap_iterator bi;
5804   unsigned iid;
5805 
5806   if (!invs)
5807     return;
5808 
5809   gcc_assert (n_inv_uses != NULL);
5810   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5811     {
5812       n_inv_uses[iid]--;
5813       if (n_inv_uses[iid] == 0)
5814 	ivs->n_invs--;
5815     }
5816 }
5817 
5818 /* Set GROUP not to be expressed by any candidate in IVS.  */
5819 
5820 static void
5821 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5822 		 struct iv_group *group)
5823 {
5824   unsigned gid = group->id, cid;
5825   struct cost_pair *cp;
5826 
5827   cp = ivs->cand_for_group[gid];
5828   if (!cp)
5829     return;
5830   cid = cp->cand->id;
5831 
5832   ivs->bad_groups++;
5833   ivs->cand_for_group[gid] = NULL;
5834   ivs->n_cand_uses[cid]--;
5835 
5836   if (ivs->n_cand_uses[cid] == 0)
5837     {
5838       bitmap_clear_bit (ivs->cands, cid);
5839       ivs->n_cands--;
5840       ivs->cand_cost -= cp->cand->cost;
5841       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5842       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5843     }
5844 
5845   ivs->cand_use_cost -= cp->cost;
5846   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5847   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5848   iv_ca_recount_cost (data, ivs);
5849 }
5850 
5851 /* Add uses of the invariants in set INVS by increasing the counters in
5852    N_INV_USES and updating IVS.  */
5853 
5854 static void
5855 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5856 {
5857   bitmap_iterator bi;
5858   unsigned iid;
5859 
5860   if (!invs)
5861     return;
5862 
5863   gcc_assert (n_inv_uses != NULL);
5864   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5865     {
5866       n_inv_uses[iid]++;
5867       if (n_inv_uses[iid] == 1)
5868 	ivs->n_invs++;
5869     }
5870 }
5871 
5872 /* Set cost pair for GROUP in set IVS to CP.  */
5873 
5874 static void
5875 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5876 	      struct iv_group *group, struct cost_pair *cp)
5877 {
5878   unsigned gid = group->id, cid;
5879 
5880   if (ivs->cand_for_group[gid] == cp)
5881     return;
5882 
5883   if (ivs->cand_for_group[gid])
5884     iv_ca_set_no_cp (data, ivs, group);
5885 
5886   if (cp)
5887     {
5888       cid = cp->cand->id;
5889 
5890       ivs->bad_groups--;
5891       ivs->cand_for_group[gid] = cp;
5892       ivs->n_cand_uses[cid]++;
5893       if (ivs->n_cand_uses[cid] == 1)
5894 	{
5895 	  bitmap_set_bit (ivs->cands, cid);
5896 	  ivs->n_cands++;
5897 	  ivs->cand_cost += cp->cand->cost;
5898 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5899 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5900 	}
5901 
5902       ivs->cand_use_cost += cp->cost;
5903       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5904       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5905       iv_ca_recount_cost (data, ivs);
5906     }
5907 }
5908 
5909 /* Extend set IVS by expressing GROUP by some of the candidates in it
5910    if possible.  Consider all important candidates if the candidates in
5911    set IVS don't give any result.  */
5912 
5913 static void
5914 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5915 	       struct iv_group *group)
5916 {
5917   struct cost_pair *best_cp = NULL, *cp;
5918   bitmap_iterator bi;
5919   unsigned i;
5920   struct iv_cand *cand;
5921 
5922   gcc_assert (ivs->upto >= group->id);
5923   ivs->upto++;
5924   ivs->bad_groups++;
5925 
5926   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5927     {
5928       cand = data->vcands[i];
5929       cp = get_group_iv_cost (data, group, cand);
5930       if (cheaper_cost_pair (cp, best_cp))
5931 	best_cp = cp;
5932     }
5933 
5934   if (best_cp == NULL)
5935     {
5936       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5937 	{
5938 	  cand = data->vcands[i];
5939 	  cp = get_group_iv_cost (data, group, cand);
5940 	  if (cheaper_cost_pair (cp, best_cp))
5941 	    best_cp = cp;
5942 	}
5943     }
5944 
5945   iv_ca_set_cp (data, ivs, group, best_cp);
5946 }
5947 
5948 /* Get cost for assignment IVS.  */
5949 
5950 static comp_cost
5951 iv_ca_cost (struct iv_ca *ivs)
5952 {
5953   /* This was a conditional expression but it triggered a bug in
5954      Sun C 5.5.  */
5955   if (ivs->bad_groups)
5956     return infinite_cost;
5957   else
5958     return ivs->cost;
5959 }
5960 
5961 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5962    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5963    respectively.  */
5964 
5965 static int
5966 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5967 		    struct iv_group *group, struct cost_pair *old_cp,
5968 		    struct cost_pair *new_cp)
5969 {
5970   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5971   unsigned old_n_invs = ivs->n_invs;
5972   iv_ca_set_cp (data, ivs, group, new_cp);
5973   unsigned new_n_invs = ivs->n_invs;
5974   iv_ca_set_cp (data, ivs, group, old_cp);
5975 
5976   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5977 }
5978 
5979 /* Creates a change record expressing GROUP by NEW_CP instead of OLD_CP and
5980    chains it before NEXT.  */
5981 
5982 static struct iv_ca_delta *
5983 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5984 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
5985 {
5986   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5987 
5988   change->group = group;
5989   change->old_cp = old_cp;
5990   change->new_cp = new_cp;
5991   change->next = next;
5992 
5993   return change;
5994 }
5995 
5996 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5997    are rewritten.  */
5998 
5999 static struct iv_ca_delta *
6000 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6001 {
6002   struct iv_ca_delta *last;
6003 
6004   if (!l2)
6005     return l1;
6006 
6007   if (!l1)
6008     return l2;
6009 
6010   for (last = l1; last->next; last = last->next)
6011     continue;
6012   last->next = l2;
6013 
6014   return l1;
6015 }
6016 
6017 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6018 
6019 static struct iv_ca_delta *
6020 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6021 {
6022   struct iv_ca_delta *act, *next, *prev = NULL;
6023 
6024   for (act = delta; act; act = next)
6025     {
6026       next = act->next;
6027       act->next = prev;
6028       prev = act;
6029 
6030       std::swap (act->old_cp, act->new_cp);
6031     }
6032 
6033   return prev;
6034 }
6035 
6036 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6037    reverted instead.  */
6038 
6039 static void
6040 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6041 		    struct iv_ca_delta *delta, bool forward)
6042 {
6043   struct cost_pair *from, *to;
6044   struct iv_ca_delta *act;
6045 
6046   if (!forward)
6047     delta = iv_ca_delta_reverse (delta);
6048 
6049   for (act = delta; act; act = act->next)
6050     {
6051       from = act->old_cp;
6052       to = act->new_cp;
6053       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6054       iv_ca_set_cp (data, ivs, act->group, to);
6055     }
6056 
6057   if (!forward)
6058     iv_ca_delta_reverse (delta);
6059 }
6060 
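/* Typical (sketch) usage of the delta machinery, as done by iv_ca_extend and
   iv_ca_prune below: build DELTA with iv_ca_delta_add, commit it forward to
   measure the resulting cost, then commit it backward to restore IVS; the
   delta is kept or freed only once the caller decides whether the change
   pays off:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);  */
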
6061 /* Returns true if CAND is used in IVS.  */
6062 
6063 static bool
6064 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6065 {
6066   return ivs->n_cand_uses[cand->id] > 0;
6067 }
6068 
6069 /* Returns number of induction variable candidates in the set IVS.  */
6070 
6071 static unsigned
6072 iv_ca_n_cands (struct iv_ca *ivs)
6073 {
6074   return ivs->n_cands;
6075 }
6076 
6077 /* Free the list of changes DELTA.  */
6078 
6079 static void
6080 iv_ca_delta_free (struct iv_ca_delta **delta)
6081 {
6082   struct iv_ca_delta *act, *next;
6083 
6084   for (act = *delta; act; act = next)
6085     {
6086       next = act->next;
6087       free (act);
6088     }
6089 
6090   *delta = NULL;
6091 }
6092 
6093 /* Allocates a new iv candidate assignment.  */
6094 
6095 static struct iv_ca *
6096 iv_ca_new (struct ivopts_data *data)
6097 {
6098   struct iv_ca *nw = XNEW (struct iv_ca);
6099 
6100   nw->upto = 0;
6101   nw->bad_groups = 0;
6102   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6103 				 data->vgroups.length ());
6104   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6105   nw->cands = BITMAP_ALLOC (NULL);
6106   nw->n_cands = 0;
6107   nw->n_invs = 0;
6108   nw->cand_use_cost = no_cost;
6109   nw->cand_cost = 0;
6110   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6111   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6112   nw->cost = no_cost;
6113 
6114   return nw;
6115 }
6116 
6117 /* Free memory occupied by the set IVS.  */
6118 
6119 static void
6120 iv_ca_free (struct iv_ca **ivs)
6121 {
6122   free ((*ivs)->cand_for_group);
6123   free ((*ivs)->n_cand_uses);
6124   BITMAP_FREE ((*ivs)->cands);
6125   free ((*ivs)->n_inv_var_uses);
6126   free ((*ivs)->n_inv_expr_uses);
6127   free (*ivs);
6128   *ivs = NULL;
6129 }
6130 
6131 /* Dumps IVS to FILE.  */
6132 
6133 static void
6134 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6135 {
6136   unsigned i;
6137   comp_cost cost = iv_ca_cost (ivs);
6138 
6139   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6140 	   cost.complexity);
6141   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6142 	   ivs->cand_cost, ivs->cand_use_cost.cost,
6143 	   ivs->cand_use_cost.complexity);
6144   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6145 
6146   for (i = 0; i < ivs->upto; i++)
6147     {
6148       struct iv_group *group = data->vgroups[i];
6149       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6150       if (cp)
6151         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6152 		 group->id, cp->cand->id, cp->cost.cost,
6153 		 cp->cost.complexity);
6154       else
6155 	fprintf (file, "   group:%d --> ??\n", group->id);
6156     }
6157 
6158   const char *pref = "";
6159   fprintf (file, "  invariant variables: ");
6160   for (i = 1; i <= data->max_inv_var_id; i++)
6161     if (ivs->n_inv_var_uses[i])
6162       {
6163 	fprintf (file, "%s%d", pref, i);
6164 	pref = ", ";
6165       }
6166 
6167   pref = "";
6168   fprintf (file, "\n  invariant expressions: ");
6169   for (i = 1; i <= data->max_inv_expr_id; i++)
6170     if (ivs->n_inv_expr_uses[i])
6171       {
6172 	fprintf (file, "%s%d", pref, i);
6173 	pref = ", ";
6174       }
6175 
6176   fprintf (file, "\n\n");
6177 }
6178 
6179 /* Try changing the candidate in IVS to CAND for each use.  Return the cost of
6180    the new set, and store the differences in DELTA.  The number of induction
6181    variables in the new set is stored in N_IVS.  If MIN_NCAND is true, the
6182    function tries to find a solution with a minimal number of iv candidates.  */
6183 
6184 static comp_cost
6185 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6186 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6187 	      unsigned *n_ivs, bool min_ncand)
6188 {
6189   unsigned i;
6190   comp_cost cost;
6191   struct iv_group *group;
6192   struct cost_pair *old_cp, *new_cp;
6193 
6194   *delta = NULL;
6195   for (i = 0; i < ivs->upto; i++)
6196     {
6197       group = data->vgroups[i];
6198       old_cp = iv_ca_cand_for_group (ivs, group);
6199 
6200       if (old_cp
6201 	  && old_cp->cand == cand)
6202 	continue;
6203 
6204       new_cp = get_group_iv_cost (data, group, cand);
6205       if (!new_cp)
6206 	continue;
6207 
6208       if (!min_ncand)
6209 	{
6210 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6211 	  /* Skip if new_cp depends on more invariants.  */
6212 	  if (cmp_invs > 0)
6213 	    continue;
6214 
6215 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6216 	  /* Skip if new_cp is not cheaper.  */
6217 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6218 	    continue;
6219 	}
6220 
6221       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6222     }
6223 
6224   iv_ca_delta_commit (data, ivs, *delta, true);
6225   cost = iv_ca_cost (ivs);
6226   if (n_ivs)
6227     *n_ivs = iv_ca_n_cands (ivs);
6228   iv_ca_delta_commit (data, ivs, *delta, false);
6229 
6230   return cost;
6231 }
6232 
6233 /* Try narrowing set IVS by removing CAND.  Return the cost of
6234    the new set and store the differences in DELTA.  START is
6235    the candidate with which we start narrowing.  */
6236 
6237 static comp_cost
6238 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6239 	      struct iv_cand *cand, struct iv_cand *start,
6240 	      struct iv_ca_delta **delta)
6241 {
6242   unsigned i, ci;
6243   struct iv_group *group;
6244   struct cost_pair *old_cp, *new_cp, *cp;
6245   bitmap_iterator bi;
6246   struct iv_cand *cnd;
6247   comp_cost cost, best_cost, acost;
6248 
6249   *delta = NULL;
6250   for (i = 0; i < data->vgroups.length (); i++)
6251     {
6252       group = data->vgroups[i];
6253 
6254       old_cp = iv_ca_cand_for_group (ivs, group);
6255       if (old_cp->cand != cand)
6256 	continue;
6257 
6258       best_cost = iv_ca_cost (ivs);
6259       /* Start narrowing with START.  */
6260       new_cp = get_group_iv_cost (data, group, start);
6261 
6262       if (data->consider_all_candidates)
6263 	{
6264 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6265 	    {
6266 	      if (ci == cand->id || (start && ci == start->id))
6267 		continue;
6268 
6269 	      cnd = data->vcands[ci];
6270 
6271 	      cp = get_group_iv_cost (data, group, cnd);
6272 	      if (!cp)
6273 		continue;
6274 
6275 	      iv_ca_set_cp (data, ivs, group, cp);
6276 	      acost = iv_ca_cost (ivs);
6277 
6278 	      if (acost < best_cost)
6279 		{
6280 		  best_cost = acost;
6281 		  new_cp = cp;
6282 		}
6283 	    }
6284 	}
6285       else
6286 	{
6287 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6288 	    {
6289 	      if (ci == cand->id || (start && ci == start->id))
6290 		continue;
6291 
6292 	      cnd = data->vcands[ci];
6293 
6294 	      cp = get_group_iv_cost (data, group, cnd);
6295 	      if (!cp)
6296 		continue;
6297 
6298 	      iv_ca_set_cp (data, ivs, group, cp);
6299 	      acost = iv_ca_cost (ivs);
6300 
6301 	      if (acost < best_cost)
6302 		{
6303 		  best_cost = acost;
6304 		  new_cp = cp;
6305 		}
6306 	    }
6307 	}
6308       /* Restore the old cp for this group.  */
6309       iv_ca_set_cp (data, ivs, group, old_cp);
6310 
6311       if (!new_cp)
6312 	{
6313 	  iv_ca_delta_free (delta);
6314 	  return infinite_cost;
6315 	}
6316 
6317       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6318     }
6319 
6320   iv_ca_delta_commit (data, ivs, *delta, true);
6321   cost = iv_ca_cost (ivs);
6322   iv_ca_delta_commit (data, ivs, *delta, false);
6323 
6324   return cost;
6325 }
6326 
6327 /* Try optimizing the set of candidates IVS by removing candidates other than
6328    EXCEPT_CAND from it.  Return the cost of the new set, and store the
6329    differences in DELTA.  */
6330 
6331 static comp_cost
6332 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6333 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6334 {
6335   bitmap_iterator bi;
6336   struct iv_ca_delta *act_delta, *best_delta;
6337   unsigned i;
6338   comp_cost best_cost, acost;
6339   struct iv_cand *cand;
6340 
6341   best_delta = NULL;
6342   best_cost = iv_ca_cost (ivs);
6343 
6344   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6345     {
6346       cand = data->vcands[i];
6347 
6348       if (cand == except_cand)
6349 	continue;
6350 
6351       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6352 
6353       if (acost < best_cost)
6354 	{
6355 	  best_cost = acost;
6356 	  iv_ca_delta_free (&best_delta);
6357 	  best_delta = act_delta;
6358 	}
6359       else
6360 	iv_ca_delta_free (&act_delta);
6361     }
6362 
6363   if (!best_delta)
6364     {
6365       *delta = NULL;
6366       return best_cost;
6367     }
6368 
6369   /* Recurse to possibly remove other unnecessary ivs.  */
6370   iv_ca_delta_commit (data, ivs, best_delta, true);
6371   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6372   iv_ca_delta_commit (data, ivs, best_delta, false);
6373   *delta = iv_ca_delta_join (best_delta, *delta);
6374   return best_cost;
6375 }
6376 
6377 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6378    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6379    the corresponding cost_pair, otherwise just return BEST_CP.  */
6380 
6381 static struct cost_pair*
6382 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6383 			unsigned int cand_idx, struct iv_cand *old_cand,
6384 			struct cost_pair *best_cp)
6385 {
6386   struct iv_cand *cand;
6387   struct cost_pair *cp;
6388 
6389   gcc_assert (old_cand != NULL && best_cp != NULL);
6390   if (cand_idx == old_cand->id)
6391     return best_cp;
6392 
6393   cand = data->vcands[cand_idx];
6394   cp = get_group_iv_cost (data, group, cand);
6395   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6396     return cp;
6397 
6398   return best_cp;
6399 }
6400 
6401 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6402    which are used by more than one iv use.  For each such candidate, this
6403    function tries to represent the iv uses under that candidate using other
6404    candidates with lower local cost, then tries to prune the new set.
6405    If the new set has lower cost, it returns the new cost after recording the
6406    candidate replacements in the list DELTA.  */
6407 
6408 static comp_cost
6409 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6410 	       struct iv_ca_delta **delta)
6411 {
6412   bitmap_iterator bi, bj;
6413   unsigned int i, j, k;
6414   struct iv_cand *cand;
6415   comp_cost orig_cost, acost;
6416   struct iv_ca_delta *act_delta, *tmp_delta;
6417   struct cost_pair *old_cp, *best_cp = NULL;
6418 
6419   *delta = NULL;
6420   orig_cost = iv_ca_cost (ivs);
6421 
6422   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6423     {
6424       if (ivs->n_cand_uses[i] == 1
6425 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6426 	continue;
6427 
6428       cand = data->vcands[i];
6429 
6430       act_delta = NULL;
6431       /*  Represent uses under current candidate using other ones with
6432 	  lower local cost.  */
6433       for (j = 0; j < ivs->upto; j++)
6434 	{
6435 	  struct iv_group *group = data->vgroups[j];
6436 	  old_cp = iv_ca_cand_for_group (ivs, group);
6437 
6438 	  if (old_cp->cand != cand)
6439 	    continue;
6440 
6441 	  best_cp = old_cp;
6442 	  if (data->consider_all_candidates)
6443 	    for (k = 0; k < data->vcands.length (); k++)
6444 	      best_cp = cheaper_cost_with_cand (data, group, k,
6445 						old_cp->cand, best_cp);
6446 	  else
6447 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6448 	      best_cp = cheaper_cost_with_cand (data, group, k,
6449 						old_cp->cand, best_cp);
6450 
6451 	  if (best_cp == old_cp)
6452 	    continue;
6453 
6454 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6455 	}
6456       /* No need for further pruning.  */
6457       if (!act_delta)
6458 	continue;
6459 
6460       /* Prune the new candidate set.  */
6461       iv_ca_delta_commit (data, ivs, act_delta, true);
6462       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6463       iv_ca_delta_commit (data, ivs, act_delta, false);
6464       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6465 
6466       if (acost < orig_cost)
6467 	{
6468 	  *delta = act_delta;
6469 	  return acost;
6470 	}
6471       else
6472 	iv_ca_delta_free (&act_delta);
6473     }
6474 
6475   return orig_cost;
6476 }
6477 
6478 /* Tries to extend the set IVS in the best possible way in order to
6479    express GROUP.  If ORIGINALP is true, prefer candidates from
6480    the original set of IVs, otherwise favor important candidates not
6481    based on any memory object.  */
6482 
6483 static bool
6484 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6485 		  struct iv_group *group, bool originalp)
6486 {
6487   comp_cost best_cost, act_cost;
6488   unsigned i;
6489   bitmap_iterator bi;
6490   struct iv_cand *cand;
6491   struct iv_ca_delta *best_delta = NULL, *act_delta;
6492   struct cost_pair *cp;
6493 
6494   iv_ca_add_group (data, ivs, group);
6495   best_cost = iv_ca_cost (ivs);
6496   cp = iv_ca_cand_for_group (ivs, group);
6497   if (cp)
6498     {
6499       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6500       iv_ca_set_no_cp (data, ivs, group);
6501     }
6502 
6503   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6504      first try important candidates not based on any memory object.  Only if
6505      this fails, try the specific ones.  Rationale -- in loops with many
6506      variables the best choice often is to use just one generic biv.  If we
6507      added here many ivs specific to the uses, the optimization algorithm later
6508      would be likely to get stuck in a local minimum, thus causing us to create
6509      too many ivs.  The approach from few ivs to more seems more likely to be
6510      successful -- starting from few ivs, replacing an expensive use by a
6511      specific iv should always be a win.  */
6512   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6513     {
6514       cand = data->vcands[i];
6515 
6516       if (originalp && cand->pos != IP_ORIGINAL)
6517 	continue;
6518 
6519       if (!originalp && cand->iv->base_object != NULL_TREE)
6520 	continue;
6521 
6522       if (iv_ca_cand_used_p (ivs, cand))
6523 	continue;
6524 
6525       cp = get_group_iv_cost (data, group, cand);
6526       if (!cp)
6527 	continue;
6528 
6529       iv_ca_set_cp (data, ivs, group, cp);
6530       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6531 			       true);
6532       iv_ca_set_no_cp (data, ivs, group);
6533       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6534 
6535       if (act_cost < best_cost)
6536 	{
6537 	  best_cost = act_cost;
6538 
6539 	  iv_ca_delta_free (&best_delta);
6540 	  best_delta = act_delta;
6541 	}
6542       else
6543 	iv_ca_delta_free (&act_delta);
6544     }
6545 
6546   if (best_cost.infinite_cost_p ())
6547     {
6548       for (i = 0; i < group->n_map_members; i++)
6549 	{
6550 	  cp = group->cost_map + i;
6551 	  cand = cp->cand;
6552 	  if (!cand)
6553 	    continue;
6554 
6555 	  /* Already tried this.  */
6556 	  if (cand->important)
6557 	    {
6558 	      if (originalp && cand->pos == IP_ORIGINAL)
6559 		continue;
6560 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6561 		continue;
6562 	    }
6563 
6564 	  if (iv_ca_cand_used_p (ivs, cand))
6565 	    continue;
6566 
6567 	  act_delta = NULL;
6568 	  iv_ca_set_cp (data, ivs, group, cp);
6569 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6570 	  iv_ca_set_no_cp (data, ivs, group);
6571 	  act_delta = iv_ca_delta_add (group,
6572 				       iv_ca_cand_for_group (ivs, group),
6573 				       cp, act_delta);
6574 
6575 	  if (act_cost < best_cost)
6576 	    {
6577 	      best_cost = act_cost;
6578 
6579 	      if (best_delta)
6580 		iv_ca_delta_free (&best_delta);
6581 	      best_delta = act_delta;
6582 	    }
6583 	  else
6584 	    iv_ca_delta_free (&act_delta);
6585 	}
6586     }
6587 
6588   iv_ca_delta_commit (data, ivs, best_delta, true);
6589   iv_ca_delta_free (&best_delta);
6590 
6591   return !best_cost.infinite_cost_p ();
6592 }
6593 
6594 /* Finds an initial assignment of candidates to uses.  */
6595 
6596 static struct iv_ca *
6597 get_initial_solution (struct ivopts_data *data, bool originalp)
6598 {
6599   unsigned i;
6600   struct iv_ca *ivs = iv_ca_new (data);
6601 
6602   for (i = 0; i < data->vgroups.length (); i++)
6603     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6604       {
6605 	iv_ca_free (&ivs);
6606 	return NULL;
6607       }
6608 
6609   return ivs;
6610 }
6611 
6612 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6613    points to a bool variable; if it is true, this function tries to break
6614    the locally optimal fixed point by replacing candidates in IVS.  */
6615 
6616 static bool
6617 try_improve_iv_set (struct ivopts_data *data,
6618 		    struct iv_ca *ivs, bool *try_replace_p)
6619 {
6620   unsigned i, n_ivs;
6621   comp_cost acost, best_cost = iv_ca_cost (ivs);
6622   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6623   struct iv_cand *cand;
6624 
6625   /* Try extending the set of induction variables by one.  */
6626   for (i = 0; i < data->vcands.length (); i++)
6627     {
6628       cand = data->vcands[i];
6629 
6630       if (iv_ca_cand_used_p (ivs, cand))
6631 	continue;
6632 
6633       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6634       if (!act_delta)
6635 	continue;
6636 
6637       /* If we successfully added the candidate and the set is small enough,
6638 	 try optimizing it by removing other candidates.  */
6639       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6640       	{
6641 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6642 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6643 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6644 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6645 	}
6646 
6647       if (acost < best_cost)
6648 	{
6649 	  best_cost = acost;
6650 	  iv_ca_delta_free (&best_delta);
6651 	  best_delta = act_delta;
6652 	}
6653       else
6654 	iv_ca_delta_free (&act_delta);
6655     }
6656 
6657   if (!best_delta)
6658     {
6659       /* Try removing the candidates from the set instead.  */
6660       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6661 
6662       if (!best_delta && *try_replace_p)
6663 	{
6664 	  *try_replace_p = false;
6665 	  /* So far the candidate selection algorithm tends to choose fewer
6666 	     IVs, so that it can handle loops that have many variables but
6667 	     for which the best choice is often to use only one general biv.
6668 	     One weakness is that it cannot handle the opposite case, in
6669 	     which different candidates should be chosen for different uses.
6670 	     To solve this, we replace candidates in the manner described in
6671 	     the comments of iv_ca_replace, thus giving the general algorithm
6672 	     a chance to break the local optimal fixed-point in these cases.  */
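	  /* A made-up example of such a case: a loop that indexes two
	     arrays with different element sizes, where each address use is
	     cheapest with its own pointer iv rather than one shared biv.  */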
6673 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6674 	}
6675 
6676       if (!best_delta)
6677 	return false;
6678     }
6679 
6680   iv_ca_delta_commit (data, ivs, best_delta, true);
6681   gcc_assert (best_cost == iv_ca_cost (ivs));
6682   iv_ca_delta_free (&best_delta);
6683   return true;
6684 }
6685 
6686 /* Attempts to find the optimal set of induction variables.  We use a simple
6687    greedy heuristic -- we try to replace at most one candidate in the selected
6688    solution and remove the unused ivs while this improves the cost.  */
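
/* An informal sketch of the search performed below (an approximation, not a
   definitive restatement of the heuristic):

     set = get_initial_solution (data, originalp);
     while (try_improve_iv_set (data, set, &try_replace_p))
       ;   -- each accepted step improves iv_ca_cost (set)
     return set;

   Each improvement step either extends the set by one candidate (pruning
   afterwards when the set is small), prunes candidates, or, as a last
   resort, replaces candidates.  */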
6689 
6690 static struct iv_ca *
6691 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6692 {
6693   struct iv_ca *set;
6694   bool try_replace_p = true;
6695 
6696   /* Get the initial solution.  */
6697   set = get_initial_solution (data, originalp);
6698   if (!set)
6699     {
6700       if (dump_file && (dump_flags & TDF_DETAILS))
6701 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6702       return NULL;
6703     }
6704 
6705   if (dump_file && (dump_flags & TDF_DETAILS))
6706     {
6707       fprintf (dump_file, "Initial set of candidates:\n");
6708       iv_ca_dump (data, dump_file, set);
6709     }
6710 
6711   while (try_improve_iv_set (data, set, &try_replace_p))
6712     {
6713       if (dump_file && (dump_flags & TDF_DETAILS))
6714 	{
6715 	  fprintf (dump_file, "Improved to:\n");
6716 	  iv_ca_dump (data, dump_file, set);
6717 	}
6718     }
6719 
6720   return set;
6721 }
6722 
6723 static struct iv_ca *
6724 find_optimal_iv_set (struct ivopts_data *data)
6725 {
6726   unsigned i;
6727   comp_cost cost, origcost;
6728   struct iv_ca *set, *origset;
6729 
6730   /* Determine the cost based on a strategy that starts with the original
6731      IVs, and then try again using a strategy that prefers candidates not
6732      based on any IVs.  */
6733   origset = find_optimal_iv_set_1 (data, true);
6734   set = find_optimal_iv_set_1 (data, false);
6735 
6736   if (!origset && !set)
6737     return NULL;
6738 
6739   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6740   cost = set ? iv_ca_cost (set) : infinite_cost;
6741 
6742   if (dump_file && (dump_flags & TDF_DETAILS))
6743     {
6744       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6745 	       origcost.cost, origcost.complexity);
6746       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6747 	       cost.cost, cost.complexity);
6748     }
6749 
6750   /* Choose the one with the best cost.  */
6751   if (origcost <= cost)
6752     {
6753       if (set)
6754 	iv_ca_free (&set);
6755       set = origset;
6756     }
6757   else if (origset)
6758     iv_ca_free (&origset);
6759 
6760   for (i = 0; i < data->vgroups.length (); i++)
6761     {
6762       struct iv_group *group = data->vgroups[i];
6763       group->selected = iv_ca_cand_for_group (set, group)->cand;
6764     }
6765 
6766   return set;
6767 }
6768 
6769 /* Creates a new induction variable corresponding to CAND.  */
6770 
6771 static void
6772 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6773 {
6774   gimple_stmt_iterator incr_pos;
6775   tree base;
6776   struct iv_use *use;
6777   struct iv_group *group;
6778   bool after = false;
6779 
6780   gcc_assert (cand->iv != NULL);
6781 
6782   switch (cand->pos)
6783     {
6784     case IP_NORMAL:
6785       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6786       break;
6787 
6788     case IP_END:
6789       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6790       after = true;
6791       break;
6792 
6793     case IP_AFTER_USE:
6794       after = true;
6795       /* fall through */
6796     case IP_BEFORE_USE:
6797       incr_pos = gsi_for_stmt (cand->incremented_at);
6798       break;
6799 
6800     case IP_ORIGINAL:
6801       /* Mark that the iv is preserved.  */
6802       name_info (data, cand->var_before)->preserve_biv = true;
6803       name_info (data, cand->var_after)->preserve_biv = true;
6804 
6805       /* Rewrite the increment so that it uses var_before directly.  */
6806       use = find_interesting_uses_op (data, cand->var_after);
6807       group = data->vgroups[use->group_id];
6808       group->selected = cand;
6809       return;
6810     }
6811 
6812   gimple_add_tmp_var (cand->var_before);
6813 
6814   base = unshare_expr (cand->iv->base);
6815 
6816   create_iv (base, unshare_expr (cand->iv->step),
6817 	     cand->var_before, data->current_loop,
6818 	     &incr_pos, after, &cand->var_before, &cand->var_after);
6819 }
6820 
6821 /* Creates new induction variables described in SET.  */
6822 
6823 static void
6824 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6825 {
6826   unsigned i;
6827   struct iv_cand *cand;
6828   bitmap_iterator bi;
6829 
6830   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6831     {
6832       cand = data->vcands[i];
6833       create_new_iv (data, cand);
6834     }
6835 
6836   if (dump_file && (dump_flags & TDF_DETAILS))
6837     {
6838       fprintf (dump_file, "Selected IV set for loop %d",
6839 	       data->current_loop->num);
6840       if (data->loop_loc != UNKNOWN_LOCATION)
6841 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6842 		 LOCATION_LINE (data->loop_loc));
6843       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6844 	       avg_loop_niter (data->current_loop));
6845       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6846       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6847 	{
6848 	  cand = data->vcands[i];
6849 	  dump_cand (dump_file, cand);
6850 	}
6851       fprintf (dump_file, "\n");
6852     }
6853 }
6854 
6855 /* Rewrites USE (definition of iv used in a nonlinear expression)
6856    using candidate CAND.  */
6857 
6858 static void
6859 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6860 			    struct iv_use *use, struct iv_cand *cand)
6861 {
6862   gassign *ass;
6863   gimple_stmt_iterator bsi;
6864   tree comp, type = get_use_type (use), tgt;
6865 
6866   /* An important special case -- if we are asked to express the value of
6867      the original iv by itself, just exit; there is no need to
6868      introduce a new computation (that might also need casting the
6869      variable to unsigned and back).  */
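  /* A hypothetical illustration: if the use is the original biv increment

       i_7 = i_4 + 1;

     and the IP_ORIGINAL candidate has var_before = i_4 and var_after = i_7,
     the statement is already in the required form and can be left alone,
     provided the checks below succeed.  */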
6870   if (cand->pos == IP_ORIGINAL
6871       && cand->incremented_at == use->stmt)
6872     {
6873       tree op = NULL_TREE;
6874       enum tree_code stmt_code;
6875 
6876       gcc_assert (is_gimple_assign (use->stmt));
6877       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6878 
6879       /* Check whether we may leave the computation unchanged.
6880 	 This is the case only if it does not rely on other
6881 	 computations in the loop -- otherwise, the computation
6882 	 we rely upon may be removed in remove_unused_ivs,
6883 	 thus leading to ICE.  */
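      /* E.g. (hypothetical names): for "i_7 = i_4 + j_9" where j_9 is
	 another iv of the loop, j_9's definition may later be deleted by
	 remove_unused_ivs, so the statement cannot be kept unchanged.  */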
6884       stmt_code = gimple_assign_rhs_code (use->stmt);
6885       if (stmt_code == PLUS_EXPR
6886 	  || stmt_code == MINUS_EXPR
6887 	  || stmt_code == POINTER_PLUS_EXPR)
6888 	{
6889 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6890 	    op = gimple_assign_rhs2 (use->stmt);
6891 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6892 	    op = gimple_assign_rhs1 (use->stmt);
6893 	}
6894 
6895       if (op != NULL_TREE)
6896 	{
6897 	  if (expr_invariant_in_loop_p (data->current_loop, op))
6898 	    return;
6899 	  if (TREE_CODE (op) == SSA_NAME)
6900 	    {
6901 	      struct iv *iv = get_iv (data, op);
6902 	      if (iv != NULL && integer_zerop (iv->step))
6903 		return;
6904 	    }
6905 	}
6906     }
6907 
6908   switch (gimple_code (use->stmt))
6909     {
6910     case GIMPLE_PHI:
6911       tgt = PHI_RESULT (use->stmt);
6912 
6913       /* If we should keep the biv, do not replace it.  */
6914       if (name_info (data, tgt)->preserve_biv)
6915 	return;
6916 
6917       bsi = gsi_after_labels (gimple_bb (use->stmt));
6918       break;
6919 
6920     case GIMPLE_ASSIGN:
6921       tgt = gimple_assign_lhs (use->stmt);
6922       bsi = gsi_for_stmt (use->stmt);
6923       break;
6924 
6925     default:
6926       gcc_unreachable ();
6927     }
6928 
6929   aff_tree aff_inv, aff_var;
6930   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6931 			      use, cand, &aff_inv, &aff_var))
6932     gcc_unreachable ();
6933 
6934   unshare_aff_combination (&aff_inv);
6935   unshare_aff_combination (&aff_var);
6936   /* Prefer the CSE opportunity over the loop-invariant expression by adding
6937      the offset last, so that iv_uses with different offsets can be CSEed.  */
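  /* For instance (illustrative only): for two uses p_5 + 8 and p_5 + 16 of
     the same candidate, emitting the shared part first and adding the
     constant offsets 8 and 16 last allows the shared computation to be
     CSEed across the uses.  */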
6938   poly_widest_int offset = aff_inv.offset;
6939   aff_inv.offset = 0;
6940 
6941   gimple_seq stmt_list = NULL, seq = NULL;
6942   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6943   tree comp_op2 = aff_combination_to_tree (&aff_var);
6944   gcc_assert (comp_op1 && comp_op2);
6945 
6946   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6947   gimple_seq_add_seq (&stmt_list, seq);
6948   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6949   gimple_seq_add_seq (&stmt_list, seq);
6950 
6951   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6952     std::swap (comp_op1, comp_op2);
6953 
6954   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6955     {
6956       comp = fold_build_pointer_plus (comp_op1,
6957 				      fold_convert (sizetype, comp_op2));
6958       comp = fold_build_pointer_plus (comp,
6959 				      wide_int_to_tree (sizetype, offset));
6960     }
6961   else
6962     {
6963       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6964 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
6965       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6966 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6967     }
6968 
6969   comp = fold_convert (type, comp);
6970   if (!valid_gimple_rhs_p (comp)
6971       || (gimple_code (use->stmt) != GIMPLE_PHI
6972 	  /* We can't allow re-allocating the stmt as it might be pointed
6973 	     to still.  */
6974 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6975 	      >= gimple_num_ops (gsi_stmt (bsi)))))
6976     {
6977       comp = force_gimple_operand (comp, &seq, true, NULL);
6978       gimple_seq_add_seq (&stmt_list, seq);
6979       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6980 	{
6981 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6982 	  /* As this isn't a plain copy we have to reset alignment
6983 	     information.  */
6984 	  if (SSA_NAME_PTR_INFO (comp))
6985 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6986 	}
6987     }
6988 
6989   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6990   if (gimple_code (use->stmt) == GIMPLE_PHI)
6991     {
6992       ass = gimple_build_assign (tgt, comp);
6993       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6994 
6995       bsi = gsi_for_stmt (use->stmt);
6996       remove_phi_node (&bsi, false);
6997     }
6998   else
6999     {
7000       gimple_assign_set_rhs_from_tree (&bsi, comp);
7001       use->stmt = gsi_stmt (bsi);
7002     }
7003 }
7004 
7005 /* Performs a peephole optimization to reorder the iv update statement with
7006    a mem ref to enable instruction combining in later phases. The mem ref uses
7007    the iv value before the update, so the reordering transformation requires
7008    adjustment of the offset. CAND is the selected IV_CAND.
7009 
7010    Example:
7011 
7012    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7013    iv2 = iv1 + 1;
7014 
7015    if (t < val)      (1)
7016      goto L;
7017    goto Head;
7018 
7019 
7020    Directly propagating t over to (1) would introduce an overlapping live
7021    range and thus increase register pressure.  This peephole transforms it into:
7022 
7023 
7024    iv2 = iv1 + 1;
7025    t = MEM_REF (base, iv2, 8, 8);
7026    if (t < val)
7027      goto L;
7028    goto Head;
7029 */
7030 
7031 static void
7032 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7033 {
7034   tree var_after;
7035   gimple *iv_update, *stmt;
7036   basic_block bb;
7037   gimple_stmt_iterator gsi, gsi_iv;
7038 
7039   if (cand->pos != IP_NORMAL)
7040     return;
7041 
7042   var_after = cand->var_after;
7043   iv_update = SSA_NAME_DEF_STMT (var_after);
7044 
7045   bb = gimple_bb (iv_update);
7046   gsi = gsi_last_nondebug_bb (bb);
7047   stmt = gsi_stmt (gsi);
7048 
7049   /* Only handle conditional statement for now.  */
7050   if (gimple_code (stmt) != GIMPLE_COND)
7051     return;
7052 
7053   gsi_prev_nondebug (&gsi);
7054   stmt = gsi_stmt (gsi);
7055   if (stmt != iv_update)
7056     return;
7057 
7058   gsi_prev_nondebug (&gsi);
7059   if (gsi_end_p (gsi))
7060     return;
7061 
7062   stmt = gsi_stmt (gsi);
7063   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7064     return;
7065 
7066   if (stmt != use->stmt)
7067     return;
7068 
7069   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7070     return;
7071 
7072   if (dump_file && (dump_flags & TDF_DETAILS))
7073     {
7074       fprintf (dump_file, "Reordering \n");
7075       print_gimple_stmt (dump_file, iv_update, 0);
7076       print_gimple_stmt (dump_file, use->stmt, 0);
7077       fprintf (dump_file, "\n");
7078     }
7079 
7080   gsi = gsi_for_stmt (use->stmt);
7081   gsi_iv = gsi_for_stmt (iv_update);
7082   gsi_move_before (&gsi_iv, &gsi);
7083 
7084   cand->pos = IP_BEFORE_USE;
7085   cand->incremented_at = use->stmt;
7086 }
7087 
7088 /* Return the alias pointer type that should be used for a MEM_REF
7089    associated with USE, which has type USE_PTR_ADDRESS.  */
7090 
7091 static tree
7092 get_alias_ptr_type_for_ptr_address (iv_use *use)
7093 {
7094   gcall *call = as_a <gcall *> (use->stmt);
7095   switch (gimple_call_internal_fn (call))
7096     {
7097     case IFN_MASK_LOAD:
7098     case IFN_MASK_STORE:
7099       /* The second argument contains the correct alias type.  */
7100       gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7101       return TREE_TYPE (gimple_call_arg (call, 1));
7102 
7103     default:
7104       gcc_unreachable ();
7105     }
7106 }
7107 
7108 
7109 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7110 
7111 static void
7112 rewrite_use_address (struct ivopts_data *data,
7113 		     struct iv_use *use, struct iv_cand *cand)
7114 {
7115   aff_tree aff;
7116   bool ok;
7117 
7118   adjust_iv_update_pos (cand, use);
7119   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7120   gcc_assert (ok);
7121   unshare_aff_combination (&aff);
7122 
7123   /* To avoid undefined overflow problems, all IV candidates use unsigned
7124      integer types.  The drawback is that this makes it impossible for
7125      create_mem_ref to distinguish an IV that is based on a memory object
7126      from one that represents simply an offset.
7127 
7128      To work around this problem, we pass a hint to create_mem_ref that
7129      indicates which variable (if any) in aff is an IV based on a memory
7130      object.  Note that we only consider the candidate.  If this is not
7131      based on an object, the base of the reference is in some subexpression
7132      of the use -- but these will use pointer types, so they are recognized
7133      by the create_mem_ref heuristics anyway.  */
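  /* A rough, made-up example: for an address use of a[i] where the selected
     candidate is a pointer iv p_8 stepping through the array, the hint lets
     create_mem_ref build something like MEM[(int *)p_8] instead of treating
     p_8 as a plain unsigned offset from &a.  */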
7134   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7135   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7136   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7137   tree type = use->mem_type;
7138   tree alias_ptr_type;
7139   if (use->type == USE_PTR_ADDRESS)
7140     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7141   else
7142     {
7143       gcc_assert (type == TREE_TYPE (*use->op_p));
7144       unsigned int align = get_object_alignment (*use->op_p);
7145       if (align != TYPE_ALIGN (type))
7146 	type = build_aligned_type (type, align);
7147       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7148     }
7149   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7150 			     iv, base_hint, data->speed);
7151 
7152   if (use->type == USE_PTR_ADDRESS)
7153     {
7154       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7155       ref = fold_convert (get_use_type (use), ref);
7156       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7157 				      true, GSI_SAME_STMT);
7158     }
7159   else
7160     copy_ref_info (ref, *use->op_p);
7161 
7162   *use->op_p = ref;
7163 }
7164 
7165 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7166    candidate CAND.  */
7167 
7168 static void
7169 rewrite_use_compare (struct ivopts_data *data,
7170 		     struct iv_use *use, struct iv_cand *cand)
7171 {
7172   tree comp, op, bound;
7173   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7174   enum tree_code compare;
7175   struct iv_group *group = data->vgroups[use->group_id];
7176   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7177 
7178   bound = cp->value;
7179   if (bound)
7180     {
7181       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7182       tree var_type = TREE_TYPE (var);
7183       gimple_seq stmts;
7184 
7185       if (dump_file && (dump_flags & TDF_DETAILS))
7186 	{
7187 	  fprintf (dump_file, "Replacing exit test: ");
7188 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7189 	}
7190       compare = cp->comp;
7191       bound = unshare_expr (fold_convert (var_type, bound));
7192       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7193       if (stmts)
7194 	gsi_insert_seq_on_edge_immediate (
7195 		loop_preheader_edge (data->current_loop),
7196 		stmts);
7197 
7198       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7199       gimple_cond_set_lhs (cond_stmt, var);
7200       gimple_cond_set_code (cond_stmt, compare);
7201       gimple_cond_set_rhs (cond_stmt, op);
7202       return;
7203     }
7204 
7205   /* The induction variable elimination failed; just express the original
7206      giv.  */
7207   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7208   gcc_assert (comp != NULL_TREE);
7209   gcc_assert (use->op_p != NULL);
7210   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7211 					 SSA_NAME_VAR (*use->op_p),
7212 					 true, GSI_SAME_STMT);
7213 }
7214 
7215 /* Rewrite the groups using the selected induction variables.  */
7216 
7217 static void
7218 rewrite_groups (struct ivopts_data *data)
7219 {
7220   unsigned i, j;
7221 
7222   for (i = 0; i < data->vgroups.length (); i++)
7223     {
7224       struct iv_group *group = data->vgroups[i];
7225       struct iv_cand *cand = group->selected;
7226 
7227       gcc_assert (cand);
7228 
7229       if (group->type == USE_NONLINEAR_EXPR)
7230 	{
7231 	  for (j = 0; j < group->vuses.length (); j++)
7232 	    {
7233 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7234 	      update_stmt (group->vuses[j]->stmt);
7235 	    }
7236 	}
7237       else if (address_p (group->type))
7238 	{
7239 	  for (j = 0; j < group->vuses.length (); j++)
7240 	    {
7241 	      rewrite_use_address (data, group->vuses[j], cand);
7242 	      update_stmt (group->vuses[j]->stmt);
7243 	    }
7244 	}
7245       else
7246 	{
7247 	  gcc_assert (group->type == USE_COMPARE);
7248 
7249 	  for (j = 0; j < group->vuses.length (); j++)
7250 	    {
7251 	      rewrite_use_compare (data, group->vuses[j], cand);
7252 	      update_stmt (group->vuses[j]->stmt);
7253 	    }
7254 	}
7255     }
7256 }
7257 
7258 /* Removes the ivs that are not used after rewriting.  */
7259 
7260 static void
7261 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7262 {
7263   unsigned j;
7264   bitmap_iterator bi;
7265 
7266   /* Figure out an order in which to release SSA DEFs so that we don't
7267      release something that we'd have to propagate into a debug stmt
7268      afterwards.  */
7269   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7270     {
7271       struct version_info *info;
7272 
7273       info = ver_info (data, j);
7274       if (info->iv
7275 	  && !integer_zerop (info->iv->step)
7276 	  && !info->inv_id
7277 	  && !info->iv->nonlin_use
7278 	  && !info->preserve_biv)
7279 	{
7280 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7281 
7282 	  tree def = info->iv->ssa_name;
7283 
7284 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7285 	    {
7286 	      imm_use_iterator imm_iter;
7287 	      use_operand_p use_p;
7288 	      gimple *stmt;
7289 	      int count = 0;
7290 
7291 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7292 		{
7293 		  if (!gimple_debug_bind_p (stmt))
7294 		    continue;
7295 
7296 		  /* We just want to determine whether to do nothing
7297 		     (count == 0), to substitute the computed
7298 		     expression into a single use of the SSA DEF by
7299 		     itself (count == 1), or to use a debug temp
7300 		     because the SSA DEF is used multiple times or as
7301 		     part of a larger expression (count > 1). */
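		  /* E.g. (illustrative): for a bind "# DEBUG x => i_3" the
		     count ends up at 1 and i_3 can be substituted directly,
		     while for "# DEBUG x => i_3 * 4" it reaches 2 and a
		     debug temp is introduced further below.  */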
7302 		  count++;
7303 		  if (gimple_debug_bind_get_value (stmt) != def)
7304 		    count++;
7305 
7306 		  if (count > 1)
7307 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7308 		}
7309 
7310 	      if (!count)
7311 		continue;
7312 
7313 	      struct iv_use dummy_use;
7314 	      struct iv_cand *best_cand = NULL, *cand;
7315 	      unsigned i, best_pref = 0, cand_pref;
7316 
7317 	      memset (&dummy_use, 0, sizeof (dummy_use));
7318 	      dummy_use.iv = info->iv;
7319 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7320 		{
7321 		  cand = data->vgroups[i]->selected;
7322 		  if (cand == best_cand)
7323 		    continue;
7324 		  cand_pref = operand_equal_p (cand->iv->step,
7325 					       info->iv->step, 0)
7326 		    ? 4 : 0;
7327 		  cand_pref
7328 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7329 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7330 		    ? 2 : 0;
7331 		  cand_pref
7332 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7333 		    ? 1 : 0;
7334 		  if (best_cand == NULL || best_pref < cand_pref)
7335 		    {
7336 		      best_cand = cand;
7337 		      best_pref = cand_pref;
7338 		    }
7339 		}
7340 
7341 	      if (!best_cand)
7342 		continue;
7343 
7344 	      tree comp = get_computation_at (data->current_loop,
7345 					      SSA_NAME_DEF_STMT (def),
7346 					      &dummy_use, best_cand);
7347 	      if (!comp)
7348 		continue;
7349 
7350 	      if (count > 1)
7351 		{
7352 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7353 		  DECL_ARTIFICIAL (vexpr) = 1;
7354 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7355 		  if (SSA_NAME_VAR (def))
7356 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7357 		  else
7358 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7359 		  gdebug *def_temp
7360 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7361 		  gimple_stmt_iterator gsi;
7362 
7363 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7364 		    gsi = gsi_after_labels (gimple_bb
7365 					    (SSA_NAME_DEF_STMT (def)));
7366 		  else
7367 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7368 
7369 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7370 		  comp = vexpr;
7371 		}
7372 
7373 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7374 		{
7375 		  if (!gimple_debug_bind_p (stmt))
7376 		    continue;
7377 
7378 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7379 		    SET_USE (use_p, comp);
7380 
7381 		  update_stmt (stmt);
7382 		}
7383 	    }
7384 	}
7385     }
7386 }
7387 
7388 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7389    for hash_map::traverse.  */
7390 
7391 bool
7392 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7393 {
7394   free (value);
7395   return true;
7396 }
7397 
7398 /* Frees data allocated by the optimization of a single loop.  */
7399 
7400 static void
7401 free_loop_data (struct ivopts_data *data)
7402 {
7403   unsigned i, j;
7404   bitmap_iterator bi;
7405   tree obj;
7406 
7407   if (data->niters)
7408     {
7409       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7410       delete data->niters;
7411       data->niters = NULL;
7412     }
7413 
7414   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7415     {
7416       struct version_info *info;
7417 
7418       info = ver_info (data, i);
7419       info->iv = NULL;
7420       info->has_nonlin_use = false;
7421       info->preserve_biv = false;
7422       info->inv_id = 0;
7423     }
7424   bitmap_clear (data->relevant);
7425   bitmap_clear (data->important_candidates);
7426 
7427   for (i = 0; i < data->vgroups.length (); i++)
7428     {
7429       struct iv_group *group = data->vgroups[i];
7430 
7431       for (j = 0; j < group->vuses.length (); j++)
7432 	free (group->vuses[j]);
7433       group->vuses.release ();
7434 
7435       BITMAP_FREE (group->related_cands);
7436       for (j = 0; j < group->n_map_members; j++)
7437 	{
7438 	  if (group->cost_map[j].inv_vars)
7439 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7440 	  if (group->cost_map[j].inv_exprs)
7441 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7442 	}
7443 
7444       free (group->cost_map);
7445       free (group);
7446     }
7447   data->vgroups.truncate (0);
7448 
7449   for (i = 0; i < data->vcands.length (); i++)
7450     {
7451       struct iv_cand *cand = data->vcands[i];
7452 
7453       if (cand->inv_vars)
7454 	BITMAP_FREE (cand->inv_vars);
7455       if (cand->inv_exprs)
7456 	BITMAP_FREE (cand->inv_exprs);
7457       free (cand);
7458     }
7459   data->vcands.truncate (0);
7460 
7461   if (data->version_info_size < num_ssa_names)
7462     {
7463       data->version_info_size = 2 * num_ssa_names;
7464       free (data->version_info);
7465       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7466     }
7467 
7468   data->max_inv_var_id = 0;
7469   data->max_inv_expr_id = 0;
7470 
7471   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7472     SET_DECL_RTL (obj, NULL_RTX);
7473 
7474   decl_rtl_to_reset.truncate (0);
7475 
7476   data->inv_expr_tab->empty ();
7477 
7478   data->iv_common_cand_tab->empty ();
7479   data->iv_common_cands.truncate (0);
7480 }
7481 
7482 /* Finalizes data structures used by the iv optimization pass.  */
7484 
7485 static void
7486 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7487 {
7488   free_loop_data (data);
7489   free (data->version_info);
7490   BITMAP_FREE (data->relevant);
7491   BITMAP_FREE (data->important_candidates);
7492 
7493   decl_rtl_to_reset.release ();
7494   data->vgroups.release ();
7495   data->vcands.release ();
7496   delete data->inv_expr_tab;
7497   data->inv_expr_tab = NULL;
7498   free_affine_expand_cache (&data->name_expansion_cache);
7499   delete data->iv_common_cand_tab;
7500   data->iv_common_cand_tab = NULL;
7501   data->iv_common_cands.release ();
7502   obstack_free (&data->iv_obstack, NULL);
7503 }
7504 
7505 /* Returns true if the loop body BODY includes any function calls.  */
7506 
7507 static bool
7508 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7509 {
7510   gimple_stmt_iterator gsi;
7511   unsigned i;
7512 
7513   for (i = 0; i < num_nodes; i++)
7514     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7515       {
7516 	gimple *stmt = gsi_stmt (gsi);
7517 	if (is_gimple_call (stmt)
7518 	    && !gimple_call_internal_p (stmt)
7519 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7520 	  return true;
7521       }
7522   return false;
7523 }
7524 
7525 /* Optimizes the LOOP.  Returns true if anything changed.  */
7526 
7527 static bool
7528 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7529 			   bitmap toremove)
7530 {
7531   bool changed = false;
7532   struct iv_ca *iv_ca;
7533   edge exit = single_dom_exit (loop);
7534   basic_block *body;
7535 
7536   gcc_assert (!data->niters);
7537   data->current_loop = loop;
7538   data->loop_loc = find_loop_location (loop).get_location_t ();
7539   data->speed = optimize_loop_for_speed_p (loop);
7540 
7541   if (dump_file && (dump_flags & TDF_DETAILS))
7542     {
7543       fprintf (dump_file, "Processing loop %d", loop->num);
7544       if (data->loop_loc != UNKNOWN_LOCATION)
7545 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7546 		 LOCATION_LINE (data->loop_loc));
7547       fprintf (dump_file, "\n");
7548 
7549       if (exit)
7550 	{
7551 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7552 		   exit->src->index, exit->dest->index);
7553 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7554 	  fprintf (dump_file, "\n");
7555 	}
7556 
7557       fprintf (dump_file, "\n");
7558     }
7559 
7560   body = get_loop_body (loop);
7561   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7562   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7563   free (body);
7564 
7565   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7566 
7567   /* For each ssa name determines whether it behaves as an induction variable
7568      in some loop.  */
7569   if (!find_induction_variables (data))
7570     goto finish;
7571 
7572   /* Finds interesting uses (item 1).  */
7573   find_interesting_uses (data);
7574   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7575     goto finish;
7576 
7577   /* Finds candidates for the induction variables (item 2).  */
7578   find_iv_candidates (data);
7579 
7580   /* Calculates the costs (item 3, part 1).  */
7581   determine_iv_costs (data);
7582   determine_group_iv_costs (data);
7583   determine_set_costs (data);
7584 
7585   /* Find the optimal set of induction variables (item 3, part 2).  */
7586   iv_ca = find_optimal_iv_set (data);
7587   if (!iv_ca)
7588     goto finish;
7589   changed = true;
7590 
7591   /* Create the new induction variables (item 4, part 1).  */
7592   create_new_ivs (data, iv_ca);
7593   iv_ca_free (&iv_ca);
7594 
7595   /* Rewrite the uses (item 4, part 2).  */
7596   rewrite_groups (data);
7597 
7598   /* Remove the ivs that are unused after rewriting.  */
7599   remove_unused_ivs (data, toremove);
7600 
7601 finish:
7602   free_loop_data (data);
7603 
7604   return changed;
7605 }
7606 
7607 /* Main entry point.  Optimizes induction variables in loops.  */
7608 
7609 void
7610 tree_ssa_iv_optimize (void)
7611 {
7612   struct loop *loop;
7613   struct ivopts_data data;
7614   auto_bitmap toremove;
7615 
7616   tree_ssa_iv_optimize_init (&data);
7617 
7618   /* Optimize the loops starting with the innermost ones.  */
7619   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7620     {
7621       if (dump_file && (dump_flags & TDF_DETAILS))
7622 	flow_loop_dump (loop, dump_file, NULL, 1);
7623 
7624       tree_ssa_iv_optimize_loop (&data, loop, toremove);
7625     }
7626 
7627   /* Remove eliminated IV defs.  */
7628   release_defs_bitset (toremove);
7629 
7630   /* We have changed the structure of induction variables; it might happen
7631      that definitions in the scev database refer to some of them that were
7632      eliminated.  */
7633   scev_reset_htab ();
7634   /* Likewise niter and control-IV information.  */
7635   free_numbers_of_iterations_estimates (cfun);
7636 
7637   tree_ssa_iv_optimize_finalize (&data);
7638 }
7639 
7640 #include "gt-tree-ssa-loop-ivopts.h"
7641