1 /* Induction variable optimizations.
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does it in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note that the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only in constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variables
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.  */
68 
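/* As a purely illustrative sketch (not taken from this pass's sources), a
   loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may end up being rewritten so that the address itself becomes the
   induction variable and the original counter disappears when it has no
   other uses:

     for (p = a; p < a + n; p++)
       *p = 0;

   i.e. the implicit multiplication in a[i] is strength-reduced and the exit
   compare is rewritten against the new iv.  */
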
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111 
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113    cost of different addressing modes.  This should be moved to a TBD
114    interface between the GIMPLE and RTL worlds.  */
115 
116 /* The infinite cost.  */
117 #define INFTY 10000000
118 
119 /* Returns the expected number of loop iterations for LOOP.
120    The average trip count is computed from profile data if it
121    exists. */
122 
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127   if (niter == -1)
128     {
129       niter = likely_max_stmt_executions_int (loop);
130 
131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133     }
134 
135   return niter;
136 }
137 
138 struct iv_use;
139 
140 /* Representation of the induction variable.  */
141 struct iv
142 {
143   tree base;		/* Initial value of the iv.  */
144   tree base_object;	/* A memory object to which the induction variable points.  */
145   tree step;		/* Step of the iv (constant only).  */
146   tree ssa_name;	/* The ssa name with the value.  */
147   struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if there is one.  */
148   bool biv_p;		/* Is it a biv?  */
149   bool no_overflow;	/* True if the iv doesn't overflow.  */
150   bool have_address_use;/* For biv, indicate if it's used in any address
151 			   type use.  */
152 };
153 
154 /* Per-ssa version information (induction variable descriptions, etc.).  */
155 struct version_info
156 {
157   tree name;		/* The ssa name.  */
158   struct iv *iv;	/* Induction variable description.  */
159   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
160 			   an expression that is not an induction variable.  */
161   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
162   unsigned inv_id;	/* Id of an invariant.  */
163 };
164 
165 /* Types of uses.  */
166 enum use_type
167 {
168   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
169   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
170 			   reference.  */
171   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
172 			   cases where the expansion of the function
173 			   will turn the argument into a normal address.  */
174   USE_COMPARE		/* Use is a compare.  */
175 };
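
/* Illustrative examples only (assuming I is an induction variable, N a loop
   invariant and P a pointer passed to a builtin):

     tmp = i * i;		-- USE_NONLINEAR_EXPR
     a[i] = 0;			-- USE_REF_ADDRESS (address of the store)
     memset (p + i, 0, len);	-- USE_PTR_ADDRESS (pointer argument whose
				   expansion becomes an address)
     if (i < n) ...		-- USE_COMPARE  */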
176 
177 /* Cost of a computation.  */
178 struct comp_cost
179 {
180   comp_cost (): cost (0), complexity (0), scratch (0)
181   {}
182 
183   comp_cost (int cost, unsigned complexity, int scratch = 0)
184     : cost (cost), complexity (complexity), scratch (scratch)
185   {}
186 
187   /* Returns true if COST is infinite.  */
188   bool infinite_cost_p ();
189 
190   /* Adds costs COST1 and COST2.  */
191   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192 
193   /* Adds COST to the comp_cost.  */
194   comp_cost operator+= (comp_cost cost);
195 
196   /* Adds constant C to this comp_cost.  */
197   comp_cost operator+= (HOST_WIDE_INT c);
198 
199   /* Subtracts constant C from this comp_cost.  */
200   comp_cost operator-= (HOST_WIDE_INT c);
201 
202   /* Divide the comp_cost by constant C.  */
203   comp_cost operator/= (HOST_WIDE_INT c);
204 
205   /* Multiply the comp_cost by constant C.  */
206   comp_cost operator*= (HOST_WIDE_INT c);
207 
208   /* Subtracts cost COST2 from COST1.  */
209   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210 
211   /* Subtracts COST from this comp_cost.  */
212   comp_cost operator-= (comp_cost cost);
213 
214   /* Returns true if COST1 is smaller than COST2.  */
215   friend bool operator< (comp_cost cost1, comp_cost cost2);
216 
217   /* Returns true if COST1 and COST2 are equal.  */
218   friend bool operator== (comp_cost cost1, comp_cost cost2);
219 
220   /* Returns true if COST1 is smaller than or equal to COST2.  */
221   friend bool operator<= (comp_cost cost1, comp_cost cost2);
222 
223   int cost;		/* The runtime cost.  */
224   unsigned complexity;  /* The estimate of the complexity of the code for
225 			   the computation (in no concrete units --
226 			   complexity field should be larger for more
227 			   complex expressions and addressing modes).  */
228   int scratch;		/* Scratch used during cost computation.  */
229 };
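
/* Note (purely illustrative): in the comparison operators below the
   complexity field acts as a tie breaker -- e.g. if addresses BASE + I and
   BASE + 8*I + 4 happen to have the same runtime cost on the target, the
   simpler one compares as cheaper because of its lower complexity.  */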
230 
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233 
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237   return cost == INFTY;
238 }
239 
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244     return infinite_cost;
245 
246   cost1.cost += cost2.cost;
247   cost1.complexity += cost2.complexity;
248 
249   return cost1;
250 }
251 
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
254 {
255   if (cost1.infinite_cost_p ())
256     return infinite_cost;
257 
258   gcc_assert (!cost2.infinite_cost_p ());
259 
260   cost1.cost -= cost2.cost;
261   cost1.complexity -= cost2.complexity;
262 
263   return cost1;
264 }
265 
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
268 {
269   *this = *this + cost;
270   return *this;
271 }
272 
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
275 {
276   if (infinite_cost_p ())
277     return *this;
278 
279   this->cost += c;
280 
281   return *this;
282 }
283 
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
286 {
287   if (infinite_cost_p ())
288     return *this;
289 
290   this->cost -= c;
291 
292   return *this;
293 }
294 
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
297 {
298   if (infinite_cost_p ())
299     return *this;
300 
301   this->cost /= c;
302 
303   return *this;
304 }
305 
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
308 {
309   if (infinite_cost_p ())
310     return *this;
311 
312   this->cost *= c;
313 
314   return *this;
315 }
316 
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
319 {
320   *this = *this - cost;
321   return *this;
322 }
323 
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
326 {
327   if (cost1.cost == cost2.cost)
328     return cost1.complexity < cost2.complexity;
329 
330   return cost1.cost < cost2.cost;
331 }
332 
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
335 {
336   return cost1.cost == cost2.cost
337     && cost1.complexity == cost2.complexity;
338 }
339 
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
342 {
343   return cost1 < cost2 || cost1 == cost2;
344 }
345 
346 struct iv_inv_expr_ent;
347 
348 /* The candidate - cost pair.  */
349 struct cost_pair
350 {
351   struct iv_cand *cand;	/* The candidate.  */
352   comp_cost cost;	/* The cost.  */
353   enum tree_code comp;	/* For iv elimination, the comparison.  */
354   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
355 			   preserved when representing iv_use with iv_cand.  */
356   bitmap inv_exprs;	/* The list of newly created invariant expressions
357 			   when representing iv_use with iv_cand.  */
358   tree value;		/* For final value elimination, the expression for
359 			   the final value of the iv.  For iv elimination,
360 			   the new bound to compare with.  */
361 };
362 
363 /* Use.  */
364 struct iv_use
365 {
366   unsigned id;		/* The id of the use.  */
367   unsigned group_id;	/* The group id the use belongs to.  */
368   enum use_type type;	/* Type of the use.  */
369   tree mem_type;	/* The memory type to use when testing whether an
370 			   address is legitimate, and what the address's
371 			   cost is.  */
372   struct iv *iv;	/* The induction variable it is based on.  */
373   gimple *stmt;		/* Statement in that it occurs.  */
374   tree *op_p;		/* The place where it occurs.  */
375 
376   tree addr_base;	/* Base address with const offset stripped.  */
377   poly_uint64_pod addr_offset;
378 			/* Const offset stripped from base address.  */
379 };
380 
381 /* Group of uses.  */
382 struct iv_group
383 {
384   /* The id of the group.  */
385   unsigned id;
386   /* Uses of the group are of the same type.  */
387   enum use_type type;
388   /* The set of "related" IV candidates, plus the important ones.  */
389   bitmap related_cands;
390   /* Number of IV candidates in the cost_map.  */
391   unsigned n_map_members;
392   /* The costs w.r.t. the iv candidates.  */
393   struct cost_pair *cost_map;
394   /* The selected candidate for the group.  */
395   struct iv_cand *selected;
396   /* Uses in the group.  */
397   vec<struct iv_use *> vuses;
398 };
399 
400 /* The position where the iv is computed.  */
401 enum iv_position
402 {
403   IP_NORMAL,		/* At the end, just before the exit condition.  */
404   IP_END,		/* At the end of the latch block.  */
405   IP_BEFORE_USE,	/* Immediately before a specific use.  */
406   IP_AFTER_USE,		/* Immediately after a specific use.  */
407   IP_ORIGINAL		/* The original biv.  */
408 };
409 
410 /* The induction variable candidate.  */
411 struct iv_cand
412 {
413   unsigned id;		/* The number of the candidate.  */
414   bool important;	/* Whether this is an "important" candidate, i.e. such
415 			   that it should be considered by all uses.  */
416   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
417   gimple *incremented_at;/* For original biv, the statement where it is
418 			   incremented.  */
419   tree var_before;	/* The variable used for it before increment.  */
420   tree var_after;	/* The variable used for it after increment.  */
421   struct iv *iv;	/* The value of the candidate.  NULL for
422 			   "pseudocandidate" used to indicate the possibility
423 			   to replace the final value of an iv by direct
424 			   computation of the value.  */
425   unsigned cost;	/* Cost of the candidate.  */
426   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
427   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 			      where it is incremented.  */
429   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
430 			   iv_cand.  */
431   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
432 			   handle it as a new invariant expression which will
433 			   be hoisted out of loop.  */
434   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
435 			   smaller type.  */
436 };
437 
438 /* Hashtable entry for common candidate derived from iv uses.  */
439 struct iv_common_cand
440 {
441   tree base;
442   tree step;
443   /* IV uses from which this common candidate is derived.  */
444   auto_vec<struct iv_use *> uses;
445   hashval_t hash;
446 };
447 
448 /* Hashtable helpers.  */
449 
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
451 {
452   static inline hashval_t hash (const iv_common_cand *);
453   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
454 };
455 
456 /* Hash function for possible common candidates.  */
457 
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
460 {
461   return ccand->hash;
462 }
463 
464 /* Hash table equality function for common candidates.  */
465 
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 			      const iv_common_cand *ccand2)
469 {
470   return (ccand1->hash == ccand2->hash
471 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
472 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
473 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475 }
476 
477 /* Loop invariant expression hashtable entry.  */
478 
479 struct iv_inv_expr_ent
480 {
481   /* Tree expression of the entry.  */
482   tree expr;
483   /* Unique identifier.  */
484   int id;
485   /* Hash value.  */
486   hashval_t hash;
487 };
488 
489 /* Sort iv_inv_expr_ent pair A and B by id field.  */
490 
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
493 {
494   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496 
497   unsigned id1 = (*e1)->id;
498   unsigned id2 = (*e2)->id;
499 
500   if (id1 < id2)
501     return -1;
502   else if (id1 > id2)
503     return 1;
504   else
505     return 0;
506 }
507 
508 /* Hashtable helpers.  */
509 
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
511 {
512   static inline hashval_t hash (const iv_inv_expr_ent *);
513   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
514 };
515 
516 /* Return true if uses of type TYPE represent some form of address.  */
517 
518 inline bool
519 address_p (use_type type)
520 {
521   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522 }
523 
524 /* Hash function for loop invariant expressions.  */
525 
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
528 {
529   return expr->hash;
530 }
531 
532 /* Hash table equality function for expressions.  */
533 
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 			   const iv_inv_expr_ent *expr2)
537 {
538   return expr1->hash == expr2->hash
539 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
540 }
541 
542 struct ivopts_data
543 {
544   /* The currently optimized loop.  */
545   struct loop *current_loop;
546   source_location loop_loc;
547 
548   /* Numbers of iterations for all exits of the current loop.  */
549   hash_map<edge, tree_niter_desc *> *niters;
550 
551   /* Number of registers used in it.  */
552   unsigned regs_used;
553 
554   /* The size of version_info array allocated.  */
555   unsigned version_info_size;
556 
557   /* The array of information for the ssa names.  */
558   struct version_info *version_info;
559 
560   /* The hashtable of loop invariant expressions created
561      by ivopt.  */
562   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
563 
564   /* The bitmap of indices in version_info whose value was changed.  */
565   bitmap relevant;
566 
567   /* The uses of induction variables.  */
568   vec<iv_group *> vgroups;
569 
570   /* The candidates.  */
571   vec<iv_cand *> vcands;
572 
573   /* A bitmap of important candidates.  */
574   bitmap important_candidates;
575 
576   /* Cache used by tree_to_aff_combination_expand.  */
577   hash_map<tree, name_expansion *> *name_expansion_cache;
578 
579   /* The hashtable of common candidates derived from iv uses.  */
580   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
581 
582   /* The common candidates.  */
583   vec<iv_common_cand *> iv_common_cands;
584 
585   /* Hash map recording base object information of tree exp.  */
586   hash_map<tree, tree> *base_object_map;
587 
588   /* The maximum invariant variable id.  */
589   unsigned max_inv_var_id;
590 
591   /* The maximum invariant expression id.  */
592   unsigned max_inv_expr_id;
593 
594   /* Number of no_overflow BIVs which are not used in memory address.  */
595   unsigned bivs_not_used_in_addr;
596 
597   /* Obstack for iv structure.  */
598   struct obstack iv_obstack;
599 
600   /* Whether to consider just related and important candidates when replacing a
601      use.  */
602   bool consider_all_candidates;
603 
604   /* Are we optimizing for speed?  */
605   bool speed;
606 
607   /* Whether the loop body includes any function calls.  */
608   bool body_includes_call;
609 
610   /* Whether the loop body can only be exited via single exit.  */
611   bool loop_single_exit_p;
612 };
613 
614 /* An assignment of iv candidates to uses.  */
615 
616 struct iv_ca
617 {
618   /* The number of uses covered by the assignment.  */
619   unsigned upto;
620 
621   /* Number of uses that cannot be expressed by the candidates in the set.  */
622   unsigned bad_groups;
623 
624   /* Candidate assigned to a use, together with the related costs.  */
625   struct cost_pair **cand_for_group;
626 
627   /* Number of times each candidate is used.  */
628   unsigned *n_cand_uses;
629 
630   /* The candidates used.  */
631   bitmap cands;
632 
633   /* The number of candidates in the set.  */
634   unsigned n_cands;
635 
636   /* The number of invariants needed, including both invariant variables and
637      invariant expressions.  */
638   unsigned n_invs;
639 
640   /* Total cost of expressing uses.  */
641   comp_cost cand_use_cost;
642 
643   /* Total cost of candidates.  */
644   unsigned cand_cost;
645 
646   /* Number of times each invariant variable is used.  */
647   unsigned *n_inv_var_uses;
648 
649   /* Number of times each invariant expression is used.  */
650   unsigned *n_inv_expr_uses;
651 
652   /* Total cost of the assignment.  */
653   comp_cost cost;
654 };
655 
656 /* Difference of two iv candidate assignments.  */
657 
658 struct iv_ca_delta
659 {
660   /* Changed group.  */
661   struct iv_group *group;
662 
663   /* An old assignment (for rollback purposes).  */
664   struct cost_pair *old_cp;
665 
666   /* A new assignment.  */
667   struct cost_pair *new_cp;
668 
669   /* Next change in the list.  */
670   struct iv_ca_delta *next;
671 };
672 
673 /* Bound on number of candidates below which all candidates are considered.  */
674 
675 #define CONSIDER_ALL_CANDIDATES_BOUND \
676   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
677 
678 /* If there are more iv occurrences than this, we just give up (it is quite
679    optimizing such a loop would help, and it would take ages).  */
680 
681 #define MAX_CONSIDERED_GROUPS \
682   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
683 
684 /* If there are at most this number of ivs in the set, always try removing
685    unnecessary ivs from the set.  */
686 
687 #define ALWAYS_PRUNE_CAND_SET_BOUND \
688   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
689 
690 /* The list of trees for which the decl_rtl field must be reset is stored
691    here.  */
692 
693 static vec<tree> decl_rtl_to_reset;
694 
695 static comp_cost force_expr_to_var_cost (tree, bool);
696 
697 /* The single loop exit if it dominates the latch, NULL otherwise.  */
698 
699 edge
700 single_dom_exit (struct loop *loop)
701 {
702   edge exit = single_exit (loop);
703 
704   if (!exit)
705     return NULL;
706 
707   if (!just_once_each_iteration_p (loop, exit->src))
708     return NULL;
709 
710   return exit;
711 }
712 
713 /* Dumps information about the induction variable IV to FILE.  Don't dump
714    variable's name if DUMP_NAME is FALSE.  The information is dumped with
715    preceding spaces indicated by INDENT_LEVEL.  */
716 
717 void
718 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
719 {
720   const char *p;
721   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
722 
723   if (indent_level > 4)
724     indent_level = 4;
725   p = spaces + 8 - (indent_level << 1);
726 
727   fprintf (file, "%sIV struct:\n", p);
728   if (iv->ssa_name && dump_name)
729     {
730       fprintf (file, "%s  SSA_NAME:\t", p);
731       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
732       fprintf (file, "\n");
733     }
734 
735   fprintf (file, "%s  Type:\t", p);
736   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
737   fprintf (file, "\n");
738 
739   fprintf (file, "%s  Base:\t", p);
740   print_generic_expr (file, iv->base, TDF_SLIM);
741   fprintf (file, "\n");
742 
743   fprintf (file, "%s  Step:\t", p);
744   print_generic_expr (file, iv->step, TDF_SLIM);
745   fprintf (file, "\n");
746 
747   if (iv->base_object)
748     {
749       fprintf (file, "%s  Object:\t", p);
750       print_generic_expr (file, iv->base_object, TDF_SLIM);
751       fprintf (file, "\n");
752     }
753 
754   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
755 
756   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
757 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
758 }
759 
760 /* Dumps information about the USE to FILE.  */
761 
762 void
763 dump_use (FILE *file, struct iv_use *use)
764 {
765   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
766   fprintf (file, "    At stmt:\t");
767   print_gimple_stmt (file, use->stmt, 0);
768   fprintf (file, "    At pos:\t");
769   if (use->op_p)
770     print_generic_expr (file, *use->op_p, TDF_SLIM);
771   fprintf (file, "\n");
772   dump_iv (file, use->iv, false, 2);
773 }
774 
775 /* Dumps information about the uses to FILE.  */
776 
777 void
778 dump_groups (FILE *file, struct ivopts_data *data)
779 {
780   unsigned i, j;
781   struct iv_group *group;
782 
783   for (i = 0; i < data->vgroups.length (); i++)
784     {
785       group = data->vgroups[i];
786       fprintf (file, "Group %d:\n", group->id);
787       if (group->type == USE_NONLINEAR_EXPR)
788 	fprintf (file, "  Type:\tGENERIC\n");
789       else if (group->type == USE_REF_ADDRESS)
790 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
791       else if (group->type == USE_PTR_ADDRESS)
792 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
793       else
794 	{
795 	  gcc_assert (group->type == USE_COMPARE);
796 	  fprintf (file, "  Type:\tCOMPARE\n");
797 	}
798       for (j = 0; j < group->vuses.length (); j++)
799 	dump_use (file, group->vuses[j]);
800     }
801 }
802 
803 /* Dumps information about induction variable candidate CAND to FILE.  */
804 
805 void
806 dump_cand (FILE *file, struct iv_cand *cand)
807 {
808   struct iv *iv = cand->iv;
809 
810   fprintf (file, "Candidate %d:\n", cand->id);
811   if (cand->inv_vars)
812     {
813       fprintf (file, "  Depend on inv.vars: ");
814       dump_bitmap (file, cand->inv_vars);
815     }
816   if (cand->inv_exprs)
817     {
818       fprintf (file, "  Depend on inv.exprs: ");
819       dump_bitmap (file, cand->inv_exprs);
820     }
821 
822   if (cand->var_before)
823     {
824       fprintf (file, "  Var befor: ");
825       print_generic_expr (file, cand->var_before, TDF_SLIM);
826       fprintf (file, "\n");
827     }
828   if (cand->var_after)
829     {
830       fprintf (file, "  Var after: ");
831       print_generic_expr (file, cand->var_after, TDF_SLIM);
832       fprintf (file, "\n");
833     }
834 
835   switch (cand->pos)
836     {
837     case IP_NORMAL:
838       fprintf (file, "  Incr POS: before exit test\n");
839       break;
840 
841     case IP_BEFORE_USE:
842       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
843       break;
844 
845     case IP_AFTER_USE:
846       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
847       break;
848 
849     case IP_END:
850       fprintf (file, "  Incr POS: at end\n");
851       break;
852 
853     case IP_ORIGINAL:
854       fprintf (file, "  Incr POS: orig biv\n");
855       break;
856     }
857 
858   dump_iv (file, iv, false, 1);
859 }
860 
861 /* Returns the info for ssa version VER.  */
862 
863 static inline struct version_info *
864 ver_info (struct ivopts_data *data, unsigned ver)
865 {
866   return data->version_info + ver;
867 }
868 
869 /* Returns the info for ssa name NAME.  */
870 
871 static inline struct version_info *
872 name_info (struct ivopts_data *data, tree name)
873 {
874   return ver_info (data, SSA_NAME_VERSION (name));
875 }
876 
877 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
878    emitted in LOOP.  */
879 
880 static bool
881 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
882 {
883   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
884 
885   gcc_assert (bb);
886 
887   if (sbb == loop->latch)
888     return true;
889 
890   if (sbb != bb)
891     return false;
892 
893   return stmt == last_stmt (bb);
894 }
895 
896 /* Returns true if STMT is after the place where the original induction
897    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
898    if the positions are identical.  */
899 
900 static bool
901 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
902 {
903   basic_block cand_bb = gimple_bb (cand->incremented_at);
904   basic_block stmt_bb = gimple_bb (stmt);
905 
906   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
907     return false;
908 
909   if (stmt_bb != cand_bb)
910     return true;
911 
912   if (true_if_equal
913       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
914     return true;
915   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
916 }
917 
918 /* Returns true if STMT is after the place where the induction variable
919    CAND is incremented in LOOP.  */
920 
921 static bool
922 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
923 {
924   switch (cand->pos)
925     {
926     case IP_END:
927       return false;
928 
929     case IP_NORMAL:
930       return stmt_after_ip_normal_pos (loop, stmt);
931 
932     case IP_ORIGINAL:
933     case IP_AFTER_USE:
934       return stmt_after_inc_pos (cand, stmt, false);
935 
936     case IP_BEFORE_USE:
937       return stmt_after_inc_pos (cand, stmt, true);
938 
939     default:
940       gcc_unreachable ();
941     }
942 }
943 
944 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
945 
946 static bool
947 abnormal_ssa_name_p (tree exp)
948 {
949   if (!exp)
950     return false;
951 
952   if (TREE_CODE (exp) != SSA_NAME)
953     return false;
954 
955   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
956 }
957 
958 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
959    abnormal phi node.  Callback for for_each_index.  */
960 
961 static bool
962 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
963 				  void *data ATTRIBUTE_UNUSED)
964 {
965   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
966     {
967       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
968 	return false;
969       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
970 	return false;
971     }
972 
973   return !abnormal_ssa_name_p (*index);
974 }
975 
976 /* Returns true if EXPR contains a ssa name that occurs in an
977    abnormal phi node.  */
978 
979 bool
980 contains_abnormal_ssa_name_p (tree expr)
981 {
982   enum tree_code code;
983   enum tree_code_class codeclass;
984 
985   if (!expr)
986     return false;
987 
988   code = TREE_CODE (expr);
989   codeclass = TREE_CODE_CLASS (code);
990 
991   if (code == SSA_NAME)
992     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
993 
994   if (code == INTEGER_CST
995       || is_gimple_min_invariant (expr))
996     return false;
997 
998   if (code == ADDR_EXPR)
999     return !for_each_index (&TREE_OPERAND (expr, 0),
1000 			    idx_contains_abnormal_ssa_name_p,
1001 			    NULL);
1002 
1003   if (code == COND_EXPR)
1004     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1005       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1006       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1007 
1008   switch (codeclass)
1009     {
1010     case tcc_binary:
1011     case tcc_comparison:
1012       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1013 	return true;
1014 
1015       /* Fallthru.  */
1016     case tcc_unary:
1017       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1018 	return true;
1019 
1020       break;
1021 
1022     default:
1023       gcc_unreachable ();
1024     }
1025 
1026   return false;
1027 }
1028 
1029 /*  Returns the structure describing number of iterations determined from
1030     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1031 
1032 static struct tree_niter_desc *
1033 niter_for_exit (struct ivopts_data *data, edge exit)
1034 {
1035   struct tree_niter_desc *desc;
1036   tree_niter_desc **slot;
1037 
1038   if (!data->niters)
1039     {
1040       data->niters = new hash_map<edge, tree_niter_desc *>;
1041       slot = NULL;
1042     }
1043   else
1044     slot = data->niters->get (exit);
1045 
1046   if (!slot)
1047     {
1048       /* Try to determine number of iterations.  We cannot safely work with ssa
1049 	 names that appear in phi nodes on abnormal edges, so that we do not
1050 	 create overlapping life ranges for them (PR 27283).  */
1051       desc = XNEW (struct tree_niter_desc);
1052       if (!number_of_iterations_exit (data->current_loop,
1053 				      exit, desc, true)
1054      	  || contains_abnormal_ssa_name_p (desc->niter))
1055 	{
1056 	  XDELETE (desc);
1057 	  desc = NULL;
1058 	}
1059       data->niters->put (exit, desc);
1060     }
1061   else
1062     desc = *slot;
1063 
1064   return desc;
1065 }
1066 
1067 /* Returns the structure describing number of iterations determined from
1068    single dominating exit of DATA->current_loop, or NULL if something
1069    goes wrong.  */
1070 
1071 static struct tree_niter_desc *
1072 niter_for_single_dom_exit (struct ivopts_data *data)
1073 {
1074   edge exit = single_dom_exit (data->current_loop);
1075 
1076   if (!exit)
1077     return NULL;
1078 
1079   return niter_for_exit (data, exit);
1080 }
1081 
1082 /* Initializes data structures used by the iv optimization pass, stored
1083    in DATA.  */
1084 
1085 static void
1086 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1087 {
1088   data->version_info_size = 2 * num_ssa_names;
1089   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1090   data->relevant = BITMAP_ALLOC (NULL);
1091   data->important_candidates = BITMAP_ALLOC (NULL);
1092   data->max_inv_var_id = 0;
1093   data->max_inv_expr_id = 0;
1094   data->niters = NULL;
1095   data->vgroups.create (20);
1096   data->vcands.create (20);
1097   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1098   data->name_expansion_cache = NULL;
1099   data->base_object_map = NULL;
1100   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1101   data->iv_common_cands.create (20);
1102   decl_rtl_to_reset.create (20);
1103   gcc_obstack_init (&data->iv_obstack);
1104 }
1105 
1106 /* walk_tree callback for determine_base_object.  */
1107 
1108 static tree
1109 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1110 {
1111   tree_code code = TREE_CODE (*tp);
1112   tree obj = NULL_TREE;
1113   if (code == ADDR_EXPR)
1114     {
1115       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1116       if (!base)
1117 	obj = *tp;
1118       else if (TREE_CODE (base) != MEM_REF)
1119 	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1120     }
1121   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1122 	obj = fold_convert (ptr_type_node, *tp);
1123 
1124   if (!obj)
1125     {
1126       if (!EXPR_P (*tp))
1127 	*walk_subtrees = 0;
1128 
1129       return NULL_TREE;
1130     }
1131   /* Record special node for multiple base objects and stop.  */
1132   if (*static_cast<tree *> (wdata))
1133     {
1134       *static_cast<tree *> (wdata) = integer_zero_node;
1135       return integer_zero_node;
1136     }
1137   /* Record the base object and continue looking.  */
1138   *static_cast<tree *> (wdata) = obj;
1139   return NULL_TREE;
1140 }
1141 
1142 /* Returns a memory object to which EXPR points, with caching.  Returns NULL
1143    if we are able to determine that it does not point to any such object;
1144    specially returns integer_zero_node if EXPR contains multiple base objects.  */
1145 
1146 static tree
1147 determine_base_object (struct ivopts_data *data, tree expr)
1148 {
1149   tree *slot, obj = NULL_TREE;
1150   if (data->base_object_map)
1151     {
1152       if ((slot = data->base_object_map->get(expr)) != NULL)
1153 	return *slot;
1154     }
1155   else
1156     data->base_object_map = new hash_map<tree, tree>;
1157 
1158   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1159   data->base_object_map->put (expr, obj);
1160   return obj;
1161 }
1162 
1163 /* Return true if an address expression with a non-DECL_P operand appears
1164    in EXPR.  */
1165 
1166 static bool
1167 contain_complex_addr_expr (tree expr)
1168 {
1169   bool res = false;
1170 
1171   STRIP_NOPS (expr);
1172   switch (TREE_CODE (expr))
1173     {
1174     case POINTER_PLUS_EXPR:
1175     case PLUS_EXPR:
1176     case MINUS_EXPR:
1177       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1178       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1179       break;
1180 
1181     case ADDR_EXPR:
1182       return (!DECL_P (TREE_OPERAND (expr, 0)));
1183 
1184     default:
1185       return false;
1186     }
1187 
1188   return res;
1189 }
1190 
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1193 
1194 static struct iv *
1195 alloc_iv (struct ivopts_data *data, tree base, tree step,
1196 	  bool no_overflow = false)
1197 {
1198   tree expr = base;
1199   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1200 					      sizeof (struct iv));
1201   gcc_assert (step != NULL_TREE);
1202 
1203   /* Lower address expression in base except ones with DECL_P as operand.
1204      By doing this:
1205        1) More accurate cost can be computed for address expressions;
1206        2) Duplicate candidates won't be created for bases in different
1207 	  forms, like &a[0] and &a.  */
1208   STRIP_NOPS (expr);
1209   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1210       || contain_complex_addr_expr (expr))
1211     {
1212       aff_tree comb;
1213       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1214       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1215     }
1216 
1217   iv->base = base;
1218   iv->base_object = determine_base_object (data, base);
1219   iv->step = step;
1220   iv->biv_p = false;
1221   iv->nonlin_use = NULL;
1222   iv->ssa_name = NULL_TREE;
1223   if (!no_overflow
1224        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1225 			      base, step))
1226     no_overflow = true;
1227   iv->no_overflow = no_overflow;
1228   iv->have_address_use = false;
1229 
1230   return iv;
1231 }
1232 
1233 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1234    doesn't overflow.  */
1235 
1236 static void
1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1238 	bool no_overflow)
1239 {
1240   struct version_info *info = name_info (data, iv);
1241 
1242   gcc_assert (!info->iv);
1243 
1244   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1245   info->iv = alloc_iv (data, base, step, no_overflow);
1246   info->iv->ssa_name = iv;
1247 }
1248 
1249 /* Finds induction variable declaration for VAR.  */
1250 
1251 static struct iv *
1252 get_iv (struct ivopts_data *data, tree var)
1253 {
1254   basic_block bb;
1255   tree type = TREE_TYPE (var);
1256 
1257   if (!POINTER_TYPE_P (type)
1258       && !INTEGRAL_TYPE_P (type))
1259     return NULL;
1260 
1261   if (!name_info (data, var)->iv)
1262     {
1263       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1264 
1265       if (!bb
1266 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1267 	set_iv (data, var, var, build_int_cst (type, 0), true);
1268     }
1269 
1270   return name_info (data, var)->iv;
1271 }
1272 
1273 /* Return the first non-invariant ssa var found in EXPR.  */
1274 
1275 static tree
1276 extract_single_var_from_expr (tree expr)
1277 {
1278   int i, n;
1279   tree tmp;
1280   enum tree_code code;
1281 
1282   if (!expr || is_gimple_min_invariant (expr))
1283     return NULL;
1284 
1285   code = TREE_CODE (expr);
1286   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1287     {
1288       n = TREE_OPERAND_LENGTH (expr);
1289       for (i = 0; i < n; i++)
1290 	{
1291 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1292 
1293 	  if (tmp)
1294 	    return tmp;
1295 	}
1296     }
1297   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1298 }
1299 
1300 /* Finds basic ivs.  */
1301 
1302 static bool
1303 find_bivs (struct ivopts_data *data)
1304 {
1305   gphi *phi;
1306   affine_iv iv;
1307   tree step, type, base, stop;
1308   bool found = false;
1309   struct loop *loop = data->current_loop;
1310   gphi_iterator psi;
1311 
1312   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1313     {
1314       phi = psi.phi ();
1315 
1316       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1317 	continue;
1318 
1319       if (virtual_operand_p (PHI_RESULT (phi)))
1320 	continue;
1321 
1322       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1323 	continue;
1324 
1325       if (integer_zerop (iv.step))
1326 	continue;
1327 
1328       step = iv.step;
1329       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1330       /* Stop expanding the iv base at the first ssa var referred to by the
1331 	 iv step.  Ideally we should stop at any such ssa var, but since that
1332 	 is expensive and the case is unusual, we just do it for the first one.
1333 
1334 	 See PR64705 for the rationale.  */
1335       stop = extract_single_var_from_expr (step);
1336       base = expand_simple_operations (base, stop);
1337       if (contains_abnormal_ssa_name_p (base)
1338 	  || contains_abnormal_ssa_name_p (step))
1339 	continue;
1340 
1341       type = TREE_TYPE (PHI_RESULT (phi));
1342       base = fold_convert (type, base);
1343       if (step)
1344 	{
1345 	  if (POINTER_TYPE_P (type))
1346 	    step = convert_to_ptrofftype (step);
1347 	  else
1348 	    step = fold_convert (type, step);
1349 	}
1350 
1351       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1352       found = true;
1353     }
1354 
1355   return found;
1356 }
1357 
1358 /* Marks basic ivs.  */
1359 
1360 static void
1361 mark_bivs (struct ivopts_data *data)
1362 {
1363   gphi *phi;
1364   gimple *def;
1365   tree var;
1366   struct iv *iv, *incr_iv;
1367   struct loop *loop = data->current_loop;
1368   basic_block incr_bb;
1369   gphi_iterator psi;
1370 
1371   data->bivs_not_used_in_addr = 0;
1372   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1373     {
1374       phi = psi.phi ();
1375 
1376       iv = get_iv (data, PHI_RESULT (phi));
1377       if (!iv)
1378 	continue;
1379 
1380       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1381       def = SSA_NAME_DEF_STMT (var);
1382       /* Don't mark an iv peeled from another one as a biv.  */
1383       if (def
1384 	  && gimple_code (def) == GIMPLE_PHI
1385 	  && gimple_bb (def) == loop->header)
1386 	continue;
1387 
1388       incr_iv = get_iv (data, var);
1389       if (!incr_iv)
1390 	continue;
1391 
1392       /* If the increment is in the subloop, ignore it.  */
1393       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1394       if (incr_bb->loop_father != data->current_loop
1395 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1396 	continue;
1397 
1398       iv->biv_p = true;
1399       incr_iv->biv_p = true;
1400       if (iv->no_overflow)
1401 	data->bivs_not_used_in_addr++;
1402       if (incr_iv->no_overflow)
1403 	data->bivs_not_used_in_addr++;
1404     }
1405 }
1406 
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408    parameters to IV.  */
1409 
1410 static bool
1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1412 {
1413   tree lhs, stop;
1414   struct loop *loop = data->current_loop;
1415 
1416   iv->base = NULL_TREE;
1417   iv->step = NULL_TREE;
1418 
1419   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1420     return false;
1421 
1422   lhs = gimple_assign_lhs (stmt);
1423   if (TREE_CODE (lhs) != SSA_NAME)
1424     return false;
1425 
1426   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1427     return false;
1428 
1429   /* Stop expanding the iv base at the first ssa var referred to by the
1430      iv step.  Ideally we should stop at any such ssa var, but since that
1431      is expensive and the case is unusual, we just do it for the first one.
1432 
1433      See PR64705 for the rationale.  */
1434   stop = extract_single_var_from_expr (iv->step);
1435   iv->base = expand_simple_operations (iv->base, stop);
1436   if (contains_abnormal_ssa_name_p (iv->base)
1437       || contains_abnormal_ssa_name_p (iv->step))
1438     return false;
1439 
1440   /* If STMT could throw, then do not consider STMT as defining a GIV.
1441    While this will suppress optimizations, we cannot safely delete this
1442      GIV and associated statements, even if it appears it is not used.  */
1443   if (stmt_could_throw_p (stmt))
1444     return false;
1445 
1446   return true;
1447 }
1448 
1449 /* Finds general ivs in statement STMT.  */
1450 
1451 static void
1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1453 {
1454   affine_iv iv;
1455 
1456   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1457     return;
1458 
1459   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1460 }
1461 
1462 /* Finds general ivs in basic block BB.  */
1463 
1464 static void
1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1466 {
1467   gimple_stmt_iterator bsi;
1468 
1469   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1470     find_givs_in_stmt (data, gsi_stmt (bsi));
1471 }
1472 
1473 /* Finds general ivs.  */
1474 
1475 static void
1476 find_givs (struct ivopts_data *data)
1477 {
1478   struct loop *loop = data->current_loop;
1479   basic_block *body = get_loop_body_in_dom_order (loop);
1480   unsigned i;
1481 
1482   for (i = 0; i < loop->num_nodes; i++)
1483     find_givs_in_bb (data, body[i]);
1484   free (body);
1485 }
1486 
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488    variable and if so, its initial value and step.  */
1489 
1490 static bool
1491 find_induction_variables (struct ivopts_data *data)
1492 {
1493   unsigned i;
1494   bitmap_iterator bi;
1495 
1496   if (!find_bivs (data))
1497     return false;
1498 
1499   find_givs (data);
1500   mark_bivs (data);
1501 
1502   if (dump_file && (dump_flags & TDF_DETAILS))
1503     {
1504       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1505 
1506       if (niter)
1507 	{
1508 	  fprintf (dump_file, "  number of iterations ");
1509 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1510 	  if (!integer_zerop (niter->may_be_zero))
1511 	    {
1512 	      fprintf (dump_file, "; zero if ");
1513 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1514 	    }
1515 	  fprintf (dump_file, "\n");
1516 	};
1517 
1518       fprintf (dump_file, "\n<Induction Vars>:\n");
1519       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1520 	{
1521 	  struct version_info *info = ver_info (data, i);
1522 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1523 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1524 	}
1525     }
1526 
1527   return true;
1528 }
1529 
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532    is the const offset stripped from IV base and MEM_TYPE is the type
1533    of the memory being addressed.  For uses of other types, ADDR_BASE
1534    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1535 
1536 static struct iv_use *
1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1538 	    gimple *stmt, enum use_type type, tree mem_type,
1539 	    tree addr_base, poly_uint64 addr_offset)
1540 {
1541   struct iv_use *use = XCNEW (struct iv_use);
1542 
1543   use->id = group->vuses.length ();
1544   use->group_id = group->id;
1545   use->type = type;
1546   use->mem_type = mem_type;
1547   use->iv = iv;
1548   use->stmt = stmt;
1549   use->op_p = use_p;
1550   use->addr_base = addr_base;
1551   use->addr_offset = addr_offset;
1552 
1553   group->vuses.safe_push (use);
1554   return use;
1555 }
1556 
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558    NONLINEAR_USE is true if the invariant is used in a way we do not
1559    handle specially.  */
1560 
1561 static void
1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1563 {
1564   basic_block bb;
1565   struct version_info *info;
1566 
1567   if (TREE_CODE (op) != SSA_NAME
1568       || virtual_operand_p (op))
1569     return;
1570 
1571   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1572   if (bb
1573       && flow_bb_inside_loop_p (data->current_loop, bb))
1574     return;
1575 
1576   info = name_info (data, op);
1577   info->name = op;
1578   info->has_nonlin_use |= nonlinear_use;
1579   if (!info->inv_id)
1580     info->inv_id = ++data->max_inv_var_id;
1581   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1582 }
1583 
1584 /* Record a group of TYPE.  */
1585 
1586 static struct iv_group *
1587 record_group (struct ivopts_data *data, enum use_type type)
1588 {
1589   struct iv_group *group = XCNEW (struct iv_group);
1590 
1591   group->id = data->vgroups.length ();
1592   group->type = type;
1593   group->related_cands = BITMAP_ALLOC (NULL);
1594   group->vuses.create (1);
1595 
1596   data->vgroups.safe_push (group);
1597   return group;
1598 }
1599 
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601    A new group will be created if there is no existing group for the use.
1602    MEM_TYPE is the type of memory being addressed, or NULL if this
1603    isn't an address reference.  */
1604 
1605 static struct iv_use *
1606 record_group_use (struct ivopts_data *data, tree *use_p,
1607 		  struct iv *iv, gimple *stmt, enum use_type type,
1608 		  tree mem_type)
1609 {
1610   tree addr_base = NULL;
1611   struct iv_group *group = NULL;
1612   poly_uint64 addr_offset = 0;
1613 
1614   /* Record a non-address type use in a new group; for address type uses,
	 try to reuse an existing group with the same stripped base and step.  */
1615   if (address_p (type))
1616     {
1617       unsigned int i;
1618 
1619       addr_base = strip_offset (iv->base, &addr_offset);
1620       for (i = 0; i < data->vgroups.length (); i++)
1621 	{
1622 	  struct iv_use *use;
1623 
1624 	  group = data->vgroups[i];
1625 	  use = group->vuses[0];
1626 	  if (!address_p (use->type))
1627 	    continue;
1628 
1629 	  /* Check if it has the same stripped base and step.  */
1630 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1631 	      && operand_equal_p (iv->step, use->iv->step, 0)
1632 	      && operand_equal_p (addr_base, use->addr_base, 0))
1633 	    break;
1634 	}
1635       if (i == data->vgroups.length ())
1636 	group = NULL;
1637     }
1638 
1639   if (!group)
1640     group = record_group (data, type);
1641 
1642   return record_use (group, use_p, iv, stmt, type, mem_type,
1643 		     addr_base, addr_offset);
1644 }
1645 
1646 /* Checks whether the use OP is interesting and if so, records it.  */
1647 
1648 static struct iv_use *
1649 find_interesting_uses_op (struct ivopts_data *data, tree op)
1650 {
1651   struct iv *iv;
1652   gimple *stmt;
1653   struct iv_use *use;
1654 
1655   if (TREE_CODE (op) != SSA_NAME)
1656     return NULL;
1657 
1658   iv = get_iv (data, op);
1659   if (!iv)
1660     return NULL;
1661 
1662   if (iv->nonlin_use)
1663     {
1664       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1665       return iv->nonlin_use;
1666     }
1667 
1668   if (integer_zerop (iv->step))
1669     {
1670       record_invariant (data, op, true);
1671       return NULL;
1672     }
1673 
1674   stmt = SSA_NAME_DEF_STMT (op);
1675   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1676 
1677   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1678   iv->nonlin_use = use;
1679   return use;
1680 }
1681 
1682 /* Indicate how compare type iv_use can be handled.  */
1683 enum comp_iv_rewrite
1684 {
1685   COMP_IV_NA,
1686   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1687   COMP_IV_EXPR,
1688   /* We may rewrite compare type iv_uses on both sides of comparison by
1689      expressing value of each iv_use.  */
1690   COMP_IV_EXPR_2,
1691   /* We may rewrite compare type iv_use by expressing value of the iv_use
1692      or by eliminating it with other iv_cand.  */
1693   COMP_IV_ELIM
1694 };
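
/* Illustrative classification examples, assuming I and J are ivs, N is a loop
   invariant and X is defined inside the loop but is not an iv:

     if (i < j)   -> COMP_IV_EXPR_2
     if (i < x)   -> COMP_IV_EXPR
     if (i < n)   -> COMP_IV_ELIM
     if (n < x)   -> COMP_IV_NA  */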
1695 
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697    of an induction variable and an invariant.  If this is the case,
1698    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1699    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700    induction variable descriptions, and an appropriate COMP_IV_* value is
1701    returned.  If this is not the case, CONTROL_VAR and BOUND are set to the
1702    arguments of the condition and COMP_IV_NA is returned.  */
1703 
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1706 		       tree **control_var, tree **bound,
1707 		       struct iv **iv_var, struct iv **iv_bound)
1708 {
1709   /* The objects returned when COND has constant operands.  */
1710   static struct iv const_iv;
1711   static tree zero;
1712   tree *op0 = &zero, *op1 = &zero;
1713   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1714   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1715 
1716   if (gimple_code (stmt) == GIMPLE_COND)
1717     {
1718       gcond *cond_stmt = as_a <gcond *> (stmt);
1719       op0 = gimple_cond_lhs_ptr (cond_stmt);
1720       op1 = gimple_cond_rhs_ptr (cond_stmt);
1721     }
1722   else
1723     {
1724       op0 = gimple_assign_rhs1_ptr (stmt);
1725       op1 = gimple_assign_rhs2_ptr (stmt);
1726     }
1727 
1728   zero = integer_zero_node;
1729   const_iv.step = integer_zero_node;
1730 
1731   if (TREE_CODE (*op0) == SSA_NAME)
1732     iv0 = get_iv (data, *op0);
1733   if (TREE_CODE (*op1) == SSA_NAME)
1734     iv1 = get_iv (data, *op1);
1735 
1736   /* If both sides of the comparison are IVs, we can express ivs on both ends.  */
1737   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1738     {
1739       rewrite_type = COMP_IV_EXPR_2;
1740       goto end;
1741     }
1742 
1743   /* If neither side of the comparison is an IV.  */
1744   if ((!iv0 || integer_zerop (iv0->step))
1745       && (!iv1 || integer_zerop (iv1->step)))
1746     goto end;
1747 
1748   /* Control variable may be on the other side.  */
1749   if (!iv0 || integer_zerop (iv0->step))
1750     {
1751       std::swap (op0, op1);
1752       std::swap (iv0, iv1);
1753     }
1754   /* If one side is an IV and the other side isn't loop invariant.  */
1755   if (!iv1)
1756     rewrite_type = COMP_IV_EXPR;
1757   /* If one side is an IV and the other side is loop invariant.  */
1758   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1759     rewrite_type = COMP_IV_ELIM;
1760 
1761 end:
1762   if (control_var)
1763     *control_var = op0;
1764   if (iv_var)
1765     *iv_var = iv0;
1766   if (bound)
1767     *bound = op1;
1768   if (iv_bound)
1769     *iv_bound = iv1;
1770 
1771   return rewrite_type;
1772 }
1773 
1774 /* Checks whether the condition in STMT is interesting and if so,
1775    records it.  */
1776 
1777 static void
1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1779 {
1780   tree *var_p, *bound_p;
1781   struct iv *var_iv, *bound_iv;
1782   enum comp_iv_rewrite ret;
1783 
1784   ret = extract_cond_operands (data, stmt,
1785 			       &var_p, &bound_p, &var_iv, &bound_iv);
1786   if (ret == COMP_IV_NA)
1787     {
1788       find_interesting_uses_op (data, *var_p);
1789       find_interesting_uses_op (data, *bound_p);
1790       return;
1791     }
1792 
1793   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1794   /* Record compare type iv_use for iv on the other side of comparison.  */
1795   if (ret == COMP_IV_EXPR_2)
1796     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1797 }
1798 
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800    relative to the loop LOOP, i.e. if all its operands are defined
1801    outside of the returned loop.  Returns NULL if EXPR is not
1802    even obviously invariant in LOOP.  */
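
/* For illustration (an assumed nesting, not part of the original comments):

     for (...)		<- loop 1
       for (...)	<- loop 2 == LOOP
	 ... n + j ...

   with "n" defined before loop 1 and "j" defined inside loop 2, "n + j" is
   not even obviously invariant in loop 2, so NULL is returned for it; for
   "n" alone, loop 1 is returned, since "n" is already invariant there.  */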
1803 
1804 struct loop *
1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1806 {
1807   basic_block def_bb;
1808   unsigned i, len;
1809 
1810   if (is_gimple_min_invariant (expr))
1811     return current_loops->tree_root;
1812 
1813   if (TREE_CODE (expr) == SSA_NAME)
1814     {
1815       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1816       if (def_bb)
1817 	{
1818 	  if (flow_bb_inside_loop_p (loop, def_bb))
1819 	    return NULL;
1820 	  return superloop_at_depth (loop,
1821 				     loop_depth (def_bb->loop_father) + 1);
1822 	}
1823 
1824       return current_loops->tree_root;
1825     }
1826 
1827   if (!EXPR_P (expr))
1828     return NULL;
1829 
1830   unsigned maxdepth = 0;
1831   len = TREE_OPERAND_LENGTH (expr);
1832   for (i = 0; i < len; i++)
1833     {
1834       struct loop *ivloop;
1835       if (!TREE_OPERAND (expr, i))
1836 	continue;
1837 
1838       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1839       if (!ivloop)
1840 	return NULL;
1841       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1842     }
1843 
1844   return superloop_at_depth (loop, maxdepth);
1845 }
1846 
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848    i.e. if all its operands are defined outside of the LOOP.  LOOP
1849    should not be the function body.  */
1850 
1851 bool
1852 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1853 {
1854   basic_block def_bb;
1855   unsigned i, len;
1856 
1857   gcc_assert (loop_depth (loop) > 0);
1858 
1859   if (is_gimple_min_invariant (expr))
1860     return true;
1861 
1862   if (TREE_CODE (expr) == SSA_NAME)
1863     {
1864       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1865       if (def_bb
1866 	  && flow_bb_inside_loop_p (loop, def_bb))
1867 	return false;
1868 
1869       return true;
1870     }
1871 
1872   if (!EXPR_P (expr))
1873     return false;
1874 
1875   len = TREE_OPERAND_LENGTH (expr);
1876   for (i = 0; i < len; i++)
1877     if (TREE_OPERAND (expr, i)
1878 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1879       return false;
1880 
1881   return true;
1882 }
1883 
1884 /* Given expression EXPR which computes inductive values with respect
1885    to the loop recorded in DATA, this function returns the biv from which
1886    EXPR is derived by tracing definition chains of SSA variables in EXPR.  */
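
/* For illustration (an assumed GIMPLE sketch, not part of the original
   comments): given the biv "i_1" and the derived chain

     i_1 = PHI <0, i_2>
     i_2 = i_1 + 1
     t_3 = (sizetype) i_1
     t_4 = t_3 * 4

   calling this function on "t_4" walks back through the multiplication and
   the cast and returns the description of the biv "i_1".  */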
1887 
1888 static struct iv*
1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1890 {
1891   struct iv *iv;
1892   unsigned i, n;
1893   tree e2, e1;
1894   enum tree_code code;
1895   gimple *stmt;
1896 
1897   if (expr == NULL_TREE)
1898     return NULL;
1899 
1900   if (is_gimple_min_invariant (expr))
1901     return NULL;
1902 
1903   code = TREE_CODE (expr);
1904   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1905     {
1906       n = TREE_OPERAND_LENGTH (expr);
1907       for (i = 0; i < n; i++)
1908 	{
1909 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1910 	  if (iv)
1911 	    return iv;
1912 	}
1913     }
1914 
1915   /* Stop if it's not an SSA name.  */
1916   if (code != SSA_NAME)
1917     return NULL;
1918 
1919   iv = get_iv (data, expr);
1920   if (!iv || integer_zerop (iv->step))
1921     return NULL;
1922   else if (iv->biv_p)
1923     return iv;
1924 
1925   stmt = SSA_NAME_DEF_STMT (expr);
1926   if (gphi *phi = dyn_cast <gphi *> (stmt))
1927     {
1928       ssa_op_iter iter;
1929       use_operand_p use_p;
1930       basic_block phi_bb = gimple_bb (phi);
1931 
1932       /* Skip loop header PHI that doesn't define biv.  */
1933       if (phi_bb->loop_father == data->current_loop)
1934 	return NULL;
1935 
1936       if (virtual_operand_p (gimple_phi_result (phi)))
1937 	return NULL;
1938 
1939       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1940 	{
1941 	  tree use = USE_FROM_PTR (use_p);
1942 	  iv = find_deriving_biv_for_expr (data, use);
1943 	  if (iv)
1944 	    return iv;
1945 	}
1946       return NULL;
1947     }
1948   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1949     return NULL;
1950 
1951   e1 = gimple_assign_rhs1 (stmt);
1952   code = gimple_assign_rhs_code (stmt);
1953   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1954     return find_deriving_biv_for_expr (data, e1);
1955 
1956   switch (code)
1957     {
1958     case MULT_EXPR:
1959     case PLUS_EXPR:
1960     case MINUS_EXPR:
1961     case POINTER_PLUS_EXPR:
1962       /* Increments, decrements and multiplications by a constant
1963 	 are simple.  */
1964       e2 = gimple_assign_rhs2 (stmt);
1965       iv = find_deriving_biv_for_expr (data, e2);
1966       if (iv)
1967 	return iv;
1968       gcc_fallthrough ();
1969 
1970     CASE_CONVERT:
1971       /* Casts are simple.  */
1972       return find_deriving_biv_for_expr (data, e1);
1973 
1974     default:
1975       break;
1976     }
1977 
1978   return NULL;
1979 }
1980 
1981 /* Record that BIV is used in address type uses, and do the same for its
1982    predecessor and successor bivs (those whose base differs by one step).  */
1983 
1984 static void
1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1986 {
1987   unsigned i;
1988   tree type, base_1, base_2;
1989   bitmap_iterator bi;
1990 
1991   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1992       || biv->have_address_use || !biv->no_overflow)
1993     return;
1994 
1995   type = TREE_TYPE (biv->base);
1996   if (!INTEGRAL_TYPE_P (type))
1997     return;
1998 
1999   biv->have_address_use = true;
2000   data->bivs_not_used_in_addr--;
2001   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2002   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2003     {
2004       struct iv *iv = ver_info (data, i)->iv;
2005 
2006       if (!iv || !iv->biv_p || integer_zerop (iv->step)
2007 	  || iv->have_address_use || !iv->no_overflow)
2008 	continue;
2009 
2010       if (type != TREE_TYPE (iv->base)
2011 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2012 	continue;
2013 
2014       if (!operand_equal_p (biv->step, iv->step, 0))
2015 	continue;
2016 
2017       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2018       if (operand_equal_p (base_1, iv->base, 0)
2019 	  || operand_equal_p (base_2, biv->base, 0))
2020 	{
2021 	  iv->have_address_use = true;
2022 	  data->bivs_not_used_in_addr--;
2023 	}
2024     }
2025 }
2026 
2027 /* Accumulates the steps of indices into DATA and replaces their values with the
2028    initial ones.  Returns false when the value of the index cannot be determined.
2029    Callback for for_each_index.  */
2030 
2031 struct ifs_ivopts_data
2032 {
2033   struct ivopts_data *ivopts_data;
2034   gimple *stmt;
2035   tree step;
2036 };
2037 
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2040 {
2041   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042   struct iv *iv;
2043   bool use_overflow_semantics = false;
2044   tree step, iv_base, iv_step, lbound, off;
2045   struct loop *loop = dta->ivopts_data->current_loop;
2046 
2047   /* If base is a component ref, require that the offset of the reference
2048      be invariant.  */
2049   if (TREE_CODE (base) == COMPONENT_REF)
2050     {
2051       off = component_ref_field_offset (base);
2052       return expr_invariant_in_loop_p (loop, off);
2053     }
2054 
2055   /* If base is array, first check whether we will be able to move the
2056      reference out of the loop (in order to take its address in strength
2057      reduction).  In order for this to work we need both lower bound
2058      and step to be loop invariants.  */
2059   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2060     {
2061       /* Moreover, for a range, the size needs to be invariant as well.  */
2062       if (TREE_CODE (base) == ARRAY_RANGE_REF
2063 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064 	return false;
2065 
2066       step = array_ref_element_size (base);
2067       lbound = array_ref_low_bound (base);
2068 
2069       if (!expr_invariant_in_loop_p (loop, step)
2070 	  || !expr_invariant_in_loop_p (loop, lbound))
2071 	return false;
2072     }
2073 
2074   if (TREE_CODE (*idx) != SSA_NAME)
2075     return true;
2076 
2077   iv = get_iv (dta->ivopts_data, *idx);
2078   if (!iv)
2079     return false;
2080 
2081   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2082 	  *&x[0], which is not folded and does not trigger the
2083 	  ARRAY_REF path below.  */
2084   *idx = iv->base;
2085 
2086   if (integer_zerop (iv->step))
2087     return true;
2088 
2089   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2090     {
2091       step = array_ref_element_size (base);
2092 
2093       /* We only handle addresses whose step is an integer constant.  */
2094       if (TREE_CODE (step) != INTEGER_CST)
2095 	return false;
2096     }
2097   else
2098     /* The step for pointer arithmetic is already 1 byte.  */
2099     step = size_one_node;
2100 
2101   iv_base = iv->base;
2102   iv_step = iv->step;
2103   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104     use_overflow_semantics = true;
2105 
2106   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107 			    sizetype, &iv_base, &iv_step, dta->stmt,
2108 			    use_overflow_semantics))
2109     {
2110       /* The index might wrap.  */
2111       return false;
2112     }
2113 
2114   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2116 
2117   if (dta->ivopts_data->bivs_not_used_in_addr)
2118     {
2119       if (!iv->biv_p)
2120 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2121 
2122       record_biv_for_address_use (dta->ivopts_data, iv);
2123     }
2124   return true;
2125 }
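
/* For illustration (an assumed reference, not part of the original
   comments): for "a[i][j]" in an array of int with 10 columns, where "i"
   steps by 1 and "j" steps by 2, the callback runs for both indices and
   accumulates 1 * 40 + 2 * 4 = 48 bytes into the step, while each index is
   replaced by its initial value.  */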
2126 
2127 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2128    object is passed to it in DATA.  */
2129 
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132 		void *vdata)
2133 {
2134   struct ivopts_data *data = (struct ivopts_data *) vdata;
2135   find_interesting_uses_op (data, *idx);
2136   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2137     {
2138       find_interesting_uses_op (data, array_ref_element_size (base));
2139       find_interesting_uses_op (data, array_ref_low_bound (base));
2140     }
2141   return true;
2142 }
2143 
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145    store cst to MUL and return true.  Otherwise return false.
2146    The returned value is always sign-extended, regardless of the
2147    signedness of TOP and BOT.  */
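
/* For illustration (assumed operands, not part of the original comments):
   TOP = "n * 4" and BOT = "n" yields 4; TOP = 24 and BOT = -8 yields -3;
   TOP = "n + 1" and BOT = "n" fails, since 1 is not a constant multiple of
   the non-constant BOT.  */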
2148 
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2151 {
2152   tree mby;
2153   enum tree_code code;
2154   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155   widest_int res, p0, p1;
2156 
2157   STRIP_NOPS (top);
2158   STRIP_NOPS (bot);
2159 
2160   if (operand_equal_p (top, bot, 0))
2161     {
2162       *mul = 1;
2163       return true;
2164     }
2165 
2166   code = TREE_CODE (top);
2167   switch (code)
2168     {
2169     case MULT_EXPR:
2170       mby = TREE_OPERAND (top, 1);
2171       if (TREE_CODE (mby) != INTEGER_CST)
2172 	return false;
2173 
2174       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175 	return false;
2176 
2177       *mul = wi::sext (res * wi::to_widest (mby), precision);
2178       return true;
2179 
2180     case PLUS_EXPR:
2181     case MINUS_EXPR:
2182       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184 	return false;
2185 
2186       if (code == MINUS_EXPR)
2187 	p1 = -p1;
2188       *mul = wi::sext (p0 + p1, precision);
2189       return true;
2190 
2191     case INTEGER_CST:
2192       if (TREE_CODE (bot) != INTEGER_CST)
2193 	return false;
2194 
2195       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197       if (p1 == 0)
2198 	return false;
2199       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200       return res == 0;
2201 
2202     default:
2203       if (POLY_INT_CST_P (top)
2204 	  && POLY_INT_CST_P (bot)
2205 	  && constant_multiple_p (wi::to_poly_widest (top),
2206 				  wi::to_poly_widest (bot), mul))
2207 	return true;
2208 
2209       return false;
2210     }
2211 }
2212 
2213 /* Return true if memory reference REF with step STEP may be unaligned.  */
2214 
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2217 {
2218   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219      thus they are not misaligned.  */
2220   if (TREE_CODE (ref) == TARGET_MEM_REF)
2221     return false;
2222 
2223   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2226 
2227   unsigned HOST_WIDE_INT bitpos;
2228   unsigned int ref_align;
2229   get_object_alignment_1 (ref, &ref_align, &bitpos);
2230   if (ref_align < align
2231       || (bitpos % align) != 0
2232       || (bitpos % BITS_PER_UNIT) != 0)
2233     return true;
2234 
2235   unsigned int trailing_zeros = tree_ctz (step);
2236   if (trailing_zeros < HOST_BITS_PER_INT
2237       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238     return true;
2239 
2240   return false;
2241 }
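
/* For illustration (an assumed case, not part of the original comments): a
   reference to a 32-bit-aligned int whose address advances by 2 bytes each
   iteration only guarantees 2 * 8 = 16 bits of alignment from the step,
   which is less than the required 32 bits, so the reference may become
   unaligned in some iteration.  */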
2242 
2243 /* Return true if EXPR may be non-addressable.   */
2244 
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2247 {
2248   switch (TREE_CODE (expr))
2249     {
2250     case TARGET_MEM_REF:
2251       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2252 	 target, thus they are always addressable.  */
2253       return false;
2254 
2255     case MEM_REF:
2256       /* Likewise for MEM_REFs, modulo the storage order.  */
2257       return REF_REVERSE_STORAGE_ORDER (expr);
2258 
2259     case BIT_FIELD_REF:
2260       if (REF_REVERSE_STORAGE_ORDER (expr))
2261 	return true;
2262       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263 
2264     case COMPONENT_REF:
2265       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2266 	return true;
2267       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2268 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269 
2270     case ARRAY_REF:
2271     case ARRAY_RANGE_REF:
2272       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2273 	return true;
2274       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275 
2276     case VIEW_CONVERT_EXPR:
2277       /* This kind of view-conversions may wrap non-addressable objects
2278 	 and make them look addressable.  After some processing the
2279 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2280 	 of inappropriate objects to be built.  */
2281       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2282 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2283 	return true;
2284       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2285 
2286     CASE_CONVERT:
2287       return true;
2288 
2289     default:
2290       break;
2291     }
2292 
2293   return false;
2294 }
2295 
2296 /* Finds addresses in *OP_P inside STMT.  */
2297 
2298 static void
2299 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2300 			       tree *op_p)
2301 {
2302   tree base = *op_p, step = size_zero_node;
2303   struct iv *civ;
2304   struct ifs_ivopts_data ifs_ivopts_data;
2305 
2306   /* Do not play with volatile memory references.  A bit too conservative,
2307      perhaps, but safe.  */
2308   if (gimple_has_volatile_ops (stmt))
2309     goto fail;
2310 
2311   /* Ignore bitfields for now.  Not really something terribly complicated
2312      to handle.  TODO.  */
2313   if (TREE_CODE (base) == BIT_FIELD_REF)
2314     goto fail;
2315 
2316   base = unshare_expr (base);
2317 
2318   if (TREE_CODE (base) == TARGET_MEM_REF)
2319     {
2320       tree type = build_pointer_type (TREE_TYPE (base));
2321       tree astep;
2322 
2323       if (TMR_BASE (base)
2324 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2325 	{
2326 	  civ = get_iv (data, TMR_BASE (base));
2327 	  if (!civ)
2328 	    goto fail;
2329 
2330 	  TMR_BASE (base) = civ->base;
2331 	  step = civ->step;
2332 	}
2333       if (TMR_INDEX2 (base)
2334 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2335 	{
2336 	  civ = get_iv (data, TMR_INDEX2 (base));
2337 	  if (!civ)
2338 	    goto fail;
2339 
2340 	  TMR_INDEX2 (base) = civ->base;
2341 	  step = civ->step;
2342 	}
2343       if (TMR_INDEX (base)
2344 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2345 	{
2346 	  civ = get_iv (data, TMR_INDEX (base));
2347 	  if (!civ)
2348 	    goto fail;
2349 
2350 	  TMR_INDEX (base) = civ->base;
2351 	  astep = civ->step;
2352 
2353 	  if (astep)
2354 	    {
2355 	      if (TMR_STEP (base))
2356 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2357 
2358 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2359 	    }
2360 	}
2361 
2362       if (integer_zerop (step))
2363 	goto fail;
2364       base = tree_mem_ref_addr (type, base);
2365     }
2366   else
2367     {
2368       ifs_ivopts_data.ivopts_data = data;
2369       ifs_ivopts_data.stmt = stmt;
2370       ifs_ivopts_data.step = size_zero_node;
2371       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2372 	  || integer_zerop (ifs_ivopts_data.step))
2373 	goto fail;
2374       step = ifs_ivopts_data.step;
2375 
2376       /* Check that the base expression is addressable.  This needs
2377 	 to be done after substituting bases of IVs into it.  */
2378       if (may_be_nonaddressable_p (base))
2379 	goto fail;
2380 
2381       /* Moreover, on strict alignment platforms, check that it is
2382 	 sufficiently aligned.  */
2383       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2384 	goto fail;
2385 
2386       base = build_fold_addr_expr (base);
2387 
2388       /* Substituting bases of IVs into the base expression might
2389 	 have caused folding opportunities.  */
2390       if (TREE_CODE (base) == ADDR_EXPR)
2391 	{
2392 	  tree *ref = &TREE_OPERAND (base, 0);
2393 	  while (handled_component_p (*ref))
2394 	    ref = &TREE_OPERAND (*ref, 0);
2395 	  if (TREE_CODE (*ref) == MEM_REF)
2396 	    {
2397 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2398 				      TREE_OPERAND (*ref, 0),
2399 				      TREE_OPERAND (*ref, 1));
2400 	      if (tem)
2401 		*ref = tem;
2402 	    }
2403 	}
2404     }
2405 
2406   civ = alloc_iv (data, base, step);
2407   /* Fail if base object of this memory reference is unknown.  */
2408   if (civ->base_object == NULL_TREE)
2409     goto fail;
2410 
2411   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2412   return;
2413 
2414 fail:
2415   for_each_index (op_p, idx_record_use, data);
2416 }
2417 
2418 /* Finds and records invariants used in STMT.  */
2419 
2420 static void
2421 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2422 {
2423   ssa_op_iter iter;
2424   use_operand_p use_p;
2425   tree op;
2426 
2427   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2428     {
2429       op = USE_FROM_PTR (use_p);
2430       record_invariant (data, op, false);
2431     }
2432 }
2433 
2434 /* CALL calls an internal function.  If operand *OP_P will become an
2435    address when the call is expanded, return the type of the memory
2436    being addressed, otherwise return null.  */
2437 
2438 static tree
2439 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2440 {
2441   switch (gimple_call_internal_fn (call))
2442     {
2443     case IFN_MASK_LOAD:
2444       if (op_p == gimple_call_arg_ptr (call, 0))
2445 	return TREE_TYPE (gimple_call_lhs (call));
2446       return NULL_TREE;
2447 
2448     case IFN_MASK_STORE:
2449       if (op_p == gimple_call_arg_ptr (call, 0))
2450 	return TREE_TYPE (gimple_call_arg (call, 3));
2451       return NULL_TREE;
2452 
2453     default:
2454       return NULL_TREE;
2455     }
2456 }
2457 
2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2459    Return true if the operand will become an address when STMT
2460    is expanded and record the associated address use if so.  */
2461 
2462 static bool
2463 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2464 		       struct iv *iv)
2465 {
2466   /* Fail if base object of this memory reference is unknown.  */
2467   if (iv->base_object == NULL_TREE)
2468     return false;
2469 
2470   tree mem_type = NULL_TREE;
2471   if (gcall *call = dyn_cast <gcall *> (stmt))
2472     if (gimple_call_internal_p (call))
2473       mem_type = get_mem_type_for_internal_fn (call, op_p);
2474   if (mem_type)
2475     {
2476       iv = alloc_iv (data, iv->base, iv->step);
2477       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2478       return true;
2479     }
2480   return false;
2481 }
2482 
2483 /* Finds interesting uses of induction variables in the statement STMT.  */
2484 
2485 static void
2486 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2487 {
2488   struct iv *iv;
2489   tree op, *lhs, *rhs;
2490   ssa_op_iter iter;
2491   use_operand_p use_p;
2492   enum tree_code code;
2493 
2494   find_invariants_stmt (data, stmt);
2495 
2496   if (gimple_code (stmt) == GIMPLE_COND)
2497     {
2498       find_interesting_uses_cond (data, stmt);
2499       return;
2500     }
2501 
2502   if (is_gimple_assign (stmt))
2503     {
2504       lhs = gimple_assign_lhs_ptr (stmt);
2505       rhs = gimple_assign_rhs1_ptr (stmt);
2506 
2507       if (TREE_CODE (*lhs) == SSA_NAME)
2508 	{
2509 	  /* If the statement defines an induction variable, the uses are not
2510 	     interesting by themselves.  */
2511 
2512 	  iv = get_iv (data, *lhs);
2513 
2514 	  if (iv && !integer_zerop (iv->step))
2515 	    return;
2516 	}
2517 
2518       code = gimple_assign_rhs_code (stmt);
2519       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2520 	  && (REFERENCE_CLASS_P (*rhs)
2521 	      || is_gimple_val (*rhs)))
2522 	{
2523 	  if (REFERENCE_CLASS_P (*rhs))
2524 	    find_interesting_uses_address (data, stmt, rhs);
2525 	  else
2526 	    find_interesting_uses_op (data, *rhs);
2527 
2528 	  if (REFERENCE_CLASS_P (*lhs))
2529 	    find_interesting_uses_address (data, stmt, lhs);
2530 	  return;
2531 	}
2532       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2533 	{
2534 	  find_interesting_uses_cond (data, stmt);
2535 	  return;
2536 	}
2537 
2538       /* TODO -- we should also handle address uses of type
2539 
2540 	 memory = call (whatever);
2541 
2542 	 and
2543 
2544 	 call (memory).  */
2545     }
2546 
2547   if (gimple_code (stmt) == GIMPLE_PHI
2548       && gimple_bb (stmt) == data->current_loop->header)
2549     {
2550       iv = get_iv (data, PHI_RESULT (stmt));
2551 
2552       if (iv && !integer_zerop (iv->step))
2553 	return;
2554     }
2555 
2556   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2557     {
2558       op = USE_FROM_PTR (use_p);
2559 
2560       if (TREE_CODE (op) != SSA_NAME)
2561 	continue;
2562 
2563       iv = get_iv (data, op);
2564       if (!iv)
2565 	continue;
2566 
2567       if (!find_address_like_use (data, stmt, use_p->use, iv))
2568 	find_interesting_uses_op (data, op);
2569     }
2570 }
2571 
2572 /* Finds interesting uses of induction variables outside of loops
2573    on loop exit edge EXIT.  */
2574 
2575 static void
2576 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2577 {
2578   gphi *phi;
2579   gphi_iterator psi;
2580   tree def;
2581 
2582   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2583     {
2584       phi = psi.phi ();
2585       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2586       if (!virtual_operand_p (def))
2587 	find_interesting_uses_op (data, def);
2588     }
2589 }
2590 
2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2592    mode for memory reference represented by USE.  */
2593 
2594 static GTY (()) vec<rtx, va_gc> *addr_list;
2595 
2596 static bool
2597 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2598 {
2599   rtx reg, addr;
2600   unsigned list_index;
2601   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2602   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2603 
2604   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2605   if (list_index >= vec_safe_length (addr_list))
2606     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2607 
2608   addr = (*addr_list)[list_index];
2609   if (!addr)
2610     {
2611       addr_mode = targetm.addr_space.address_mode (as);
2612       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2613       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2614       (*addr_list)[list_index] = addr;
2615     }
2616   else
2617     addr_mode = GET_MODE (addr);
2618 
2619   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2620   return (memory_address_addr_space_p (mem_mode, addr, as));
2621 }
2622 
2623 /* Comparison function to sort group in ascending order of addr_offset.  */
2624 
2625 static int
2626 group_compare_offset (const void *a, const void *b)
2627 {
2628   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2629   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2630 
2631   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2632 }
2633 
2634 /* Check if small groups should be split.  Return true if no group
2635    contains more than two uses with distinct addr_offsets.  Return
2636    false otherwise.  We want to split such groups because:
2637 
2638      1) Small groups don't have much benefit and may interfere with
2639 	general candidate selection.
2640      2) The problem size with only small groups is usually small and the
2641 	general algorithm can handle it well.
2642 
2643    TODO -- The above claim may not hold when we want to merge memory
2644    accesses with consecutive addresses.  */
2645 
2646 static bool
2647 split_small_address_groups_p (struct ivopts_data *data)
2648 {
2649   unsigned int i, j, distinct = 1;
2650   struct iv_use *pre;
2651   struct iv_group *group;
2652 
2653   for (i = 0; i < data->vgroups.length (); i++)
2654     {
2655       group = data->vgroups[i];
2656       if (group->vuses.length () == 1)
2657 	continue;
2658 
2659       gcc_assert (address_p (group->type));
2660       if (group->vuses.length () == 2)
2661 	{
2662 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2663 				      group->vuses[1]->addr_offset) > 0)
2664 	    std::swap (group->vuses[0], group->vuses[1]);
2665 	}
2666       else
2667 	group->vuses.qsort (group_compare_offset);
2668 
2669       if (distinct > 2)
2670 	continue;
2671 
2672       distinct = 1;
2673       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2674 	{
2675 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2676 	    {
2677 	      pre = group->vuses[j];
2678 	      distinct++;
2679 	    }
2680 
2681 	  if (distinct > 2)
2682 	    break;
2683 	}
2684     }
2685 
2686   return (distinct <= 2);
2687 }
2688 
2689 /* For each group of address type uses, this function further groups
2690    these uses according to the maximum offset supported by target's
2691    [base + offset] addressing mode.  */
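
/* For illustration (assumed addressing constraints, not part of the original
   comments): uses "a[i]", "a[i + 1]" and "a[i + 70000]" of a 4-byte type may
   start out in one group; if the 280000-byte offset of the last use does not
   fit the offset field of the target's [base + offset] form, that use is
   moved into a new group while the first two stay together.  */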
2692 
2693 static void
2694 split_address_groups (struct ivopts_data *data)
2695 {
2696   unsigned int i, j;
2697   /* True if every address group should be split at distinct offsets.  */
2698   bool split_p = split_small_address_groups_p (data);
2699 
2700   for (i = 0; i < data->vgroups.length (); i++)
2701     {
2702       struct iv_group *new_group = NULL;
2703       struct iv_group *group = data->vgroups[i];
2704       struct iv_use *use = group->vuses[0];
2705 
2706       use->id = 0;
2707       use->group_id = group->id;
2708       if (group->vuses.length () == 1)
2709 	continue;
2710 
2711       gcc_assert (address_p (use->type));
2712 
2713       for (j = 1; j < group->vuses.length ();)
2714 	{
2715 	  struct iv_use *next = group->vuses[j];
2716 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2717 
2718 	  /* Split group if asked to, or the offset against the first
2719 	     use can't fit in offset part of addressing mode.  IV uses
2720 	     having the same offset are still kept in one group.  */
2721 	  if (maybe_ne (offset, 0)
2722 	      && (split_p || !addr_offset_valid_p (use, offset)))
2723 	    {
2724 	      if (!new_group)
2725 		new_group = record_group (data, group->type);
2726 	      group->vuses.ordered_remove (j);
2727 	      new_group->vuses.safe_push (next);
2728 	      continue;
2729 	    }
2730 
2731 	  next->id = j;
2732 	  next->group_id = group->id;
2733 	  j++;
2734 	}
2735     }
2736 }
2737 
2738 /* Finds uses of the induction variables that are interesting.  */
2739 
2740 static void
2741 find_interesting_uses (struct ivopts_data *data)
2742 {
2743   basic_block bb;
2744   gimple_stmt_iterator bsi;
2745   basic_block *body = get_loop_body (data->current_loop);
2746   unsigned i;
2747   edge e;
2748 
2749   for (i = 0; i < data->current_loop->num_nodes; i++)
2750     {
2751       edge_iterator ei;
2752       bb = body[i];
2753 
2754       FOR_EACH_EDGE (e, ei, bb->succs)
2755 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2756 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2757 	  find_interesting_uses_outside (data, e);
2758 
2759       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2760 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2761       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2762 	if (!is_gimple_debug (gsi_stmt (bsi)))
2763 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2764     }
2765   free (body);
2766 
2767   split_address_groups (data);
2768 
2769   if (dump_file && (dump_flags & TDF_DETAILS))
2770     {
2771       fprintf (dump_file, "\n<IV Groups>:\n");
2772       dump_groups (dump_file, data);
2773       fprintf (dump_file, "\n");
2774     }
2775 }
2776 
2777 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2778    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2779    we are at the top-level of the processed address.  */
2780 
2781 static tree
2782 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2783 		poly_int64 *offset)
2784 {
2785   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2786   enum tree_code code;
2787   tree type, orig_type = TREE_TYPE (expr);
2788   poly_int64 off0, off1;
2789   HOST_WIDE_INT st;
2790   tree orig_expr = expr;
2791 
2792   STRIP_NOPS (expr);
2793 
2794   type = TREE_TYPE (expr);
2795   code = TREE_CODE (expr);
2796   *offset = 0;
2797 
2798   switch (code)
2799     {
2800     case POINTER_PLUS_EXPR:
2801     case PLUS_EXPR:
2802     case MINUS_EXPR:
2803       op0 = TREE_OPERAND (expr, 0);
2804       op1 = TREE_OPERAND (expr, 1);
2805 
2806       op0 = strip_offset_1 (op0, false, false, &off0);
2807       op1 = strip_offset_1 (op1, false, false, &off1);
2808 
2809       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2810       if (op0 == TREE_OPERAND (expr, 0)
2811 	  && op1 == TREE_OPERAND (expr, 1))
2812 	return orig_expr;
2813 
2814       if (integer_zerop (op1))
2815 	expr = op0;
2816       else if (integer_zerop (op0))
2817 	{
2818 	  if (code == MINUS_EXPR)
2819 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2820 	  else
2821 	    expr = op1;
2822 	}
2823       else
2824 	expr = fold_build2 (code, type, op0, op1);
2825 
2826       return fold_convert (orig_type, expr);
2827 
2828     case MULT_EXPR:
2829       op1 = TREE_OPERAND (expr, 1);
2830       if (!cst_and_fits_in_hwi (op1))
2831 	return orig_expr;
2832 
2833       op0 = TREE_OPERAND (expr, 0);
2834       op0 = strip_offset_1 (op0, false, false, &off0);
2835       if (op0 == TREE_OPERAND (expr, 0))
2836 	return orig_expr;
2837 
2838       *offset = off0 * int_cst_value (op1);
2839       if (integer_zerop (op0))
2840 	expr = op0;
2841       else
2842 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2843 
2844       return fold_convert (orig_type, expr);
2845 
2846     case ARRAY_REF:
2847     case ARRAY_RANGE_REF:
2848       if (!inside_addr)
2849 	return orig_expr;
2850 
2851       step = array_ref_element_size (expr);
2852       if (!cst_and_fits_in_hwi (step))
2853 	break;
2854 
2855       st = int_cst_value (step);
2856       op1 = TREE_OPERAND (expr, 1);
2857       op1 = strip_offset_1 (op1, false, false, &off1);
2858       *offset = off1 * st;
2859 
2860       if (top_compref
2861 	  && integer_zerop (op1))
2862 	{
2863 	  /* Strip the component reference completely.  */
2864 	  op0 = TREE_OPERAND (expr, 0);
2865 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2866 	  *offset += off0;
2867 	  return op0;
2868 	}
2869       break;
2870 
2871     case COMPONENT_REF:
2872       {
2873 	tree field;
2874 
2875 	if (!inside_addr)
2876 	  return orig_expr;
2877 
2878 	tmp = component_ref_field_offset (expr);
2879 	field = TREE_OPERAND (expr, 1);
2880 	if (top_compref
2881 	    && cst_and_fits_in_hwi (tmp)
2882 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2883 	  {
2884 	    HOST_WIDE_INT boffset, abs_off;
2885 
2886 	    /* Strip the component reference completely.  */
2887 	    op0 = TREE_OPERAND (expr, 0);
2888 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2889 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2890 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2891 	    if (boffset < 0)
2892 	      abs_off = -abs_off;
2893 
2894 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2895 	    return op0;
2896 	  }
2897       }
2898       break;
2899 
2900     case ADDR_EXPR:
2901       op0 = TREE_OPERAND (expr, 0);
2902       op0 = strip_offset_1 (op0, true, true, &off0);
2903       *offset += off0;
2904 
2905       if (op0 == TREE_OPERAND (expr, 0))
2906 	return orig_expr;
2907 
2908       expr = build_fold_addr_expr (op0);
2909       return fold_convert (orig_type, expr);
2910 
2911     case MEM_REF:
2912       /* ???  Offset operand?  */
2913       inside_addr = false;
2914       break;
2915 
2916     default:
2917       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2918 	return build_int_cst (orig_type, 0);
2919       return orig_expr;
2920     }
2921 
2922   /* Default handling of expressions for which we want to recurse into
2923      the first operand.  */
2924   op0 = TREE_OPERAND (expr, 0);
2925   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2926   *offset += off0;
2927 
2928   if (op0 == TREE_OPERAND (expr, 0)
2929       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2930     return orig_expr;
2931 
2932   expr = copy_node (expr);
2933   TREE_OPERAND (expr, 0) = op0;
2934   if (op1)
2935     TREE_OPERAND (expr, 1) = op1;
2936 
2937   /* Inside address, we might strip the top level component references,
2938      thus changing type of the expression.  Handling of ADDR_EXPR
2939      will fix that.  */
2940   expr = fold_convert (orig_type, expr);
2941 
2942   return expr;
2943 }
2944 
2945 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2946 
2947 tree
2948 strip_offset (tree expr, poly_uint64_pod *offset)
2949 {
2950   poly_int64 off;
2951   tree core = strip_offset_1 (expr, false, false, &off);
2952   *offset = off;
2953   return core;
2954 }
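
/* For illustration (assumed expressions, not part of the original comments):
   on "&a[i + 3]" with 4-byte elements the function returns "&a[i]" and
   stores 12 in *OFFSET; on "p + 16" it returns "p" and stores 16.  */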
2955 
2956 /* Returns variant of TYPE that can be used as base for different uses.
2957    We return unsigned type with the same precision, which avoids problems
2958    with overflows.  */
2959 
2960 static tree
2961 generic_type_for (tree type)
2962 {
2963   if (POINTER_TYPE_P (type))
2964     return unsigned_type_for (type);
2965 
2966   if (TYPE_UNSIGNED (type))
2967     return type;
2968 
2969   return unsigned_type_for (type);
2970 }
2971 
2972 /* Private data for walk_tree.  */
2973 
2974 struct walk_tree_data
2975 {
2976   bitmap *inv_vars;
2977   struct ivopts_data *idata;
2978 };
2979 
2980 /* Callback function for walk_tree; it records invariants and symbol
2981    references in *EXPR_P.  DATA is the structure storing result info.  */
2982 
2983 static tree
2984 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2985 {
2986   tree op = *expr_p;
2987   struct version_info *info;
2988   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2989 
2990   if (TREE_CODE (op) != SSA_NAME)
2991     return NULL_TREE;
2992 
2993   info = name_info (wdata->idata, op);
2994   /* Because we expand simple operations when finding IVs, a loop invariant
2995      variable that isn't referred to by the original loop could be used now.
2996      Record such invariant variables here.  */
2997   if (!info->iv)
2998     {
2999       struct ivopts_data *idata = wdata->idata;
3000       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3001 
3002       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3003 	{
3004 	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3005 	  record_invariant (idata, op, false);
3006 	}
3007     }
3008   if (!info->inv_id || info->has_nonlin_use)
3009     return NULL_TREE;
3010 
3011   if (!*wdata->inv_vars)
3012     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3013   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3014 
3015   return NULL_TREE;
3016 }
3017 
3018 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we should
3019    store them.  */
3020 
3021 static inline void
3022 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3023 {
3024   struct walk_tree_data wdata;
3025 
3026   if (!inv_vars)
3027     return;
3028 
3029   wdata.idata = data;
3030   wdata.inv_vars = inv_vars;
3031   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3032 }
3033 
3034 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3035    will be recorded if it doesn't exist yet.  Given below two exprs:
3036      inv_expr + cst1, inv_expr + cst2
3037    It's hard to decide whether the constant part should be stripped or
3038    not.  We choose not to strip it, based on the following facts:
3039      1) We need to count the ADD cost for the constant part if it's
3040 	stripped, which isn't always trivial where this function is called.
3041      2) Stripping the constant away may conflict with the following loop
3042 	invariant hoisting pass.
3043      3) Not stripping the constant results in more invariant exprs, which
3044 	usually leads to decisions preferring lower register pressure.  */
3045 
3046 static iv_inv_expr_ent *
3047 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3048 {
3049   STRIP_NOPS (inv_expr);
3050 
3051   if (poly_int_tree_p (inv_expr)
3052       || TREE_CODE (inv_expr) == SSA_NAME)
3053     return NULL;
3054 
3055   /* Don't strip constant part away as we used to.  */
3056 
3057   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3058   struct iv_inv_expr_ent ent;
3059   ent.expr = inv_expr;
3060   ent.hash = iterative_hash_expr (inv_expr, 0);
3061   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3062 
3063   if (!*slot)
3064     {
3065       *slot = XNEW (struct iv_inv_expr_ent);
3066       (*slot)->expr = inv_expr;
3067       (*slot)->hash = ent.hash;
3068       (*slot)->id = ++data->max_inv_expr_id;
3069     }
3070 
3071   return *slot;
3072 }
3073 
3074 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3075    position to POS.  If USE is not NULL, the candidate is set as related to
3076    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3077    replacement of the final value of the iv by a direct computation.  */
3078 
3079 static struct iv_cand *
3080 add_candidate_1 (struct ivopts_data *data,
3081 		 tree base, tree step, bool important, enum iv_position pos,
3082 		 struct iv_use *use, gimple *incremented_at,
3083 		 struct iv *orig_iv = NULL)
3084 {
3085   unsigned i;
3086   struct iv_cand *cand = NULL;
3087   tree type, orig_type;
3088 
3089   gcc_assert (base && step);
3090 
3091   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3092      live, but the ivopts code may replace a real pointer with one
3093      pointing before or after the memory block that is then adjusted
3094      into the memory block during the loop.  FIXME: It would likely be
3095      better to actually force the pointer live and still use ivopts;
3096      for example, it would be enough to write the pointer into memory
3097      and keep it there until after the loop.  */
3098   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3099     return NULL;
3100 
3101   /* For non-original variables, make sure their values are computed in a type
3102      that does not invoke undefined behavior on overflows (since in general,
3103      we cannot prove that these induction variables are non-wrapping).  */
3104   if (pos != IP_ORIGINAL)
3105     {
3106       orig_type = TREE_TYPE (base);
3107       type = generic_type_for (orig_type);
3108       if (type != orig_type)
3109 	{
3110 	  base = fold_convert (type, base);
3111 	  step = fold_convert (type, step);
3112 	}
3113     }
3114 
3115   for (i = 0; i < data->vcands.length (); i++)
3116     {
3117       cand = data->vcands[i];
3118 
3119       if (cand->pos != pos)
3120 	continue;
3121 
3122       if (cand->incremented_at != incremented_at
3123 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3124 	      && cand->ainc_use != use))
3125 	continue;
3126 
3127       if (operand_equal_p (base, cand->iv->base, 0)
3128 	  && operand_equal_p (step, cand->iv->step, 0)
3129 	  && (TYPE_PRECISION (TREE_TYPE (base))
3130 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3131 	break;
3132     }
3133 
3134   if (i == data->vcands.length ())
3135     {
3136       cand = XCNEW (struct iv_cand);
3137       cand->id = i;
3138       cand->iv = alloc_iv (data, base, step);
3139       cand->pos = pos;
3140       if (pos != IP_ORIGINAL)
3141 	{
3142 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3143 	  cand->var_after = cand->var_before;
3144 	}
3145       cand->important = important;
3146       cand->incremented_at = incremented_at;
3147       data->vcands.safe_push (cand);
3148 
3149       if (!poly_int_tree_p (step))
3150 	{
3151 	  find_inv_vars (data, &step, &cand->inv_vars);
3152 
3153 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3154 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3155 	  if (inv_expr != NULL)
3156 	    {
3157 	      cand->inv_exprs = cand->inv_vars;
3158 	      cand->inv_vars = NULL;
3159 	      if (cand->inv_exprs)
3160 		bitmap_clear (cand->inv_exprs);
3161 	      else
3162 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3163 
3164 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3165 	    }
3166 	}
3167 
3168       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3169 	cand->ainc_use = use;
3170       else
3171 	cand->ainc_use = NULL;
3172 
3173       cand->orig_iv = orig_iv;
3174       if (dump_file && (dump_flags & TDF_DETAILS))
3175 	dump_cand (dump_file, cand);
3176     }
3177 
3178   cand->important |= important;
3179 
3180   /* Relate candidate to the group for which it is added.  */
3181   if (use)
3182     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3183 
3184   return cand;
3185 }
3186 
3187 /* Returns true if incrementing the induction variable at the end of the LOOP
3188    is allowed.
3189 
3190    The purpose is to avoid splitting latch edge with a biv increment, thus
3191    creating a jump, possibly confusing other optimization passes and leaving
3192    less freedom to the scheduler.  So we allow IP_END only if IP_NORMAL is not
3193    available (so we do not have a better alternative), or if the latch edge
3194    is already nonempty.  */
3195 
3196 static bool
3197 allow_ip_end_pos_p (struct loop *loop)
3198 {
3199   if (!ip_normal_pos (loop))
3200     return true;
3201 
3202   if (!empty_block_p (ip_end_pos (loop)))
3203     return true;
3204 
3205   return false;
3206 }
3207 
3208 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3209    Important field is set to IMPORTANT.  */
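
/* For illustration (an assumed target, not part of the original comments):
   on a target with post-increment addressing, a use "*p" where "p" advances
   by the access size each iteration gets an IP_AFTER_USE candidate, so the
   increment can later be folded into the access as "*p++".  */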
3210 
3211 static void
3212 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3213 			bool important, struct iv_use *use)
3214 {
3215   basic_block use_bb = gimple_bb (use->stmt);
3216   machine_mode mem_mode;
3217   unsigned HOST_WIDE_INT cstepi;
3218 
3219   /* If we insert the increment in any position other than the standard
3220      ones, we must ensure that it is incremented once per iteration.
3221      It must not be in an inner nested loop, or one side of an if
3222      statement.  */
3223   if (use_bb->loop_father != data->current_loop
3224       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3225       || stmt_can_throw_internal (use->stmt)
3226       || !cst_and_fits_in_hwi (step))
3227     return;
3228 
3229   cstepi = int_cst_value (step);
3230 
3231   mem_mode = TYPE_MODE (use->mem_type);
3232   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3233 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3234        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3235       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3236 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3237 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3238     {
3239       enum tree_code code = MINUS_EXPR;
3240       tree new_base;
3241       tree new_step = step;
3242 
3243       if (POINTER_TYPE_P (TREE_TYPE (base)))
3244 	{
3245 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3246 	  code = POINTER_PLUS_EXPR;
3247 	}
3248       else
3249 	new_step = fold_convert (TREE_TYPE (base), new_step);
3250       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3251       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3252 		       use->stmt);
3253     }
3254   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3255 	|| USE_STORE_POST_INCREMENT (mem_mode))
3256        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3257       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3258 	   || USE_STORE_POST_DECREMENT (mem_mode))
3259 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3260     {
3261       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3262 		       use->stmt);
3263     }
3264 }
3265 
3266 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3267    position to POS.  If USE is not NULL, the candidate is set as related to
3268    it.  The candidate computation is scheduled before exit condition and at
3269    the end of loop.  */
3270 
3271 static void
3272 add_candidate (struct ivopts_data *data,
3273 	       tree base, tree step, bool important, struct iv_use *use,
3274 	       struct iv *orig_iv = NULL)
3275 {
3276   if (ip_normal_pos (data->current_loop))
3277     add_candidate_1 (data, base, step, important,
3278 		     IP_NORMAL, use, NULL, orig_iv);
3279   if (ip_end_pos (data->current_loop)
3280       && allow_ip_end_pos_p (data->current_loop))
3281     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3282 }
3283 
3284 /* Adds standard iv candidates.  */
3285 
3286 static void
3287 add_standard_iv_candidates (struct ivopts_data *data)
3288 {
3289   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3290 
3291   /* The same for a double-integer type if it is still fast enough.  */
3292   if (TYPE_PRECISION
3293 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3294       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3295     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3296 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3297 
3298   /* The same for a double-integer type if it is still fast enough.  */
3299   if (TYPE_PRECISION
3300 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3301       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3302     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3303 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3304 }
3305 
3306 
3307 /* Adds candidates based on the old induction variable IV.  */
3308 
3309 static void
3310 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3311 {
3312   gimple *phi;
3313   tree def;
3314   struct iv_cand *cand;
3315 
3316   /* Check if this biv is used in an address type use.  */
3317   if (iv->no_overflow  && iv->have_address_use
3318       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3319       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3320     {
3321       tree base = fold_convert (sizetype, iv->base);
3322       tree step = fold_convert (sizetype, iv->step);
3323 
3324       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3325       add_candidate (data, base, step, true, NULL, iv);
3326       /* Add iv cand of the original type only if it has nonlinear use.  */
3327       if (iv->nonlin_use)
3328 	add_candidate (data, iv->base, iv->step, true, NULL);
3329     }
3330   else
3331     add_candidate (data, iv->base, iv->step, true, NULL);
3332 
3333   /* The same, but with initial value zero.  */
3334   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3335     add_candidate (data, size_int (0), iv->step, true, NULL);
3336   else
3337     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3338 		   iv->step, true, NULL);
3339 
3340   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3341   if (gimple_code (phi) == GIMPLE_PHI)
3342     {
3343       /* Additionally record the possibility of leaving the original iv
3344 	 untouched.  */
3345       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3346       /* Don't add candidate if it's from another PHI node because
3347 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3348       phi = SSA_NAME_DEF_STMT (def);
3349       if (gimple_code (phi) != GIMPLE_PHI)
3350 	{
3351 	  cand = add_candidate_1 (data,
3352 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3353 				  SSA_NAME_DEF_STMT (def));
3354 	  if (cand)
3355 	    {
3356 	      cand->var_before = iv->ssa_name;
3357 	      cand->var_after = def;
3358 	    }
3359 	}
3360       else
3361 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3362     }
3363 }
3364 
3365 /* Adds candidates based on the old induction variables.  */
3366 
3367 static void
3368 add_iv_candidate_for_bivs (struct ivopts_data *data)
3369 {
3370   unsigned i;
3371   struct iv *iv;
3372   bitmap_iterator bi;
3373 
3374   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3375     {
3376       iv = ver_info (data, i)->iv;
3377       if (iv && iv->biv_p && !integer_zerop (iv->step))
3378 	add_iv_candidate_for_biv (data, iv);
3379     }
3380 }
3381 
3382 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3383 
3384 static void
3385 record_common_cand (struct ivopts_data *data, tree base,
3386 		    tree step, struct iv_use *use)
3387 {
3388   struct iv_common_cand ent;
3389   struct iv_common_cand **slot;
3390 
3391   ent.base = base;
3392   ent.step = step;
3393   ent.hash = iterative_hash_expr (base, 0);
3394   ent.hash = iterative_hash_expr (step, ent.hash);
3395 
3396   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3397   if (*slot == NULL)
3398     {
3399       *slot = new iv_common_cand ();
3400       (*slot)->base = base;
3401       (*slot)->step = step;
3402       (*slot)->uses.create (8);
3403       (*slot)->hash = ent.hash;
3404       data->iv_common_cands.safe_push ((*slot));
3405     }
3406 
3407   gcc_assert (use != NULL);
3408   (*slot)->uses.safe_push (use);
3409   return;
3410 }
3411 
3412 /* Comparison function used to sort common candidates.  */
3413 
3414 static int
3415 common_cand_cmp (const void *p1, const void *p2)
3416 {
3417   unsigned n1, n2;
3418   const struct iv_common_cand *const *const ccand1
3419     = (const struct iv_common_cand *const *)p1;
3420   const struct iv_common_cand *const *const ccand2
3421     = (const struct iv_common_cand *const *)p2;
3422 
3423   n1 = (*ccand1)->uses.length ();
3424   n2 = (*ccand2)->uses.length ();
3425   return n2 - n1;
3426 }
3427 
3428 /* Adds IV candidates based on the common candidates recorded.  */
3429 
3430 static void
3431 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3432 {
3433   unsigned i, j;
3434   struct iv_cand *cand_1, *cand_2;
3435 
3436   data->iv_common_cands.qsort (common_cand_cmp);
3437   for (i = 0; i < data->iv_common_cands.length (); i++)
3438     {
3439       struct iv_common_cand *ptr = data->iv_common_cands[i];
3440 
3441       /* Only add IV candidate if it's derived from multiple uses.  */
3442       if (ptr->uses.length () <= 1)
3443 	break;
3444 
3445       cand_1 = NULL;
3446       cand_2 = NULL;
3447       if (ip_normal_pos (data->current_loop))
3448 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3449 				  false, IP_NORMAL, NULL, NULL);
3450 
3451       if (ip_end_pos (data->current_loop)
3452 	  && allow_ip_end_pos_p (data->current_loop))
3453 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3454 				  false, IP_END, NULL, NULL);
3455 
3456       /* Bind deriving uses and the new candidates.  */
3457       for (j = 0; j < ptr->uses.length (); j++)
3458 	{
3459 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3460 	  if (cand_1)
3461 	    bitmap_set_bit (group->related_cands, cand_1->id);
3462 	  if (cand_2)
3463 	    bitmap_set_bit (group->related_cands, cand_2->id);
3464 	}
3465     }
3466 
3467   /* Release data since it is useless from this point.  */
3468   data->iv_common_cand_tab->empty ();
3469   data->iv_common_cands.truncate (0);
3470 }
3471 
3472 /* Adds candidates based on the value of USE's iv.  */
3473 
3474 static void
3475 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3476 {
3477   poly_uint64 offset;
3478   tree base;
3479   tree basetype;
3480   struct iv *iv = use->iv;
3481 
3482   add_candidate (data, iv->base, iv->step, false, use);
3483 
3484   /* Record common candidate for use in case it can be shared by others.  */
3485   record_common_cand (data, iv->base, iv->step, use);
3486 
3487   /* Record common candidate with initial value zero.  */
3488   basetype = TREE_TYPE (iv->base);
3489   if (POINTER_TYPE_P (basetype))
3490     basetype = sizetype;
3491   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3492 
3493   /* Record common candidate with the constant offset stripped from base.
3494      As with the use itself, we also add a candidate for it directly.  */
3495   base = strip_offset (iv->base, &offset);
3496   if (maybe_ne (offset, 0U) || base != iv->base)
3497     {
3498       record_common_cand (data, base, iv->step, use);
3499       add_candidate (data, base, iv->step, false, use);
3500     }
3501 
3502   /* Record common candidate with base_object removed in base.  */
3503   base = iv->base;
3504   STRIP_NOPS (base);
3505   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3506     {
3507       tree step = iv->step;
3508 
3509       STRIP_NOPS (step);
3510       base = TREE_OPERAND (base, 1);
3511       step = fold_convert (sizetype, step);
3512       record_common_cand (data, base, step, use);
3513       /* Also record common candidate with offset stripped.  */
3514       base = strip_offset (base, &offset);
3515       if (maybe_ne (offset, 0U))
3516 	record_common_cand (data, base, step, use);
3517     }
3518 
3519   /* Finally, add auto-increment candidates.  Make such variables
3520      important, since other iv uses with the same base object may be
3521      based on them.  */
3522   if (use != NULL && address_p (use->type))
3523     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3524 }
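/* As an illustrative sketch (not taken from the original sources): for a use
   whose iv is {base = p_0 + 16, step = 4}, the code above records common
   candidates for {p_0 + 16, 4}, for the zero-based {0, 4} and, among others,
   for the offset-stripped {p_0, 4}, which is also added as a real candidate;
   uses that differ from this one only in the constant offset can then share
   a single induction variable.  */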
3525 
3526 /* Adds candidates based on the uses.  */
3527 
3528 static void
3529 add_iv_candidate_for_groups (struct ivopts_data *data)
3530 {
3531   unsigned i;
3532 
3533   /* Only add candidates for the first use in each group.  */
3534   for (i = 0; i < data->vgroups.length (); i++)
3535     {
3536       struct iv_group *group = data->vgroups[i];
3537 
3538       gcc_assert (group->vuses[0] != NULL);
3539       add_iv_candidate_for_use (data, group->vuses[0]);
3540     }
3541   add_iv_candidate_derived_from_uses (data);
3542 }
3543 
3544 /* Record important candidates and add them to related_cands bitmaps.  */
3545 
3546 static void
3547 record_important_candidates (struct ivopts_data *data)
3548 {
3549   unsigned i;
3550   struct iv_group *group;
3551 
3552   for (i = 0; i < data->vcands.length (); i++)
3553     {
3554       struct iv_cand *cand = data->vcands[i];
3555 
3556       if (cand->important)
3557 	bitmap_set_bit (data->important_candidates, i);
3558     }
3559 
3560   data->consider_all_candidates = (data->vcands.length ()
3561 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3562 
3563   /* Add important candidates to groups' related_cands bitmaps.  */
3564   for (i = 0; i < data->vgroups.length (); i++)
3565     {
3566       group = data->vgroups[i];
3567       bitmap_ior_into (group->related_cands, data->important_candidates);
3568     }
3569 }
3570 
3571 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3572    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3573    we allocate a simple list to every use.  */
3574 
3575 static void
3576 alloc_use_cost_map (struct ivopts_data *data)
3577 {
3578   unsigned i, size, s;
3579 
3580   for (i = 0; i < data->vgroups.length (); i++)
3581     {
3582       struct iv_group *group = data->vgroups[i];
3583 
3584       if (data->consider_all_candidates)
3585 	size = data->vcands.length ();
3586       else
3587 	{
3588 	  s = bitmap_count_bits (group->related_cands);
3589 
3590 	  /* Round up to a power of two, so that computing the modulo is fast.  */
3591 	  size = s ? (1 << ceil_log2 (s)) : 1;
3592 	}
3593 
3594       group->n_map_members = size;
3595       group->cost_map = XCNEWVEC (struct cost_pair, size);
3596     }
3597 }
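/* For instance (illustrative numbers only): a group with 5 related candidates
   gets a cost map of 1 << ceil_log2 (5) == 8 entries.  set_group_iv_cost and
   get_group_iv_cost below then probe the map linearly starting at
   cand->id & 7, so the power-of-two size turns the modulo into a cheap
   bit-mask.  */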
3598 
3599 /* Sets the cost of (GROUP, CAND) pair to COST, records that it depends on
3600    invariants INV_VARS and INV_EXPRS, that the value used in expressing it is
3601    VALUE, and, in case of iv elimination, that the comparison operator is COMP.  */
3602 
3603 static void
3604 set_group_iv_cost (struct ivopts_data *data,
3605 		   struct iv_group *group, struct iv_cand *cand,
3606 		   comp_cost cost, bitmap inv_vars, tree value,
3607 		   enum tree_code comp, bitmap inv_exprs)
3608 {
3609   unsigned i, s;
3610 
3611   if (cost.infinite_cost_p ())
3612     {
3613       BITMAP_FREE (inv_vars);
3614       BITMAP_FREE (inv_exprs);
3615       return;
3616     }
3617 
3618   if (data->consider_all_candidates)
3619     {
3620       group->cost_map[cand->id].cand = cand;
3621       group->cost_map[cand->id].cost = cost;
3622       group->cost_map[cand->id].inv_vars = inv_vars;
3623       group->cost_map[cand->id].inv_exprs = inv_exprs;
3624       group->cost_map[cand->id].value = value;
3625       group->cost_map[cand->id].comp = comp;
3626       return;
3627     }
3628 
3629   /* n_map_members is a power of two, so this computes modulo.  */
3630   s = cand->id & (group->n_map_members - 1);
3631   for (i = s; i < group->n_map_members; i++)
3632     if (!group->cost_map[i].cand)
3633       goto found;
3634   for (i = 0; i < s; i++)
3635     if (!group->cost_map[i].cand)
3636       goto found;
3637 
3638   gcc_unreachable ();
3639 
3640 found:
3641   group->cost_map[i].cand = cand;
3642   group->cost_map[i].cost = cost;
3643   group->cost_map[i].inv_vars = inv_vars;
3644   group->cost_map[i].inv_exprs = inv_exprs;
3645   group->cost_map[i].value = value;
3646   group->cost_map[i].comp = comp;
3647 }
3648 
3649 /* Gets cost of (GROUP, CAND) pair.  */
3650 
3651 static struct cost_pair *
3652 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3653 		   struct iv_cand *cand)
3654 {
3655   unsigned i, s;
3656   struct cost_pair *ret;
3657 
3658   if (!cand)
3659     return NULL;
3660 
3661   if (data->consider_all_candidates)
3662     {
3663       ret = group->cost_map + cand->id;
3664       if (!ret->cand)
3665 	return NULL;
3666 
3667       return ret;
3668     }
3669 
3670   /* n_map_members is a power of two, so this computes modulo.  */
3671   s = cand->id & (group->n_map_members - 1);
3672   for (i = s; i < group->n_map_members; i++)
3673     if (group->cost_map[i].cand == cand)
3674       return group->cost_map + i;
3675     else if (group->cost_map[i].cand == NULL)
3676       return NULL;
3677   for (i = 0; i < s; i++)
3678     if (group->cost_map[i].cand == cand)
3679       return group->cost_map + i;
3680     else if (group->cost_map[i].cand == NULL)
3681       return NULL;
3682 
3683   return NULL;
3684 }
3685 
3686 /* Produce DECL_RTL for object OBJ so that it looks like it is stored in memory.  */
3687 static rtx
3688 produce_memory_decl_rtl (tree obj, int *regno)
3689 {
3690   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3691   machine_mode address_mode = targetm.addr_space.address_mode (as);
3692   rtx x;
3693 
3694   gcc_assert (obj);
3695   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3696     {
3697       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3698       x = gen_rtx_SYMBOL_REF (address_mode, name);
3699       SET_SYMBOL_REF_DECL (x, obj);
3700       x = gen_rtx_MEM (DECL_MODE (obj), x);
3701       set_mem_addr_space (x, as);
3702       targetm.encode_section_info (obj, x, true);
3703     }
3704   else
3705     {
3706       x = gen_raw_REG (address_mode, (*regno)++);
3707       x = gen_rtx_MEM (DECL_MODE (obj), x);
3708       set_mem_addr_space (x, as);
3709     }
3710 
3711   return x;
3712 }
3713 
3714 /* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
3715    walk_tree.  DATA points to the next fake register number to use.  */
3716 
3717 static tree
3718 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3719 {
3720   tree obj = NULL_TREE;
3721   rtx x = NULL_RTX;
3722   int *regno = (int *) data;
3723 
3724   switch (TREE_CODE (*expr_p))
3725     {
3726     case ADDR_EXPR:
3727       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3728 	   handled_component_p (*expr_p);
3729 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3730 	continue;
3731       obj = *expr_p;
3732       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3733 	x = produce_memory_decl_rtl (obj, regno);
3734       break;
3735 
3736     case SSA_NAME:
3737       *ws = 0;
3738       obj = SSA_NAME_VAR (*expr_p);
3739       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3740       if (!obj)
3741 	return NULL_TREE;
3742       if (!DECL_RTL_SET_P (obj))
3743 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3744       break;
3745 
3746     case VAR_DECL:
3747     case PARM_DECL:
3748     case RESULT_DECL:
3749       *ws = 0;
3750       obj = *expr_p;
3751 
3752       if (DECL_RTL_SET_P (obj))
3753 	break;
3754 
3755       if (DECL_MODE (obj) == BLKmode)
3756 	x = produce_memory_decl_rtl (obj, regno);
3757       else
3758 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3759 
3760       break;
3761 
3762     default:
3763       break;
3764     }
3765 
3766   if (x)
3767     {
3768       decl_rtl_to_reset.safe_push (obj);
3769       SET_DECL_RTL (obj, x);
3770     }
3771 
3772   return NULL_TREE;
3773 }
3774 
3775 /* Determines cost of the computation of EXPR.  */
3776 
3777 static unsigned
3778 computation_cost (tree expr, bool speed)
3779 {
3780   rtx_insn *seq;
3781   rtx rslt;
3782   tree type = TREE_TYPE (expr);
3783   unsigned cost;
3784   /* Avoid using hard regs in ways which may be unsupported.  */
3785   int regno = LAST_VIRTUAL_REGISTER + 1;
3786   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3787   enum node_frequency real_frequency = node->frequency;
3788 
3789   node->frequency = NODE_FREQUENCY_NORMAL;
3790   crtl->maybe_hot_insn_p = speed;
3791   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3792   start_sequence ();
3793   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3794   seq = get_insns ();
3795   end_sequence ();
3796   default_rtl_profile ();
3797   node->frequency = real_frequency;
3798 
3799   cost = seq_cost (seq, speed);
3800   if (MEM_P (rslt))
3801     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3802 			  TYPE_ADDR_SPACE (type), speed);
3803   else if (!REG_P (rslt))
3804     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3805 
3806   return cost;
3807 }
3808 
3809 /* Returns variable containing the value of candidate CAND at statement AT.  */
3810 
3811 static tree
3812 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3813 {
3814   if (stmt_after_increment (loop, cand, stmt))
3815     return cand->var_after;
3816   else
3817     return cand->var_before;
3818 }
3819 
3820 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3821    same precision that is at least as wide as the precision of TYPE, stores
3822    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3823    type of A and B.  */
3824 
3825 static tree
3826 determine_common_wider_type (tree *a, tree *b)
3827 {
3828   tree wider_type = NULL;
3829   tree suba, subb;
3830   tree atype = TREE_TYPE (*a);
3831 
3832   if (CONVERT_EXPR_P (*a))
3833     {
3834       suba = TREE_OPERAND (*a, 0);
3835       wider_type = TREE_TYPE (suba);
3836       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3837 	return atype;
3838     }
3839   else
3840     return atype;
3841 
3842   if (CONVERT_EXPR_P (*b))
3843     {
3844       subb = TREE_OPERAND (*b, 0);
3845       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3846 	return atype;
3847     }
3848   else
3849     return atype;
3850 
3851   *a = suba;
3852   *b = subb;
3853   return wider_type;
3854 }
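/* For example (illustrative only): if *A is (unsigned int) x and *B is
   (unsigned int) y, where x and y are both of the wider type unsigned long,
   the conversions are stripped, unsigned long is returned, and the difference
   of A and B can then be folded in the wider type before being cast back.  */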
3855 
3856 /* Determines the expression by which USE is expressed from induction variable
3857    CAND at statement AT in LOOP.  The expression is stored in decomposed form,
3858    in two parts: the invariant part in AFF_INV and the variant part in AFF_VAR.
3859    If PRAT is non-null, store in it the constant ratio RAT such that USE's step
3860    equals RAT times CAND's step.  Returns false if USE cannot be expressed using CAND.  */
3861 
3862 static bool
3863 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3864 		       struct iv_cand *cand, struct aff_tree *aff_inv,
3865 		       struct aff_tree *aff_var, widest_int *prat = NULL)
3866 {
3867   tree ubase = use->iv->base, ustep = use->iv->step;
3868   tree cbase = cand->iv->base, cstep = cand->iv->step;
3869   tree common_type, uutype, var, cstep_common;
3870   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3871   aff_tree aff_cbase;
3872   widest_int rat;
3873 
3874   /* We must have a precision to express the values of use.  */
3875   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3876     return false;
3877 
3878   var = var_at_stmt (loop, cand, at);
3879   uutype = unsigned_type_for (utype);
3880 
3881   /* If the conversion is not noop, perform it.  */
3882   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3883     {
3884       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3885 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3886 	{
3887 	  tree inner_base, inner_step, inner_type;
3888 	  inner_base = TREE_OPERAND (cbase, 0);
3889 	  if (CONVERT_EXPR_P (cstep))
3890 	    inner_step = TREE_OPERAND (cstep, 0);
3891 	  else
3892 	    inner_step = cstep;
3893 
3894 	  inner_type = TREE_TYPE (inner_base);
3895 	  /* If the candidate is added from a biv whose type is smaller than
3896 	     ctype, we know that neither the candidate nor the biv overflows.
3897 	     In this case, it's safe to skip the conversion in the candidate.
3898 	     As an example, (unsigned short)((unsigned long)A) equals
3899 	     (unsigned short)A, if A has a type no larger than short.  */
3900 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3901 	    {
3902 	      cbase = inner_base;
3903 	      cstep = inner_step;
3904 	    }
3905 	}
3906       cbase = fold_convert (uutype, cbase);
3907       cstep = fold_convert (uutype, cstep);
3908       var = fold_convert (uutype, var);
3909     }
3910 
3911   /* Ratio is 1 when computing the value of biv cand by itself.
3912      We can't rely on constant_multiple_of in this case because the
3913      use is created after the original biv is selected.  The call
3914      could fail because of inconsistent fold behavior.  See PR68021
3915      for more information.  */
3916   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3917     {
3918       gcc_assert (is_gimple_assign (use->stmt));
3919       gcc_assert (use->iv->ssa_name == cand->var_after);
3920       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3921       rat = 1;
3922     }
3923   else if (!constant_multiple_of (ustep, cstep, &rat))
3924     return false;
3925 
3926   if (prat)
3927     *prat = rat;
3928 
3929   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3930      type, we achieve better folding by computing their difference in this
3931      wider type, and cast the result to UUTYPE.  We do not need to worry about
3932      overflows, as all the arithmetics will in the end be performed in UUTYPE
3933      anyway.  */
3934   common_type = determine_common_wider_type (&ubase, &cbase);
3935 
3936   /* use = ubase - ratio * cbase + ratio * var.  */
3937   tree_to_aff_combination (ubase, common_type, aff_inv);
3938   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3939   tree_to_aff_combination (var, uutype, aff_var);
3940 
3941   /* We need to shift the value if we are after the increment.  */
3942   if (stmt_after_increment (loop, cand, at))
3943     {
3944       aff_tree cstep_aff;
3945 
3946       if (common_type != uutype)
3947 	cstep_common = fold_convert (common_type, cstep);
3948       else
3949 	cstep_common = cstep;
3950 
3951       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3952       aff_combination_add (&aff_cbase, &cstep_aff);
3953     }
3954 
3955   aff_combination_scale (&aff_cbase, -rat);
3956   aff_combination_add (aff_inv, &aff_cbase);
3957   if (common_type != uutype)
3958     aff_combination_convert (aff_inv, uutype);
3959 
3960   aff_combination_scale (aff_var, rat);
3961   return true;
3962 }
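/* A sketch of the arithmetic above: if the use iv has value ubase + i * ustep
   and the candidate value at AT is var = cbase + i * cstep, then with
   ratio = ustep / cstep we get
     use = ubase + ratio * (var - cbase) = (ubase - ratio * cbase) + ratio * var,
   which is exactly the split into AFF_INV and AFF_VAR computed above.  */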
3963 
3964 /* Determines the expression by which USE is expressed from induction variable
3965    CAND at statement AT in LOOP.  The expression is stored in decomposed
3966    form in AFF.  Returns false if USE cannot be expressed using CAND.  */
3967 
3968 static bool
3969 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3970 		     struct iv_cand *cand, struct aff_tree *aff)
3971 {
3972   aff_tree aff_var;
3973 
3974   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3975     return false;
3976 
3977   aff_combination_add (aff, &aff_var);
3978   return true;
3979 }
3980 
3981 /* Return the type of USE.  */
3982 
3983 static tree
3984 get_use_type (struct iv_use *use)
3985 {
3986   tree base_type = TREE_TYPE (use->iv->base);
3987   tree type;
3988 
3989   if (use->type == USE_REF_ADDRESS)
3990     {
3991       /* The base_type may be a void pointer.  Create a pointer type based on
3992 	 the mem_ref instead.  */
3993       type = build_pointer_type (TREE_TYPE (*use->op_p));
3994       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3995 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3996     }
3997   else
3998     type = base_type;
3999 
4000   return type;
4001 }
4002 
4003 /* Determines the expression by which USE is expressed from induction variable
4004    CAND at statement AT in LOOP.  The computation is unshared.  */
4005 
4006 static tree
4007 get_computation_at (struct loop *loop, gimple *at,
4008 		    struct iv_use *use, struct iv_cand *cand)
4009 {
4010   aff_tree aff;
4011   tree type = get_use_type (use);
4012 
4013   if (!get_computation_aff (loop, at, use, cand, &aff))
4014     return NULL_TREE;
4015   unshare_aff_combination (&aff);
4016   return fold_convert (type, aff_combination_to_tree (&aff));
4017 }
4018 
4019 /* Adjust the cost COST for being in loop setup rather than loop body.
4020    If we're optimizing for space, the loop setup overhead is constant;
4021    if we're optimizing for speed, amortize it over the average number of
4022    iterations.  If ROUND_UP_P is true, the result is rounded up rather
4023    than truncated toward zero when optimizing for speed.  */
4024 static unsigned
4025 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4026 		   bool round_up_p = false)
4027 {
4028   if (cost == INFTY)
4029     return cost;
4030   else if (optimize_loop_for_speed_p (data->current_loop))
4031     {
4032       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4033       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4034     }
4035   else
4036     return cost;
4037 }
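/* For example (numbers purely illustrative): with an average of 4 iterations,
   a setup cost of 10 is amortized to 10 / 4 == 2, or to (10 + 3) / 4 == 3 when
   ROUND_UP_P is true; when optimizing for size it stays at 10.  */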
4038 
4039 /* Calculate the speed or size cost (according to SPEED) of shiftadd EXPR in
4040    MODE.  MULT is the operand of EXPR holding the multiplication that will be
4041    implemented as a shift.  COST0 and COST1 are the costs for calculating the
4042    operands of EXPR.  Returns true if successful, and returns the cost in COST.  */
4043 
4044 static bool
4045 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4046 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4047 {
4048   comp_cost res;
4049   tree op1 = TREE_OPERAND (expr, 1);
4050   tree cst = TREE_OPERAND (mult, 1);
4051   tree multop = TREE_OPERAND (mult, 0);
4052   int m = exact_log2 (int_cst_value (cst));
4053   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4054   int as_cost, sa_cost;
4055   bool mult_in_op1;
4056 
4057   if (!(m >= 0 && m < maxm))
4058     return false;
4059 
4060   STRIP_NOPS (op1);
4061   mult_in_op1 = operand_equal_p (op1, mult, 0);
4062 
4063   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4064 
4065   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4066      use that in preference to a shift insn followed by an add insn.  */
4067   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4068 	     ? shiftadd_cost (speed, mode, m)
4069 	     : (mult_in_op1
4070 		? shiftsub1_cost (speed, mode, m)
4071 		: shiftsub0_cost (speed, mode, m)));
4072 
4073   res = comp_cost (MIN (as_cost, sa_cost), 0);
4074   res += (mult_in_op1 ? cost0 : cost1);
4075 
4076   STRIP_NOPS (multop);
4077   if (!is_gimple_val (multop))
4078     res += force_expr_to_var_cost (multop, speed);
4079 
4080   *cost = res;
4081   return true;
4082 }
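/* For instance (a sketch only): for EXPR = a + b * 8, M is 3 and the cost used
   is MIN (add + shift-by-3, shiftadd) plus the cost of the operand that is not
   the multiplication, so targets with a cheap shift-and-add instruction
   (e.g. x86 lea or AArch64 add with a shifted operand) prefer the fused form.  */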
4083 
4084 /* Estimates cost of forcing expression EXPR into a variable.  */
4085 
4086 static comp_cost
4087 force_expr_to_var_cost (tree expr, bool speed)
4088 {
4089   static bool costs_initialized = false;
4090   static unsigned integer_cost [2];
4091   static unsigned symbol_cost [2];
4092   static unsigned address_cost [2];
4093   tree op0, op1;
4094   comp_cost cost0, cost1, cost;
4095   machine_mode mode;
4096   scalar_int_mode int_mode;
4097 
4098   if (!costs_initialized)
4099     {
4100       tree type = build_pointer_type (integer_type_node);
4101       tree var, addr;
4102       rtx x;
4103       int i;
4104 
4105       var = create_tmp_var_raw (integer_type_node, "test_var");
4106       TREE_STATIC (var) = 1;
4107       x = produce_memory_decl_rtl (var, NULL);
4108       SET_DECL_RTL (var, x);
4109 
4110       addr = build1 (ADDR_EXPR, type, var);
4111 
4112 
4113       for (i = 0; i < 2; i++)
4114 	{
4115 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4116 							     2000), i);
4117 
4118 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4119 
4120 	  address_cost[i]
4121 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4122 	  if (dump_file && (dump_flags & TDF_DETAILS))
4123 	    {
4124 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4125 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4126 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4127 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4128 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4129 	      fprintf (dump_file, "\n");
4130 	    }
4131 	}
4132 
4133       costs_initialized = true;
4134     }
4135 
4136   STRIP_NOPS (expr);
4137 
4138   if (SSA_VAR_P (expr))
4139     return no_cost;
4140 
4141   if (is_gimple_min_invariant (expr))
4142     {
4143       if (poly_int_tree_p (expr))
4144 	return comp_cost (integer_cost [speed], 0);
4145 
4146       if (TREE_CODE (expr) == ADDR_EXPR)
4147 	{
4148 	  tree obj = TREE_OPERAND (expr, 0);
4149 
4150 	  if (VAR_P (obj)
4151 	      || TREE_CODE (obj) == PARM_DECL
4152 	      || TREE_CODE (obj) == RESULT_DECL)
4153 	    return comp_cost (symbol_cost [speed], 0);
4154 	}
4155 
4156       return comp_cost (address_cost [speed], 0);
4157     }
4158 
4159   switch (TREE_CODE (expr))
4160     {
4161     case POINTER_PLUS_EXPR:
4162     case PLUS_EXPR:
4163     case MINUS_EXPR:
4164     case MULT_EXPR:
4165     case TRUNC_DIV_EXPR:
4166     case BIT_AND_EXPR:
4167     case BIT_IOR_EXPR:
4168     case LSHIFT_EXPR:
4169     case RSHIFT_EXPR:
4170       op0 = TREE_OPERAND (expr, 0);
4171       op1 = TREE_OPERAND (expr, 1);
4172       STRIP_NOPS (op0);
4173       STRIP_NOPS (op1);
4174       break;
4175 
4176     CASE_CONVERT:
4177     case NEGATE_EXPR:
4178     case BIT_NOT_EXPR:
4179       op0 = TREE_OPERAND (expr, 0);
4180       STRIP_NOPS (op0);
4181       op1 = NULL_TREE;
4182       break;
4183 
4184     default:
4185       /* Just an arbitrary value, FIXME.  */
4186       return comp_cost (target_spill_cost[speed], 0);
4187     }
4188 
4189   if (op0 == NULL_TREE
4190       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4191     cost0 = no_cost;
4192   else
4193     cost0 = force_expr_to_var_cost (op0, speed);
4194 
4195   if (op1 == NULL_TREE
4196       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4197     cost1 = no_cost;
4198   else
4199     cost1 = force_expr_to_var_cost (op1, speed);
4200 
4201   mode = TYPE_MODE (TREE_TYPE (expr));
4202   switch (TREE_CODE (expr))
4203     {
4204     case POINTER_PLUS_EXPR:
4205     case PLUS_EXPR:
4206     case MINUS_EXPR:
4207     case NEGATE_EXPR:
4208       cost = comp_cost (add_cost (speed, mode), 0);
4209       if (TREE_CODE (expr) != NEGATE_EXPR)
4210 	{
4211 	  tree mult = NULL_TREE;
4212 	  comp_cost sa_cost;
4213 	  if (TREE_CODE (op1) == MULT_EXPR)
4214 	    mult = op1;
4215 	  else if (TREE_CODE (op0) == MULT_EXPR)
4216 	    mult = op0;
4217 
4218 	  if (mult != NULL_TREE
4219 	      && is_a <scalar_int_mode> (mode, &int_mode)
4220 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4221 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4222 				    speed, &sa_cost))
4223 	    return sa_cost;
4224 	}
4225       break;
4226 
4227     CASE_CONVERT:
4228       {
4229 	tree inner_mode, outer_mode;
4230 	outer_mode = TREE_TYPE (expr);
4231 	inner_mode = TREE_TYPE (op0);
4232 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4233 				       TYPE_MODE (inner_mode), speed), 0);
4234       }
4235       break;
4236 
4237     case MULT_EXPR:
4238       if (cst_and_fits_in_hwi (op0))
4239 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4240 					     mode, speed), 0);
4241       else if (cst_and_fits_in_hwi (op1))
4242 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4243 					     mode, speed), 0);
4244       else
4245 	return comp_cost (target_spill_cost [speed], 0);
4246       break;
4247 
4248     case TRUNC_DIV_EXPR:
4249       /* Division by power of two is usually cheap, so we allow it.  Forbid
4250 	 anything else.  */
4251       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4252 	cost = comp_cost (add_cost (speed, mode), 0);
4253       else
4254 	cost = comp_cost (target_spill_cost[speed], 0);
4255       break;
4256 
4257     case BIT_AND_EXPR:
4258     case BIT_IOR_EXPR:
4259     case BIT_NOT_EXPR:
4260     case LSHIFT_EXPR:
4261     case RSHIFT_EXPR:
4262       cost = comp_cost (add_cost (speed, mode), 0);
4263       break;
4264 
4265     default:
4266       gcc_unreachable ();
4267     }
4268 
4269   cost += cost0;
4270   cost += cost1;
4271   return cost;
4272 }
4273 
4274 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4275    invariants the computation depends on.  */
4276 
4277 static comp_cost
4278 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4279 {
4280   if (!expr)
4281     return no_cost;
4282 
4283   find_inv_vars (data, &expr, inv_vars);
4284   return force_expr_to_var_cost (expr, data->speed);
4285 }
4286 
4287 /* Returns cost of auto-modifying address expression in shape base + offset.
4288    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4289    address expression.  The address expression has ADDR_MODE in addr space
4290    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4291    speed or size.  */
4292 
4293 enum ainc_type
4294 {
4295   AINC_PRE_INC,		/* Pre increment.  */
4296   AINC_PRE_DEC,		/* Pre decrement.  */
4297   AINC_POST_INC,	/* Post increment.  */
4298   AINC_POST_DEC,	/* Post decrement.  */
4299   AINC_NONE		/* Also the number of auto increment types.  */
4300 };
4301 
4302 struct ainc_cost_data
4303 {
4304   unsigned costs[AINC_NONE];
4305 };
4306 
4307 static comp_cost
4308 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4309 		       machine_mode addr_mode, machine_mode mem_mode,
4310 		       addr_space_t as, bool speed)
4311 {
4312   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4313       && !USE_STORE_PRE_DECREMENT (mem_mode)
4314       && !USE_LOAD_POST_DECREMENT (mem_mode)
4315       && !USE_STORE_POST_DECREMENT (mem_mode)
4316       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4317       && !USE_STORE_PRE_INCREMENT (mem_mode)
4318       && !USE_LOAD_POST_INCREMENT (mem_mode)
4319       && !USE_STORE_POST_INCREMENT (mem_mode))
4320     return infinite_cost;
4321 
4322   static vec<ainc_cost_data *> ainc_cost_data_list;
4323   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4324   if (idx >= ainc_cost_data_list.length ())
4325     {
4326       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4327 
4328       gcc_assert (nsize > idx);
4329       ainc_cost_data_list.safe_grow_cleared (nsize);
4330     }
4331 
4332   ainc_cost_data *data = ainc_cost_data_list[idx];
4333   if (data == NULL)
4334     {
4335       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4336 
4337       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4338       data->costs[AINC_PRE_DEC] = INFTY;
4339       data->costs[AINC_POST_DEC] = INFTY;
4340       data->costs[AINC_PRE_INC] = INFTY;
4341       data->costs[AINC_POST_INC] = INFTY;
4342       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4343 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4344 	{
4345 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4346 
4347 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4348 	    data->costs[AINC_PRE_DEC]
4349 	      = address_cost (addr, mem_mode, as, speed);
4350 	}
4351       if (USE_LOAD_POST_DECREMENT (mem_mode)
4352 	  || USE_STORE_POST_DECREMENT (mem_mode))
4353 	{
4354 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4355 
4356 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4357 	    data->costs[AINC_POST_DEC]
4358 	      = address_cost (addr, mem_mode, as, speed);
4359 	}
4360       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4361 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4362 	{
4363 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4364 
4365 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4366 	    data->costs[AINC_PRE_INC]
4367 	      = address_cost (addr, mem_mode, as, speed);
4368 	}
4369       if (USE_LOAD_POST_INCREMENT (mem_mode)
4370 	  || USE_STORE_POST_INCREMENT (mem_mode))
4371 	{
4372 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4373 
4374 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4375 	    data->costs[AINC_POST_INC]
4376 	      = address_cost (addr, mem_mode, as, speed);
4377 	}
4378       ainc_cost_data_list[idx] = data;
4379     }
4380 
4381   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4382   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4383     return comp_cost (data->costs[AINC_POST_INC], 0);
4384   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4385     return comp_cost (data->costs[AINC_POST_DEC], 0);
4386   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4387     return comp_cost (data->costs[AINC_PRE_INC], 0);
4388   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4389     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4390 
4391   return infinite_cost;
4392 }
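/* For example (illustrative): accessing a 4-byte value through an iv whose
   step is 4 with a remaining offset of 0 matches the AINC_POST_INC entry,
   while an offset equal to the access size matches AINC_PRE_INC; the
   decrement entries are symmetric, and any other combination returns
   infinite_cost so that ordinary addressing is costed instead.  */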
4393 
4394 /* Return cost of computing USE's address expression by using CAND.
4395    AFF_INV and AFF_VAR represent invariant and variant parts of the
4396    address expression, respectively.  If AFF_INV is simple, store the
4397    loop invariant variables it depends on in INV_VARS; if AFF_INV is
4398    complicated, handle it as a new invariant expression and record it
4399    in INV_EXPR.  RATIO is the constant multiple between the steps of
4400    USE and CAND.  If CAN_AUTOINC is non-NULL, store into it a boolean
4401    indicating whether this is an auto-increment address.  */
4402 
4403 static comp_cost
4404 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4405 		  struct iv_cand *cand, aff_tree *aff_inv,
4406 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4407 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4408 		  bool *can_autoinc, bool speed)
4409 {
4410   rtx addr;
4411   bool simple_inv = true;
4412   tree comp_inv = NULL_TREE, type = aff_var->type;
4413   comp_cost var_cost = no_cost, cost = no_cost;
4414   struct mem_address parts = {NULL_TREE, integer_one_node,
4415 			      NULL_TREE, NULL_TREE, NULL_TREE};
4416   machine_mode addr_mode = TYPE_MODE (type);
4417   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4418   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4419   /* Only true if ratio != 1.  */
4420   bool ok_with_ratio_p = false;
4421   bool ok_without_ratio_p = false;
4422 
4423   if (!aff_combination_const_p (aff_inv))
4424     {
4425       parts.index = integer_one_node;
4426       /* Addressing mode "base + index".  */
4427       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4428       if (ratio != 1)
4429 	{
4430 	  parts.step = wide_int_to_tree (type, ratio);
4431 	  /* Addressing mode "base + index << scale".  */
4432 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4433 	  if (!ok_with_ratio_p)
4434 	    parts.step = NULL_TREE;
4435 	}
4436       if (ok_with_ratio_p || ok_without_ratio_p)
4437 	{
4438 	  if (maybe_ne (aff_inv->offset, 0))
4439 	    {
4440 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4441 	      /* Addressing mode "base + index [<< scale] + offset".  */
4442 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4443 		parts.offset = NULL_TREE;
4444 	      else
4445 		aff_inv->offset = 0;
4446 	    }
4447 
4448 	  move_fixed_address_to_symbol (&parts, aff_inv);
4449 	  /* Base is fixed address and is moved to symbol part.  */
4450 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4451 	    parts.base = NULL_TREE;
4452 
4453 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4454 	  if (parts.symbol != NULL_TREE
4455 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4456 	    {
4457 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4458 	      parts.symbol = NULL_TREE;
4459 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4460 		 outside of address expression in this case.  */
4461 	      simple_inv = false;
4462 	      /* Symbol part is moved back to the base part; base can't be NULL.  */
4463 	      parts.base = integer_one_node;
4464 	    }
4465 	}
4466       else
4467 	parts.index = NULL_TREE;
4468     }
4469   else
4470     {
4471       poly_int64 ainc_step;
4472       if (can_autoinc
4473 	  && ratio == 1
4474 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4475 	{
4476 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4477 
4478 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4479 	    ainc_offset += ainc_step;
4480 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4481 					addr_mode, mem_mode, as, speed);
4482 	  if (!cost.infinite_cost_p ())
4483 	    {
4484 	      *can_autoinc = true;
4485 	      return cost;
4486 	    }
4487 	  cost = no_cost;
4488 	}
4489       if (!aff_combination_zero_p (aff_inv))
4490 	{
4491 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4492 	  /* Addressing mode "base + offset".  */
4493 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4494 	    parts.offset = NULL_TREE;
4495 	  else
4496 	    aff_inv->offset = 0;
4497 	}
4498     }
4499 
4500   if (simple_inv)
4501     simple_inv = (aff_inv == NULL
4502 		  || aff_combination_const_p (aff_inv)
4503 		  || aff_combination_singleton_var_p (aff_inv));
4504   if (!aff_combination_zero_p (aff_inv))
4505     comp_inv = aff_combination_to_tree (aff_inv);
4506   if (comp_inv != NULL_TREE)
4507     cost = force_var_cost (data, comp_inv, inv_vars);
4508   if (ratio != 1 && parts.step == NULL_TREE)
4509     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4510   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4511     var_cost += add_cost (speed, addr_mode);
4512 
4513   if (comp_inv && inv_expr && !simple_inv)
4514     {
4515       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4516       /* Clear depends on.  */
4517       if (*inv_expr != NULL && inv_vars && *inv_vars)
4518 	bitmap_clear (*inv_vars);
4519 
4520       /* The cost of a small invariant expression, once amortized over the
4521 	 loop iterations, is usually zero, which makes it hard to distinguish
4522 	 from a candidate based on loop invariant variables.  Moreover, the
4523 	 generated invariant expression may not be hoisted out of the loop by
4524 	 a later pass.  We penalize the cost by rounding up in order to
4525 	 neutralize such effects.  */
4526       cost.cost = adjust_setup_cost (data, cost.cost, true);
4527       cost.scratch = cost.cost;
4528     }
4529 
4530   cost += var_cost;
4531   addr = addr_for_mem_ref (&parts, as, false);
4532   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4533   cost += address_cost (addr, mem_mode, as, speed);
4534 
4535   if (parts.symbol != NULL_TREE)
4536     cost.complexity += 1;
4537   /* Don't increase the complexity of adding a scaled index if it's
4538      the only kind of index that the target allows.  */
4539   if (parts.step != NULL_TREE && ok_without_ratio_p)
4540     cost.complexity += 1;
4541   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4542     cost.complexity += 1;
4543   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4544     cost.complexity += 1;
4545 
4546   return cost;
4547 }
4548 
4549 /* Scale (multiply) the computed COST (except the scratch part, which should
4550    be hoisted out of the loop) by AT->frequency / header->frequency, which
4551    makes the expected cost more accurate.  */
4552 
4553 static comp_cost
4554 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4555 {
4556    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4557    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4558    if (loop_freq != 0)
4559      {
4560        gcc_assert (cost.scratch <= cost.cost);
4561        int scaled_cost
4562 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4563 
4564        if (dump_file && (dump_flags & TDF_DETAILS))
4565 	 fprintf (dump_file, "Scaling cost based on bb prob "
4566 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4567 		  1.0f * bb_freq / loop_freq, cost.cost,
4568 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4569 
4570        cost.cost = scaled_cost;
4571      }
4572 
4573   return cost;
4574 }
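/* Illustrative numbers: with a header frequency of 100, a use in a block with
   frequency 50, cost 8 and scratch 2, the scaled cost is
   2 + (8 - 2) * 50 / 100 == 5; the scratch (setup) part is not scaled because
   it is expected to be hoisted out of the loop.  */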
4575 
4576 /* Determines the cost of the computation by which USE is expressed
4577    from induction variable CAND.  If ADDRESS_P is true, we just need
4578    to create an address from it, otherwise we want to get it into a
4579    register.  A set of invariants we depend on is stored in INV_VARS.
4580    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4581    addressing is likely.  If INV_EXPR is nonnull, record invariant
4582    expr entry in it.  */
4583 
4584 static comp_cost
4585 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4586 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4587 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4588 {
4589   gimple *at = use->stmt;
4590   tree ubase = use->iv->base, cbase = cand->iv->base;
4591   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4592   tree comp_inv = NULL_TREE;
4593   HOST_WIDE_INT ratio, aratio;
4594   comp_cost cost;
4595   widest_int rat;
4596   aff_tree aff_inv, aff_var;
4597   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4598 
4599   if (inv_vars)
4600     *inv_vars = NULL;
4601   if (can_autoinc)
4602     *can_autoinc = false;
4603   if (inv_expr)
4604     *inv_expr = NULL;
4605 
4606   /* Check if we have enough precision to express the values of use.  */
4607   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4608     return infinite_cost;
4609 
4610   if (address_p
4611       || (use->iv->base_object
4612 	  && cand->iv->base_object
4613 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4614 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4615     {
4616       /* Do not try to express address of an object with computation based
4617 	 on address of a different object.  This may cause problems in rtl
4618 	 level alias analysis (that does not expect this to be happening,
4619 	 as this is illegal in C), and would be unlikely to be useful
4620 	 anyway.  */
4621       if (use->iv->base_object
4622 	  && cand->iv->base_object
4623 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4624 	return infinite_cost;
4625     }
4626 
4627   if (!get_computation_aff_1 (data->current_loop, at, use,
4628 			      cand, &aff_inv, &aff_var, &rat)
4629       || !wi::fits_shwi_p (rat))
4630     return infinite_cost;
4631 
4632   ratio = rat.to_shwi ();
4633   if (address_p)
4634     {
4635       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4636 			       inv_vars, inv_expr, can_autoinc, speed);
4637       return get_scaled_computation_cost_at (data, at, cost);
4638     }
4639 
4640   bool simple_inv = (aff_combination_const_p (&aff_inv)
4641 		     || aff_combination_singleton_var_p (&aff_inv));
4642   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4643   aff_combination_convert (&aff_inv, signed_type);
4644   if (!aff_combination_zero_p (&aff_inv))
4645     comp_inv = aff_combination_to_tree (&aff_inv);
4646 
4647   cost = force_var_cost (data, comp_inv, inv_vars);
4648   if (comp_inv && inv_expr && !simple_inv)
4649     {
4650       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4651       /* Clear depends on.  */
4652       if (*inv_expr != NULL && inv_vars && *inv_vars)
4653 	bitmap_clear (*inv_vars);
4654 
4655       cost.cost = adjust_setup_cost (data, cost.cost);
4656       /* Record setup cost in scratch field.  */
4657       cost.scratch = cost.cost;
4658     }
4659   /* Cost of constant integer can be covered when adding invariant part to
4660      variant part.  */
4661   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4662     cost = no_cost;
4663 
4664   /* Need type narrowing to represent use with cand.  */
4665   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4666     {
4667       machine_mode outer_mode = TYPE_MODE (utype);
4668       machine_mode inner_mode = TYPE_MODE (ctype);
4669       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4670     }
4671 
4672   /* Turn a + i * (-c) into a - i * c.  */
4673   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4674     aratio = -ratio;
4675   else
4676     aratio = ratio;
4677 
4678   if (ratio != 1)
4679     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4680 
4681   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4682      instruction.  */
4683   /* Need to add up the invariant and variant parts.  */
4684   if (comp_inv && !integer_zerop (comp_inv))
4685     cost += add_cost (speed, TYPE_MODE (utype));
4686 
4687   return get_scaled_computation_cost_at (data, at, cost);
4688 }
4689 
4690 /* Determines cost of computing the use in GROUP with CAND in a generic
4691    expression.  */
4692 
4693 static bool
4694 determine_group_iv_cost_generic (struct ivopts_data *data,
4695 				 struct iv_group *group, struct iv_cand *cand)
4696 {
4697   comp_cost cost;
4698   iv_inv_expr_ent *inv_expr = NULL;
4699   bitmap inv_vars = NULL, inv_exprs = NULL;
4700   struct iv_use *use = group->vuses[0];
4701 
4702   /* The simple case first -- if we need to express value of the preserved
4703      original biv, the cost is 0.  This also prevents us from counting the
4704      cost of increment twice -- once at this use and once in the cost of
4705      the candidate.  */
4706   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4707     cost = no_cost;
4708   else
4709     cost = get_computation_cost (data, use, cand, false,
4710 				 &inv_vars, NULL, &inv_expr);
4711 
4712   if (inv_expr)
4713     {
4714       inv_exprs = BITMAP_ALLOC (NULL);
4715       bitmap_set_bit (inv_exprs, inv_expr->id);
4716     }
4717   set_group_iv_cost (data, group, cand, cost, inv_vars,
4718 		     NULL_TREE, ERROR_MARK, inv_exprs);
4719   return !cost.infinite_cost_p ();
4720 }
4721 
4722 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4723 
4724 static bool
4725 determine_group_iv_cost_address (struct ivopts_data *data,
4726 				 struct iv_group *group, struct iv_cand *cand)
4727 {
4728   unsigned i;
4729   bitmap inv_vars = NULL, inv_exprs = NULL;
4730   bool can_autoinc;
4731   iv_inv_expr_ent *inv_expr = NULL;
4732   struct iv_use *use = group->vuses[0];
4733   comp_cost sum_cost = no_cost, cost;
4734 
4735   cost = get_computation_cost (data, use, cand, true,
4736 			       &inv_vars, &can_autoinc, &inv_expr);
4737 
4738   if (inv_expr)
4739     {
4740       inv_exprs = BITMAP_ALLOC (NULL);
4741       bitmap_set_bit (inv_exprs, inv_expr->id);
4742     }
4743   sum_cost = cost;
4744   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4745     {
4746       if (can_autoinc)
4747 	sum_cost -= cand->cost_step;
4748       /* If we generated the candidate solely for exploiting autoincrement
4749 	 opportunities, and it turns out it can't be used, set the cost to
4750 	 infinity to make sure we ignore it.  */
4751       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4752 	sum_cost = infinite_cost;
4753     }
4754 
4755   /* Uses in a group can share setup code, so only add setup cost once.  */
4756   cost -= cost.scratch;
4757   /* Compute and add costs for the remaining uses of this group.  */
4758   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4759     {
4760       struct iv_use *next = group->vuses[i];
4761 
4762       /* TODO: We could skip computing cost for sub iv_use when it has the
4763 	 same cost as the first iv_use, but the cost really depends on the
4764 	 offset and where the iv_use is.  */
4765 	cost = get_computation_cost (data, next, cand, true,
4766 				     NULL, &can_autoinc, &inv_expr);
4767 	if (inv_expr)
4768 	  {
4769 	    if (!inv_exprs)
4770 	      inv_exprs = BITMAP_ALLOC (NULL);
4771 
4772 	    bitmap_set_bit (inv_exprs, inv_expr->id);
4773 	  }
4774       sum_cost += cost;
4775     }
4776   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4777 		     NULL_TREE, ERROR_MARK, inv_exprs);
4778 
4779   return !sum_cost.infinite_cost_p ();
4780 }
4781 
4782 /* Computes value of candidate CAND at position AT in iteration NITER, and
4783    stores it to VAL.  */
4784 
4785 static void
4786 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4787 	       aff_tree *val)
4788 {
4789   aff_tree step, delta, nit;
4790   struct iv *iv = cand->iv;
4791   tree type = TREE_TYPE (iv->base);
4792   tree steptype;
4793   if (POINTER_TYPE_P (type))
4794     steptype = sizetype;
4795   else
4796     steptype = unsigned_type_for (type);
4797 
4798   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4799   aff_combination_convert (&step, steptype);
4800   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4801   aff_combination_convert (&nit, steptype);
4802   aff_combination_mult (&nit, &step, &delta);
4803   if (stmt_after_increment (loop, cand, at))
4804     aff_combination_add (&delta, &step);
4805 
4806   tree_to_aff_combination (iv->base, type, val);
4807   if (!POINTER_TYPE_P (type))
4808     aff_combination_convert (val, steptype);
4809   aff_combination_add (val, &delta);
4810 }
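/* In other words, VAL is base + (NITER + 1 if AT is after the increment,
   NITER otherwise) * step, with the multiplication carried out in an unsigned
   type (sizetype for pointers) so that any wrap-around is well defined.  */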
4811 
4812 /* Returns period of induction variable iv.  */
4813 
4814 static tree
4815 iv_period (struct iv *iv)
4816 {
4817   tree step = iv->step, period, type;
4818   tree pow2div;
4819 
4820   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4821 
4822   type = unsigned_type_for (TREE_TYPE (step));
4823   /* The period of the iv is lcm (step, type_range) / step - 1,
4824      i.e., N * type_range / step - 1.  Since type_range is a power
4825      of two and N == step >> num_of_ending_zeros_binary (step),
4826      the final result is
4827 
4828        (type_range >> num_of_ending_zeros_binary (step)) - 1
4829 
4830   */
4831   pow2div = num_ending_zeros (step);
4832 
4833   period = build_low_bits_mask (type,
4834 				(TYPE_PRECISION (type)
4835 				 - tree_to_uhwi (pow2div)));
4836 
4837   return period;
4838 }
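/* Worked example (illustrative): for an 8-bit unsigned type and step 4,
   num_ending_zeros is 2 and the period is a mask of the low 8 - 2 == 6 bits,
   i.e. 63: that many increments can be taken before the iv repeats a value.  */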
4839 
4840 /* Returns the comparison operator used when eliminating the iv USE.  */
4841 
4842 static enum tree_code
4843 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4844 {
4845   struct loop *loop = data->current_loop;
4846   basic_block ex_bb;
4847   edge exit;
4848 
4849   ex_bb = gimple_bb (use->stmt);
4850   exit = EDGE_SUCC (ex_bb, 0);
4851   if (flow_bb_inside_loop_p (loop, exit->dest))
4852     exit = EDGE_SUCC (ex_bb, 1);
4853 
4854   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4855 }
4856 
4857 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4858    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4859    calculation is performed in non-wrapping type.
4860 
4861    TODO: More generally, we could test for the situation that
4862 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4863 	 This would require knowing the sign of OFFSET.  */
4864 
4865 static bool
4866 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4867 {
4868   enum tree_code code;
4869   tree e1, e2;
4870   aff_tree aff_e1, aff_e2, aff_offset;
4871 
4872   if (!nowrap_type_p (TREE_TYPE (base)))
4873     return false;
4874 
4875   base = expand_simple_operations (base);
4876 
4877   if (TREE_CODE (base) == SSA_NAME)
4878     {
4879       gimple *stmt = SSA_NAME_DEF_STMT (base);
4880 
4881       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4882 	return false;
4883 
4884       code = gimple_assign_rhs_code (stmt);
4885       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4886 	return false;
4887 
4888       e1 = gimple_assign_rhs1 (stmt);
4889       e2 = gimple_assign_rhs2 (stmt);
4890     }
4891   else
4892     {
4893       code = TREE_CODE (base);
4894       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4895 	return false;
4896       e1 = TREE_OPERAND (base, 0);
4897       e2 = TREE_OPERAND (base, 1);
4898     }
4899 
4900   /* Use affine expansion as deeper inspection to prove the equality.  */
4901   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4902 				  &aff_e2, &data->name_expansion_cache);
4903   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4904 				  &aff_offset, &data->name_expansion_cache);
4905   aff_combination_scale (&aff_offset, -1);
4906   switch (code)
4907     {
4908     case PLUS_EXPR:
4909       aff_combination_add (&aff_e2, &aff_offset);
4910       if (aff_combination_zero_p (&aff_e2))
4911 	return true;
4912 
4913       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4914 				      &aff_e1, &data->name_expansion_cache);
4915       aff_combination_add (&aff_e1, &aff_offset);
4916       return aff_combination_zero_p (&aff_e1);
4917 
4918     case POINTER_PLUS_EXPR:
4919       aff_combination_add (&aff_e2, &aff_offset);
4920       return aff_combination_zero_p (&aff_e2);
4921 
4922     default:
4923       return false;
4924     }
4925 }
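/* A sketch of the intended case: if BASE was computed as SOMETHING + OFFSET in
   a non-wrapping type (e.g. BASE == p + a with OFFSET == a, as in the
   transformation described below), the affine expansions of one operand and of
   -OFFSET cancel, so BASE - OFFSET is just SOMETHING and cannot overflow.  */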
4926 
4927 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4928    comparison with CAND.  NITER describes the number of iterations of
4929    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4930 
4931    We aim to handle the following situation:
4932 
4933    sometype *base, *p;
4934    int a, b, i;
4935 
4936    i = a;
4937    p = p_0 = base + a;
4938 
4939    do
4940      {
4941        bla (*p);
4942        p++;
4943        i++;
4944      }
4945    while (i < b);
4946 
4947    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4948    We aim to optimize this to
4949 
4950    p = p_0 = base + a;
4951    do
4952      {
4953        bla (*p);
4954        p++;
4955      }
4956    while (p < p_0 - a + b);
4957 
4958    This preserves the correctness, since the pointer arithmetics does not
4959    overflow.  More precisely:
4960 
4961    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4962       overflow in computing it or the values of p.
4963    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4964       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4965 
4966 static bool
4967 iv_elimination_compare_lt (struct ivopts_data *data,
4968 			   struct iv_cand *cand, enum tree_code *comp_p,
4969 			   struct tree_niter_desc *niter)
4970 {
4971   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4972   struct aff_tree nit, tmpa, tmpb;
4973   enum tree_code comp;
4974   HOST_WIDE_INT step;
4975 
4976   /* We need to know that the candidate induction variable does not overflow.
4977      While more complex analysis may be used to prove this, for now just
4978      check that the variable appears in the original program and that it
4979      is computed in a type that guarantees no overflows.  */
4980   cand_type = TREE_TYPE (cand->iv->base);
4981   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4982     return false;
4983 
4984   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4985      the calculation of the BOUND could overflow, making the comparison
4986      invalid.  */
4987   if (!data->loop_single_exit_p)
4988     return false;
4989 
4990   /* We need to be able to decide whether candidate is increasing or decreasing
4991      in order to choose the right comparison operator.  */
4992   if (!cst_and_fits_in_hwi (cand->iv->step))
4993     return false;
4994   step = int_cst_value (cand->iv->step);
4995 
4996   /* Check that the number of iterations matches the expected pattern:
4997      a + 1 > b ? 0 : b - a - 1.  */
4998   mbz = niter->may_be_zero;
4999   if (TREE_CODE (mbz) == GT_EXPR)
5000     {
5001       /* Handle a + 1 > b.  */
5002       tree op0 = TREE_OPERAND (mbz, 0);
5003       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5004 	{
5005 	  a = TREE_OPERAND (op0, 0);
5006 	  b = TREE_OPERAND (mbz, 1);
5007 	}
5008       else
5009 	return false;
5010     }
5011   else if (TREE_CODE (mbz) == LT_EXPR)
5012     {
5013       tree op1 = TREE_OPERAND (mbz, 1);
5014 
5015       /* Handle b < a + 1.  */
5016       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5017 	{
5018 	  a = TREE_OPERAND (op1, 0);
5019 	  b = TREE_OPERAND (mbz, 0);
5020 	}
5021       else
5022 	return false;
5023     }
5024   else
5025     return false;
5026 
5027   /* Expected number of iterations is B - A - 1.  Check that it matches
5028      the actual number, i.e., that B - A - NITER = 1.  */
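  /* For instance, with a = 3 and b = 10 the loop runs for
     niter = b - a - 1 = 6 iterations, and B - A - NITER = 10 - 3 - 6 = 1.  */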
5029   tree_to_aff_combination (niter->niter, nit_type, &nit);
5030   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5031   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5032   aff_combination_scale (&nit, -1);
5033   aff_combination_scale (&tmpa, -1);
5034   aff_combination_add (&tmpb, &tmpa);
5035   aff_combination_add (&tmpb, &nit);
5036   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5037     return false;
5038 
5039   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5040      overflow.  */
5041   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5042 			cand->iv->step,
5043 			fold_convert (TREE_TYPE (cand->iv->step), a));
5044   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5045     return false;
5046 
5047   /* Determine the new comparison operator.  */
5048   comp = step < 0 ? GT_EXPR : LT_EXPR;
5049   if (*comp_p == NE_EXPR)
5050     *comp_p = comp;
5051   else if (*comp_p == EQ_EXPR)
5052     *comp_p = invert_tree_comparison (comp, false);
5053   else
5054     gcc_unreachable ();
5055 
5056   return true;
5057 }
5058 
5059 /* Check whether it is possible to express the condition in USE by comparison
5060    of candidate CAND.  If so, store the value compared with to BOUND, and the
5061    comparison operator to COMP.  */
5062 
5063 static bool
5064 may_eliminate_iv (struct ivopts_data *data,
5065 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5066 		  enum tree_code *comp)
5067 {
5068   basic_block ex_bb;
5069   edge exit;
5070   tree period;
5071   struct loop *loop = data->current_loop;
5072   aff_tree bnd;
5073   struct tree_niter_desc *desc = NULL;
5074 
5075   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5076     return false;
5077 
5078   /* For now works only for exits that dominate the loop latch.
5079      TODO: extend to other conditions inside loop body.  */
5080   ex_bb = gimple_bb (use->stmt);
5081   if (use->stmt != last_stmt (ex_bb)
5082       || gimple_code (use->stmt) != GIMPLE_COND
5083       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5084     return false;
5085 
5086   exit = EDGE_SUCC (ex_bb, 0);
5087   if (flow_bb_inside_loop_p (loop, exit->dest))
5088     exit = EDGE_SUCC (ex_bb, 1);
5089   if (flow_bb_inside_loop_p (loop, exit->dest))
5090     return false;
5091 
5092   desc = niter_for_exit (data, exit);
5093   if (!desc)
5094     return false;
5095 
5096   /* Determine whether we can use the variable to test the exit condition.
5097      This is the case iff the period of the induction variable is greater
5098      than the number of iterations for which the exit condition is true.  */
5099   period = iv_period (cand->iv);
5100 
5101   /* If the number of iterations is constant, compare against it directly.  */
5102   if (TREE_CODE (desc->niter) == INTEGER_CST)
5103     {
5104       /* See cand_value_at.  */
5105       if (stmt_after_increment (loop, cand, use->stmt))
5106 	{
5107 	  if (!tree_int_cst_lt (desc->niter, period))
5108 	    return false;
5109 	}
5110       else
5111 	{
5112 	  if (tree_int_cst_lt (period, desc->niter))
5113 	    return false;
5114 	}
5115     }
5116 
5117   /* If not, and if this is the only possible exit of the loop, see whether
5118      we can get a conservative estimate on the number of iterations of the
5119      entire loop and compare against that instead.  */
5120   else
5121     {
5122       widest_int period_value, max_niter;
5123 
5124       max_niter = desc->max;
5125       if (stmt_after_increment (loop, cand, use->stmt))
5126 	max_niter += 1;
5127       period_value = wi::to_widest (period);
5128       if (wi::gtu_p (max_niter, period_value))
5129 	{
5130 	  /* See if we can take advantage of inferred loop bound
5131 	     information.  */
5132 	  if (data->loop_single_exit_p)
5133 	    {
5134 	      if (!max_loop_iterations (loop, &max_niter))
5135 		return false;
5136 	      /* The loop bound is already adjusted by adding 1.  */
5137 	      if (wi::gtu_p (max_niter, period_value))
5138 		return false;
5139 	    }
5140 	  else
5141 	    return false;
5142 	}
5143     }
5144 
5145   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5146 
5147   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5148 			 aff_combination_to_tree (&bnd));
5149   *comp = iv_elimination_compare (data, use);
5150 
5151   /* It is unlikely that computing the number of iterations using division
5152      would be more profitable than keeping the original induction variable.  */
5153   if (expression_expensive_p (*bound))
5154     return false;
5155 
5156   /* Sometimes, the situation that the number of iterations may be zero
5157      unless additional assumptions hold can be handled by using <
5158      instead of != in the exit condition.
5159 
5160      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5161 	   base the exit condition on it.  However, that is often too
5162 	   expensive.  */
5163   if (!integer_zerop (desc->may_be_zero))
5164     return iv_elimination_compare_lt (data, cand, comp, desc);
5165 
5166   return true;
5167 }
5168 
5169  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5170     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5171 
5172 static int
5173 parm_decl_cost (struct ivopts_data *data, tree bound)
5174 {
5175   tree sbound = bound;
5176   STRIP_NOPS (sbound);
5177 
5178   if (TREE_CODE (sbound) == SSA_NAME
5179       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5180       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5181       && data->body_includes_call)
5182     return COSTS_N_INSNS (1);
5183 
5184   return 0;
5185 }
5186 
5187 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5188 
5189 static bool
5190 determine_group_iv_cost_cond (struct ivopts_data *data,
5191 			      struct iv_group *group, struct iv_cand *cand)
5192 {
5193   tree bound = NULL_TREE;
5194   struct iv *cmp_iv;
5195   bitmap inv_exprs = NULL;
5196   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5197   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5198   enum comp_iv_rewrite rewrite_type;
5199   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5200   tree *control_var, *bound_cst;
5201   enum tree_code comp = ERROR_MARK;
5202   struct iv_use *use = group->vuses[0];
5203 
5204   /* Extract condition operands.  */
5205   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5206 					&bound_cst, NULL, &cmp_iv);
5207   gcc_assert (rewrite_type != COMP_IV_NA);
5208 
5209   /* Try iv elimination.  */
5210   if (rewrite_type == COMP_IV_ELIM
5211       && may_eliminate_iv (data, use, cand, &bound, &comp))
5212     {
5213       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5214       if (elim_cost.cost == 0)
5215 	elim_cost.cost = parm_decl_cost (data, bound);
5216       else if (TREE_CODE (bound) == INTEGER_CST)
5217 	elim_cost.cost = 0;
5218       /* If we replace a loop condition 'i < n' with 'p < base + n',
5219 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5220 	 'base' and 'n' will be live during the loop.	 More likely,
5221 	 'base + n' will be loop invariant, resulting in only one live value
5222 	 during the loop.  So in that case we clear inv_vars_elim and set
5223 	 inv_expr_elim instead.  */
5224       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5225 	{
5226 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5227 	  bitmap_clear (inv_vars_elim);
5228 	}
5229       /* The bound is a loop invariant, so it will be only computed
5230 	 once.  */
5231       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5232     }
5233 
5234   /* When the condition is a comparison of the candidate IV against
5235      zero, prefer this IV.
5236 
5237      TODO: The constant that we're subtracting from the cost should
5238      be target-dependent.  This information should be added to the
5239      target costs for each backend.  */
5240   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5241       && integer_zerop (*bound_cst)
5242       && (operand_equal_p (*control_var, cand->var_after, 0)
5243 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5244     elim_cost -= 1;
5245 
5246   express_cost = get_computation_cost (data, use, cand, false,
5247 				       &inv_vars_express, NULL,
5248 				       &inv_expr_express);
5249   if (cmp_iv != NULL)
5250     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5251 
5252   /* Count the cost of the original bound as well.  */
5253   bound_cost = force_var_cost (data, *bound_cst, NULL);
5254   if (bound_cost.cost == 0)
5255     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5256   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5257     bound_cost.cost = 0;
5258   express_cost += bound_cost;
5259 
5260   /* Choose the better approach, preferring the eliminated IV. */
5261   if (elim_cost <= express_cost)
5262     {
5263       cost = elim_cost;
5264       inv_vars = inv_vars_elim;
5265       inv_vars_elim = NULL;
5266       inv_expr = inv_expr_elim;
5267     }
5268   else
5269     {
5270       cost = express_cost;
5271       inv_vars = inv_vars_express;
5272       inv_vars_express = NULL;
5273       bound = NULL_TREE;
5274       comp = ERROR_MARK;
5275       inv_expr = inv_expr_express;
5276     }
5277 
5278   if (inv_expr)
5279     {
5280       inv_exprs = BITMAP_ALLOC (NULL);
5281       bitmap_set_bit (inv_exprs, inv_expr->id);
5282     }
5283   set_group_iv_cost (data, group, cand, cost,
5284 		     inv_vars, bound, comp, inv_exprs);
5285 
5286   if (inv_vars_elim)
5287     BITMAP_FREE (inv_vars_elim);
5288   if (inv_vars_express)
5289     BITMAP_FREE (inv_vars_express);
5290 
5291   return !cost.infinite_cost_p ();
5292 }
5293 
5294 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5295    if GROUP cannot be represented with CAND.  */
5296 
5297 static bool
5298 determine_group_iv_cost (struct ivopts_data *data,
5299 			 struct iv_group *group, struct iv_cand *cand)
5300 {
5301   switch (group->type)
5302     {
5303     case USE_NONLINEAR_EXPR:
5304       return determine_group_iv_cost_generic (data, group, cand);
5305 
5306     case USE_REF_ADDRESS:
5307     case USE_PTR_ADDRESS:
5308       return determine_group_iv_cost_address (data, group, cand);
5309 
5310     case USE_COMPARE:
5311       return determine_group_iv_cost_cond (data, group, cand);
5312 
5313     default:
5314       gcc_unreachable ();
5315     }
5316 }
5317 
5318 /* Return true if get_computation_cost indicates that autoincrement is
5319    a possibility for the pair of USE and CAND, false otherwise.  */
5320 
5321 static bool
5322 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5323 			   struct iv_cand *cand)
5324 {
5325   if (!address_p (use->type))
5326     return false;
5327 
5328   bool can_autoinc = false;
5329   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5330   return can_autoinc;
5331 }
5332 
5333 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5334    use that allows autoincrement, and set their AINC_USE if possible.  */
5335 
5336 static void
5337 set_autoinc_for_original_candidates (struct ivopts_data *data)
5338 {
5339   unsigned i, j;
5340 
5341   for (i = 0; i < data->vcands.length (); i++)
5342     {
5343       struct iv_cand *cand = data->vcands[i];
5344       struct iv_use *closest_before = NULL;
5345       struct iv_use *closest_after = NULL;
5346       if (cand->pos != IP_ORIGINAL)
5347 	continue;
5348 
5349       for (j = 0; j < data->vgroups.length (); j++)
5350 	{
5351 	  struct iv_group *group = data->vgroups[j];
5352 	  struct iv_use *use = group->vuses[0];
5353 	  unsigned uid = gimple_uid (use->stmt);
5354 
5355 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5356 	    continue;
5357 
5358 	  if (uid < gimple_uid (cand->incremented_at)
5359 	      && (closest_before == NULL
5360 		  || uid > gimple_uid (closest_before->stmt)))
5361 	    closest_before = use;
5362 
5363 	  if (uid > gimple_uid (cand->incremented_at)
5364 	      && (closest_after == NULL
5365 		  || uid < gimple_uid (closest_after->stmt)))
5366 	    closest_after = use;
5367 	}
5368 
5369       if (closest_before != NULL
5370 	  && autoinc_possible_for_pair (data, closest_before, cand))
5371 	cand->ainc_use = closest_before;
5372       else if (closest_after != NULL
5373 	       && autoinc_possible_for_pair (data, closest_after, cand))
5374 	cand->ainc_use = closest_after;
5375     }
5376 }
5377 
5378 /* Relate compare use with all candidates.  */
5379 
5380 static void
5381 relate_compare_use_with_all_cands (struct ivopts_data *data)
5382 {
5383   unsigned i, count = data->vcands.length ();
5384   for (i = 0; i < data->vgroups.length (); i++)
5385     {
5386       struct iv_group *group = data->vgroups[i];
5387 
5388       if (group->type == USE_COMPARE)
5389 	bitmap_set_range (group->related_cands, 0, count);
5390     }
5391 }
5392 
5393 /* Finds the candidates for the induction variables.  */
5394 
5395 static void
5396 find_iv_candidates (struct ivopts_data *data)
5397 {
5398   /* Add commonly used ivs.  */
5399   add_standard_iv_candidates (data);
5400 
5401   /* Add old induction variables.  */
5402   add_iv_candidate_for_bivs (data);
5403 
5404   /* Add induction variables derived from uses.  */
5405   add_iv_candidate_for_groups (data);
5406 
5407   set_autoinc_for_original_candidates (data);
5408 
5409   /* Record the important candidates.  */
5410   record_important_candidates (data);
5411 
5412   /* Relate compare iv_use with all candidates.  */
5413   if (!data->consider_all_candidates)
5414     relate_compare_use_with_all_cands (data);
5415 
5416   if (dump_file && (dump_flags & TDF_DETAILS))
5417     {
5418       unsigned i;
5419 
5420       fprintf (dump_file, "\n<Important Candidates>:\t");
5421       for (i = 0; i < data->vcands.length (); i++)
5422 	if (data->vcands[i]->important)
5423 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5424       fprintf (dump_file, "\n");
5425 
5426       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5427       for (i = 0; i < data->vgroups.length (); i++)
5428 	{
5429 	  struct iv_group *group = data->vgroups[i];
5430 
5431 	  if (group->related_cands)
5432 	    {
5433 	      fprintf (dump_file, "  Group %d:\t", group->id);
5434 	      dump_bitmap (dump_file, group->related_cands);
5435 	    }
5436 	}
5437       fprintf (dump_file, "\n");
5438     }
5439 }
5440 
5441 /* Determines costs of computing use of iv with an iv candidate.  */
5442 
5443 static void
5444 determine_group_iv_costs (struct ivopts_data *data)
5445 {
5446   unsigned i, j;
5447   struct iv_cand *cand;
5448   struct iv_group *group;
5449   bitmap to_clear = BITMAP_ALLOC (NULL);
5450 
5451   alloc_use_cost_map (data);
5452 
5453   for (i = 0; i < data->vgroups.length (); i++)
5454     {
5455       group = data->vgroups[i];
5456 
5457       if (data->consider_all_candidates)
5458 	{
5459 	  for (j = 0; j < data->vcands.length (); j++)
5460 	    {
5461 	      cand = data->vcands[j];
5462 	      determine_group_iv_cost (data, group, cand);
5463 	    }
5464 	}
5465       else
5466 	{
5467 	  bitmap_iterator bi;
5468 
5469 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5470 	    {
5471 	      cand = data->vcands[j];
5472 	      if (!determine_group_iv_cost (data, group, cand))
5473 		bitmap_set_bit (to_clear, j);
5474 	    }
5475 
5476 	  /* Remove from the list of related candidates those candidates
5477 	     for which the cost is infinite.  */
5478 	  bitmap_and_compl_into (group->related_cands, to_clear);
5479 	  bitmap_clear (to_clear);
5480 	}
5481     }
5482 
5483   BITMAP_FREE (to_clear);
5484 
5485   if (dump_file && (dump_flags & TDF_DETAILS))
5486     {
5487       bitmap_iterator bi;
5488 
5489       /* Dump invariant variables.  */
5490       fprintf (dump_file, "\n<Invariant Vars>:\n");
5491       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5492 	{
5493 	  struct version_info *info = ver_info (data, i);
5494 	  if (info->inv_id)
5495 	    {
5496 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5497 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5498 	      fprintf (dump_file, "%s\n",
5499 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5500 	    }
5501 	}
5502 
5503       /* Dump invariant expressions.  */
5504       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5505       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5506 
5507       for (hash_table<iv_inv_expr_hasher>::iterator it
5508 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5509 	   ++it)
5510 	list.safe_push (*it);
5511 
5512       list.qsort (sort_iv_inv_expr_ent);
5513 
5514       for (i = 0; i < list.length (); ++i)
5515 	{
5516 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5517 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5518 	  fprintf (dump_file, "\n");
5519 	}
5520 
5521       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5522 
5523       for (i = 0; i < data->vgroups.length (); i++)
5524 	{
5525 	  group = data->vgroups[i];
5526 
5527 	  fprintf (dump_file, "Group %d:\n", i);
5528 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5529 	  for (j = 0; j < group->n_map_members; j++)
5530 	    {
5531 	      if (!group->cost_map[j].cand
5532 		  || group->cost_map[j].cost.infinite_cost_p ())
5533 		continue;
5534 
5535 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5536 		       group->cost_map[j].cand->id,
5537 		       group->cost_map[j].cost.cost,
5538 		       group->cost_map[j].cost.complexity);
5539 	      if (!group->cost_map[j].inv_exprs
5540 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5541 		fprintf (dump_file, "NIL;\t");
5542 	      else
5543 		bitmap_print (dump_file,
5544 			      group->cost_map[j].inv_exprs, "", ";\t");
5545 	      if (!group->cost_map[j].inv_vars
5546 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5547 		fprintf (dump_file, "NIL;\n");
5548 	      else
5549 		bitmap_print (dump_file,
5550 			      group->cost_map[j].inv_vars, "", "\n");
5551 	    }
5552 
5553 	  fprintf (dump_file, "\n");
5554 	}
5555       fprintf (dump_file, "\n");
5556     }
5557 }
5558 
5559 /* Determines cost of the candidate CAND.  */
5560 
5561 static void
5562 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5563 {
5564   comp_cost cost_base;
5565   unsigned cost, cost_step;
5566   tree base;
5567 
5568   gcc_assert (cand->iv != NULL);
5569 
5570   /* There are two costs associated with the candidate -- its increment
5571      and its initialization.  The second is almost negligible for any loop
5572      that rolls enough, so we give it only a small weight.  */
5573 
5574   base = cand->iv->base;
5575   cost_base = force_var_cost (data, base, NULL);
5576   /* It will be exceptional that the iv register happens to be initialized with
5577      the proper value at no cost.  In general, there will at least be a regcopy
5578      or a const set.  */
5579   if (cost_base.cost == 0)
5580     cost_base.cost = COSTS_N_INSNS (1);
5581   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5582 
5583   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5584 
5585   /* Prefer the original ivs unless we may gain something by replacing
5586      them; this makes debugging simpler.  The preference is therefore not
5587      relevant for artificial ivs created by other optimization passes.  */
5588   if (cand->pos != IP_ORIGINAL
5589       || !SSA_NAME_VAR (cand->var_before)
5590       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5591     cost++;
5592 
5593   /* Prefer not to insert statements into latch unless there are some
5594      already (so that we do not create unnecessary jumps).  */
5595   if (cand->pos == IP_END
5596       && empty_block_p (ip_end_pos (data->current_loop)))
5597     cost++;
5598 
5599   cand->cost = cost;
5600   cand->cost_step = cost_step;
5601 }
5602 
5603 /* Determines costs of computation of the candidates.  */
5604 
5605 static void
5606 determine_iv_costs (struct ivopts_data *data)
5607 {
5608   unsigned i;
5609 
5610   if (dump_file && (dump_flags & TDF_DETAILS))
5611     {
5612       fprintf (dump_file, "<Candidate Costs>:\n");
5613       fprintf (dump_file, "  cand\tcost\n");
5614     }
5615 
5616   for (i = 0; i < data->vcands.length (); i++)
5617     {
5618       struct iv_cand *cand = data->vcands[i];
5619 
5620       determine_iv_cost (data, cand);
5621 
5622       if (dump_file && (dump_flags & TDF_DETAILS))
5623 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5624     }
5625 
5626   if (dump_file && (dump_flags & TDF_DETAILS))
5627     fprintf (dump_file, "\n");
5628 }
5629 
5630 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5631    induction variables.  Note N_INVS includes both invariant variables and
5632    invariant expressions.  */
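/* As an illustration (hypothetical numbers): with available_regs = 10,
   n_old = 4, n_invs = 3 and n_cands = 5, regs_needed is 12.  That exceeds
   the available registers while n_cands does not, so the cost below is
   target_reg_cost * 10 + target_spill_cost * (12 - 10), plus n_cands = 5
   added at the end to favor eliminating induction variables.  */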
5633 
5634 static unsigned
5635 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5636 			      unsigned n_cands)
5637 {
5638   unsigned cost;
5639   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5640   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5641   bool speed = data->speed;
5642 
5643   /* If there is a call in the loop body, the call-clobbered registers
5644      are not available for loop invariants.  */
5645   if (data->body_includes_call)
5646     available_regs = available_regs - target_clobbered_regs;
5647 
5648   /* If we have enough registers.  */
5649   if (regs_needed + target_res_regs < available_regs)
5650     cost = n_new;
5651   /* If close to running out of registers, try to preserve them.  */
5652   else if (regs_needed <= available_regs)
5653     cost = target_reg_cost [speed] * regs_needed;
5654   /* If the registers needed exceed those available but the number of
5655      candidates does not, we penalize extra registers using target_spill_cost.  */
5656   else if (n_cands <= available_regs)
5657     cost = target_reg_cost [speed] * available_regs
5658 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5659   /* If even the number of candidates exceeds the available registers,
5660      penalize the extra candidate registers using target_spill_cost * 2;
5661      spilling an induction variable is more expensive than an invariant.  */
5662   else
5663     cost = target_reg_cost [speed] * available_regs
5664 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5665 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5666 
5667   /* Finally, add the number of candidates, so that we prefer eliminating
5668      induction variables if possible.  */
5669   return cost + n_cands;
5670 }
5671 
5672 /* For each size of the induction variable set determine the penalty.  */
5673 
5674 static void
5675 determine_set_costs (struct ivopts_data *data)
5676 {
5677   unsigned j, n;
5678   gphi *phi;
5679   gphi_iterator psi;
5680   tree op;
5681   struct loop *loop = data->current_loop;
5682   bitmap_iterator bi;
5683 
5684   if (dump_file && (dump_flags & TDF_DETAILS))
5685     {
5686       fprintf (dump_file, "<Global Costs>:\n");
5687       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5688       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5689       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5690       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5691     }
5692 
5693   n = 0;
5694   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5695     {
5696       phi = psi.phi ();
5697       op = PHI_RESULT (phi);
5698 
5699       if (virtual_operand_p (op))
5700 	continue;
5701 
5702       if (get_iv (data, op))
5703 	continue;
5704 
5705       if (!POINTER_TYPE_P (TREE_TYPE (op))
5706 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5707 	continue;
5708 
5709       n++;
5710     }
5711 
5712   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5713     {
5714       struct version_info *info = ver_info (data, j);
5715 
5716       if (info->inv_id && info->has_nonlin_use)
5717 	n++;
5718     }
5719 
5720   data->regs_used = n;
5721   if (dump_file && (dump_flags & TDF_DETAILS))
5722     fprintf (dump_file, "  regs_used %d\n", n);
5723 
5724   if (dump_file && (dump_flags & TDF_DETAILS))
5725     {
5726       fprintf (dump_file, "  cost for size:\n");
5727       fprintf (dump_file, "  ivs\tcost\n");
5728       for (j = 0; j <= 2 * target_avail_regs; j++)
5729 	fprintf (dump_file, "  %d\t%d\n", j,
5730 		 ivopts_estimate_reg_pressure (data, 0, j));
5731       fprintf (dump_file, "\n");
5732     }
5733 }
5734 
5735 /* Returns true if A is a cheaper cost pair than B.  */
5736 
5737 static bool
5738 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5739 {
5740   if (!a)
5741     return false;
5742 
5743   if (!b)
5744     return true;
5745 
5746   if (a->cost < b->cost)
5747     return true;
5748 
5749   if (b->cost < a->cost)
5750     return false;
5751 
5752   /* In case the costs are the same, prefer the cheaper candidate.  */
5753   if (a->cand->cost < b->cand->cost)
5754     return true;
5755 
5756   return false;
5757 }
5758 
5759 /* Compare whether A is a more expensive cost pair than B.  Return 1, 0 or -1
5760    if A is more expensive than, equal to, or cheaper than B, respectively.  */
5761 
5762 static int
5763 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5764 {
5765   if (cheaper_cost_pair (a, b))
5766     return -1;
5767   if (cheaper_cost_pair (b, a))
5768     return 1;
5769 
5770   return 0;
5771 }
5772 
5773 /* Returns the cost pair of the candidate by which GROUP is expressed in IVS.  */
5774 
5775 static struct cost_pair *
5776 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5777 {
5778   return ivs->cand_for_group[group->id];
5779 }
5780 
5781 /* Computes the cost field of IVS structure.  */
5782 
5783 static void
5784 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5785 {
5786   comp_cost cost = ivs->cand_use_cost;
5787 
5788   cost += ivs->cand_cost;
5789   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5790   ivs->cost = cost;
5791 }
5792 
5793 /* Remove uses of the invariants in set INVS by decrementing their counters
5794    in N_INV_USES, updating the invariant count of IVS accordingly.  */
5795 
5796 static void
5797 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5798 {
5799   bitmap_iterator bi;
5800   unsigned iid;
5801 
5802   if (!invs)
5803     return;
5804 
5805   gcc_assert (n_inv_uses != NULL);
5806   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5807     {
5808       n_inv_uses[iid]--;
5809       if (n_inv_uses[iid] == 0)
5810 	ivs->n_invs--;
5811     }
5812 }
5813 
5814 /* Set GROUP not to be expressed by any candidate in IVS.  */
5815 
5816 static void
5817 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5818 		 struct iv_group *group)
5819 {
5820   unsigned gid = group->id, cid;
5821   struct cost_pair *cp;
5822 
5823   cp = ivs->cand_for_group[gid];
5824   if (!cp)
5825     return;
5826   cid = cp->cand->id;
5827 
5828   ivs->bad_groups++;
5829   ivs->cand_for_group[gid] = NULL;
5830   ivs->n_cand_uses[cid]--;
5831 
5832   if (ivs->n_cand_uses[cid] == 0)
5833     {
5834       bitmap_clear_bit (ivs->cands, cid);
5835       ivs->n_cands--;
5836       ivs->cand_cost -= cp->cand->cost;
5837       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5838       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5839     }
5840 
5841   ivs->cand_use_cost -= cp->cost;
5842   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5843   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5844   iv_ca_recount_cost (data, ivs);
5845 }
5846 
5847 /* Add uses of the invariants in set INVS by incrementing their counters
5848    in N_INV_USES, updating the invariant count of IVS accordingly.  */
5849 
5850 static void
5851 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5852 {
5853   bitmap_iterator bi;
5854   unsigned iid;
5855 
5856   if (!invs)
5857     return;
5858 
5859   gcc_assert (n_inv_uses != NULL);
5860   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5861     {
5862       n_inv_uses[iid]++;
5863       if (n_inv_uses[iid] == 1)
5864 	ivs->n_invs++;
5865     }
5866 }
5867 
5868 /* Set cost pair for GROUP in set IVS to CP.  */
5869 
5870 static void
5871 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5872 	      struct iv_group *group, struct cost_pair *cp)
5873 {
5874   unsigned gid = group->id, cid;
5875 
5876   if (ivs->cand_for_group[gid] == cp)
5877     return;
5878 
5879   if (ivs->cand_for_group[gid])
5880     iv_ca_set_no_cp (data, ivs, group);
5881 
5882   if (cp)
5883     {
5884       cid = cp->cand->id;
5885 
5886       ivs->bad_groups--;
5887       ivs->cand_for_group[gid] = cp;
5888       ivs->n_cand_uses[cid]++;
5889       if (ivs->n_cand_uses[cid] == 1)
5890 	{
5891 	  bitmap_set_bit (ivs->cands, cid);
5892 	  ivs->n_cands++;
5893 	  ivs->cand_cost += cp->cand->cost;
5894 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5895 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5896 	}
5897 
5898       ivs->cand_use_cost += cp->cost;
5899       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5900       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5901       iv_ca_recount_cost (data, ivs);
5902     }
5903 }
5904 
5905 /* Extend set IVS by expressing GROUP by some of the candidates in it
5906    if possible.  Consider all important candidates if the candidates in
5907    set IVS don't give any result.  */
5908 
5909 static void
5910 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5911 	       struct iv_group *group)
5912 {
5913   struct cost_pair *best_cp = NULL, *cp;
5914   bitmap_iterator bi;
5915   unsigned i;
5916   struct iv_cand *cand;
5917 
5918   gcc_assert (ivs->upto >= group->id);
5919   ivs->upto++;
5920   ivs->bad_groups++;
5921 
5922   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5923     {
5924       cand = data->vcands[i];
5925       cp = get_group_iv_cost (data, group, cand);
5926       if (cheaper_cost_pair (cp, best_cp))
5927 	best_cp = cp;
5928     }
5929 
5930   if (best_cp == NULL)
5931     {
5932       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5933 	{
5934 	  cand = data->vcands[i];
5935 	  cp = get_group_iv_cost (data, group, cand);
5936 	  if (cheaper_cost_pair (cp, best_cp))
5937 	    best_cp = cp;
5938 	}
5939     }
5940 
5941   iv_ca_set_cp (data, ivs, group, best_cp);
5942 }
5943 
5944 /* Get cost for assignment IVS.  */
5945 
5946 static comp_cost
5947 iv_ca_cost (struct iv_ca *ivs)
5948 {
5949   /* This was a conditional expression but it triggered a bug in
5950      Sun C 5.5.  */
5951   if (ivs->bad_groups)
5952     return infinite_cost;
5953   else
5954     return ivs->cost;
5955 }
5956 
5957 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5958    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5959    respectively.  */
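/* This is done by temporarily installing NEW_CP in IVS, reading back the
   invariant count, and then restoring OLD_CP, so IVS is left unchanged.  */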
5960 
5961 static int
5962 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5963 		    struct iv_group *group, struct cost_pair *old_cp,
5964 		    struct cost_pair *new_cp)
5965 {
5966   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5967   unsigned old_n_invs = ivs->n_invs;
5968   iv_ca_set_cp (data, ivs, group, new_cp);
5969   unsigned new_n_invs = ivs->n_invs;
5970   iv_ca_set_cp (data, ivs, group, old_cp);
5971 
5972   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5973 }
5974 
5975 /* Creates a change record expressing GROUP by NEW_CP instead of OLD_CP and
5976    chains it before NEXT.  */
5977 
5978 static struct iv_ca_delta *
5979 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5980 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
5981 {
5982   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5983 
5984   change->group = group;
5985   change->old_cp = old_cp;
5986   change->new_cp = new_cp;
5987   change->next = next;
5988 
5989   return change;
5990 }
5991 
5992 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5993    are rewritten.  */
5994 
5995 static struct iv_ca_delta *
5996 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5997 {
5998   struct iv_ca_delta *last;
5999 
6000   if (!l2)
6001     return l1;
6002 
6003   if (!l1)
6004     return l2;
6005 
6006   for (last = l1; last->next; last = last->next)
6007     continue;
6008   last->next = l2;
6009 
6010   return l1;
6011 }
6012 
6013 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6014 
6015 static struct iv_ca_delta *
6016 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6017 {
6018   struct iv_ca_delta *act, *next, *prev = NULL;
6019 
6020   for (act = delta; act; act = next)
6021     {
6022       next = act->next;
6023       act->next = prev;
6024       prev = act;
6025 
6026       std::swap (act->old_cp, act->new_cp);
6027     }
6028 
6029   return prev;
6030 }
6031 
6032 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6033    reverted instead.  */
6034 
6035 static void
6036 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6037 		    struct iv_ca_delta *delta, bool forward)
6038 {
6039   struct cost_pair *from, *to;
6040   struct iv_ca_delta *act;
6041 
6042   if (!forward)
6043     delta = iv_ca_delta_reverse (delta);
6044 
6045   for (act = delta; act; act = act->next)
6046     {
6047       from = act->old_cp;
6048       to = act->new_cp;
6049       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6050       iv_ca_set_cp (data, ivs, act->group, to);
6051     }
6052 
6053   if (!forward)
6054     iv_ca_delta_reverse (delta);
6055 }
6056 
6057 /* Returns true if CAND is used in IVS.  */
6058 
6059 static bool
6060 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6061 {
6062   return ivs->n_cand_uses[cand->id] > 0;
6063 }
6064 
6065 /* Returns number of induction variable candidates in the set IVS.  */
6066 
6067 static unsigned
6068 iv_ca_n_cands (struct iv_ca *ivs)
6069 {
6070   return ivs->n_cands;
6071 }
6072 
6073 /* Free the list of changes DELTA.  */
6074 
6075 static void
6076 iv_ca_delta_free (struct iv_ca_delta **delta)
6077 {
6078   struct iv_ca_delta *act, *next;
6079 
6080   for (act = *delta; act; act = next)
6081     {
6082       next = act->next;
6083       free (act);
6084     }
6085 
6086   *delta = NULL;
6087 }
6088 
6089 /* Allocates new iv candidates assignment.  */
6090 
6091 static struct iv_ca *
6092 iv_ca_new (struct ivopts_data *data)
6093 {
6094   struct iv_ca *nw = XNEW (struct iv_ca);
6095 
6096   nw->upto = 0;
6097   nw->bad_groups = 0;
6098   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6099 				 data->vgroups.length ());
6100   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6101   nw->cands = BITMAP_ALLOC (NULL);
6102   nw->n_cands = 0;
6103   nw->n_invs = 0;
6104   nw->cand_use_cost = no_cost;
6105   nw->cand_cost = 0;
6106   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6107   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6108   nw->cost = no_cost;
6109 
6110   return nw;
6111 }
6112 
6113 /* Free memory occupied by the set IVS.  */
6114 
6115 static void
6116 iv_ca_free (struct iv_ca **ivs)
6117 {
6118   free ((*ivs)->cand_for_group);
6119   free ((*ivs)->n_cand_uses);
6120   BITMAP_FREE ((*ivs)->cands);
6121   free ((*ivs)->n_inv_var_uses);
6122   free ((*ivs)->n_inv_expr_uses);
6123   free (*ivs);
6124   *ivs = NULL;
6125 }
6126 
6127 /* Dumps IVS to FILE.  */
6128 
6129 static void
6130 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6131 {
6132   unsigned i;
6133   comp_cost cost = iv_ca_cost (ivs);
6134 
6135   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6136 	   cost.complexity);
6137   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6138 	   ivs->cand_cost, ivs->cand_use_cost.cost,
6139 	   ivs->cand_use_cost.complexity);
6140   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6141 
6142   for (i = 0; i < ivs->upto; i++)
6143     {
6144       struct iv_group *group = data->vgroups[i];
6145       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6146       if (cp)
6147         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6148 		 group->id, cp->cand->id, cp->cost.cost,
6149 		 cp->cost.complexity);
6150       else
6151 	fprintf (file, "   group:%d --> ??\n", group->id);
6152     }
6153 
6154   const char *pref = "";
6155   fprintf (file, "  invariant variables: ");
6156   for (i = 1; i <= data->max_inv_var_id; i++)
6157     if (ivs->n_inv_var_uses[i])
6158       {
6159 	fprintf (file, "%s%d", pref, i);
6160 	pref = ", ";
6161       }
6162 
6163   pref = "";
6164   fprintf (file, "\n  invariant expressions: ");
6165   for (i = 1; i <= data->max_inv_expr_id; i++)
6166     if (ivs->n_inv_expr_uses[i])
6167       {
6168 	fprintf (file, "%s%d", pref, i);
6169 	pref = ", ";
6170       }
6171 
6172   fprintf (file, "\n\n");
6173 }
6174 
6175 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
6176    of the new set, and store the differences in DELTA.  The number of
6177    induction variables in the new set is stored in N_IVS.  When MIN_NCAND is
6178    true, the function tries to find a solution with minimal iv candidates.  */
6179 
6180 static comp_cost
6181 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6182 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6183 	      unsigned *n_ivs, bool min_ncand)
6184 {
6185   unsigned i;
6186   comp_cost cost;
6187   struct iv_group *group;
6188   struct cost_pair *old_cp, *new_cp;
6189 
6190   *delta = NULL;
6191   for (i = 0; i < ivs->upto; i++)
6192     {
6193       group = data->vgroups[i];
6194       old_cp = iv_ca_cand_for_group (ivs, group);
6195 
6196       if (old_cp
6197 	  && old_cp->cand == cand)
6198 	continue;
6199 
6200       new_cp = get_group_iv_cost (data, group, cand);
6201       if (!new_cp)
6202 	continue;
6203 
6204       if (!min_ncand)
6205 	{
6206 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6207 	  /* Skip if new_cp depends on more invariants.  */
6208 	  if (cmp_invs > 0)
6209 	    continue;
6210 
6211 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6212 	  /* Skip if new_cp is not cheaper.  */
6213 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6214 	    continue;
6215 	}
6216 
6217       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6218     }
6219 
6220   iv_ca_delta_commit (data, ivs, *delta, true);
6221   cost = iv_ca_cost (ivs);
6222   if (n_ivs)
6223     *n_ivs = iv_ca_n_cands (ivs);
6224   iv_ca_delta_commit (data, ivs, *delta, false);
6225 
6226   return cost;
6227 }
6228 
6229 /* Try narrowing set IVS by removing CAND.  Return the cost of
6230    the new set and store the differences in DELTA.  START is
6231    the candidate with which we start narrowing.  */
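/* For every group currently expressed by CAND, the cheapest alternative
   among the remaining candidates in IVS (starting from START) is chosen;
   if some group has no alternative, the narrowing fails with infinite_cost.  */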
6232 
6233 static comp_cost
6234 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6235 	      struct iv_cand *cand, struct iv_cand *start,
6236 	      struct iv_ca_delta **delta)
6237 {
6238   unsigned i, ci;
6239   struct iv_group *group;
6240   struct cost_pair *old_cp, *new_cp, *cp;
6241   bitmap_iterator bi;
6242   struct iv_cand *cnd;
6243   comp_cost cost, best_cost, acost;
6244 
6245   *delta = NULL;
6246   for (i = 0; i < data->vgroups.length (); i++)
6247     {
6248       group = data->vgroups[i];
6249 
6250       old_cp = iv_ca_cand_for_group (ivs, group);
6251       if (old_cp->cand != cand)
6252 	continue;
6253 
6254       best_cost = iv_ca_cost (ivs);
6255       /* Start narrowing with START.  */
6256       new_cp = get_group_iv_cost (data, group, start);
6257 
6258       if (data->consider_all_candidates)
6259 	{
6260 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6261 	    {
6262 	      if (ci == cand->id || (start && ci == start->id))
6263 		continue;
6264 
6265 	      cnd = data->vcands[ci];
6266 
6267 	      cp = get_group_iv_cost (data, group, cnd);
6268 	      if (!cp)
6269 		continue;
6270 
6271 	      iv_ca_set_cp (data, ivs, group, cp);
6272 	      acost = iv_ca_cost (ivs);
6273 
6274 	      if (acost < best_cost)
6275 		{
6276 		  best_cost = acost;
6277 		  new_cp = cp;
6278 		}
6279 	    }
6280 	}
6281       else
6282 	{
6283 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6284 	    {
6285 	      if (ci == cand->id || (start && ci == start->id))
6286 		continue;
6287 
6288 	      cnd = data->vcands[ci];
6289 
6290 	      cp = get_group_iv_cost (data, group, cnd);
6291 	      if (!cp)
6292 		continue;
6293 
6294 	      iv_ca_set_cp (data, ivs, group, cp);
6295 	      acost = iv_ca_cost (ivs);
6296 
6297 	      if (acost < best_cost)
6298 		{
6299 		  best_cost = acost;
6300 		  new_cp = cp;
6301 		}
6302 	    }
6303 	}
6304       /* Restore to old cp for use.  */
6305       iv_ca_set_cp (data, ivs, group, old_cp);
6306 
6307       if (!new_cp)
6308 	{
6309 	  iv_ca_delta_free (delta);
6310 	  return infinite_cost;
6311 	}
6312 
6313       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6314     }
6315 
6316   iv_ca_delta_commit (data, ivs, *delta, true);
6317   cost = iv_ca_cost (ivs);
6318   iv_ca_delta_commit (data, ivs, *delta, false);
6319 
6320   return cost;
6321 }
6322 
6323 /* Try optimizing the set of candidates IVS by removing candidates other
6324    than EXCEPT_CAND from it.  Return cost of the new set, and store
6325    differences in DELTA.  */
6326 
6327 static comp_cost
6328 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6329 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6330 {
6331   bitmap_iterator bi;
6332   struct iv_ca_delta *act_delta, *best_delta;
6333   unsigned i;
6334   comp_cost best_cost, acost;
6335   struct iv_cand *cand;
6336 
6337   best_delta = NULL;
6338   best_cost = iv_ca_cost (ivs);
6339 
6340   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6341     {
6342       cand = data->vcands[i];
6343 
6344       if (cand == except_cand)
6345 	continue;
6346 
6347       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6348 
6349       if (acost < best_cost)
6350 	{
6351 	  best_cost = acost;
6352 	  iv_ca_delta_free (&best_delta);
6353 	  best_delta = act_delta;
6354 	}
6355       else
6356 	iv_ca_delta_free (&act_delta);
6357     }
6358 
6359   if (!best_delta)
6360     {
6361       *delta = NULL;
6362       return best_cost;
6363     }
6364 
6365   /* Recurse to possibly remove other unnecessary ivs.  */
6366   iv_ca_delta_commit (data, ivs, best_delta, true);
6367   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6368   iv_ca_delta_commit (data, ivs, best_delta, false);
6369   *delta = iv_ca_delta_join (best_delta, *delta);
6370   return best_cost;
6371 }
6372 
6373 /* Check whether CAND_IDX is a candidate other than OLD_CAND with a cheaper
6374    local cost for GROUP than BEST_CP.  If so, return a pointer to its
6375    cost_pair; otherwise just return BEST_CP.  */
6376 
6377 static struct cost_pair*
6378 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6379 			unsigned int cand_idx, struct iv_cand *old_cand,
6380 			struct cost_pair *best_cp)
6381 {
6382   struct iv_cand *cand;
6383   struct cost_pair *cp;
6384 
6385   gcc_assert (old_cand != NULL && best_cp != NULL);
6386   if (cand_idx == old_cand->id)
6387     return best_cp;
6388 
6389   cand = data->vcands[cand_idx];
6390   cp = get_group_iv_cost (data, group, cand);
6391   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6392     return cp;
6393 
6394   return best_cp;
6395 }
6396 
6397 /* Try breaking the locally optimal fixed-point for IVS by replacing
6398    candidates that are used by more than one iv use.  For each such
6399    candidate, this function tries to represent the iv uses under it using
6400    other candidates with lower local cost, then tries to prune the new set.
6401    If the new set has lower cost, it returns the new cost after recording
6402    the candidate replacement in list DELTA.  */
6403 
6404 static comp_cost
6405 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6406 	       struct iv_ca_delta **delta)
6407 {
6408   bitmap_iterator bi, bj;
6409   unsigned int i, j, k;
6410   struct iv_cand *cand;
6411   comp_cost orig_cost, acost;
6412   struct iv_ca_delta *act_delta, *tmp_delta;
6413   struct cost_pair *old_cp, *best_cp = NULL;
6414 
6415   *delta = NULL;
6416   orig_cost = iv_ca_cost (ivs);
6417 
6418   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6419     {
6420       if (ivs->n_cand_uses[i] == 1
6421 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6422 	continue;
6423 
6424       cand = data->vcands[i];
6425 
6426       act_delta = NULL;
6427       /*  Represent uses under current candidate using other ones with
6428 	  lower local cost.  */
6429       for (j = 0; j < ivs->upto; j++)
6430 	{
6431 	  struct iv_group *group = data->vgroups[j];
6432 	  old_cp = iv_ca_cand_for_group (ivs, group);
6433 
6434 	  if (old_cp->cand != cand)
6435 	    continue;
6436 
6437 	  best_cp = old_cp;
6438 	  if (data->consider_all_candidates)
6439 	    for (k = 0; k < data->vcands.length (); k++)
6440 	      best_cp = cheaper_cost_with_cand (data, group, k,
6441 						old_cp->cand, best_cp);
6442 	  else
6443 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6444 	      best_cp = cheaper_cost_with_cand (data, group, k,
6445 						old_cp->cand, best_cp);
6446 
6447 	  if (best_cp == old_cp)
6448 	    continue;
6449 
6450 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6451 	}
6452       /* No need for further prune.  */
6453       if (!act_delta)
6454 	continue;
6455 
6456       /* Prune the new candidate set.  */
6457       iv_ca_delta_commit (data, ivs, act_delta, true);
6458       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6459       iv_ca_delta_commit (data, ivs, act_delta, false);
6460       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6461 
6462       if (acost < orig_cost)
6463 	{
6464 	  *delta = act_delta;
6465 	  return acost;
6466 	}
6467       else
6468 	iv_ca_delta_free (&act_delta);
6469     }
6470 
6471   return orig_cost;
6472 }
6473 
6474 /* Tries to extend the set IVS in the best possible way in order to
6475    express GROUP.  If ORIGINALP is true, prefer candidates from
6476    the original set of IVs, otherwise favor important candidates not
6477    based on any memory object.  */
6478 
6479 static bool
6480 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6481 		  struct iv_group *group, bool originalp)
6482 {
6483   comp_cost best_cost, act_cost;
6484   unsigned i;
6485   bitmap_iterator bi;
6486   struct iv_cand *cand;
6487   struct iv_ca_delta *best_delta = NULL, *act_delta;
6488   struct cost_pair *cp;
6489 
6490   iv_ca_add_group (data, ivs, group);
6491   best_cost = iv_ca_cost (ivs);
6492   cp = iv_ca_cand_for_group (ivs, group);
6493   if (cp)
6494     {
6495       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6496       iv_ca_set_no_cp (data, ivs, group);
6497     }
6498 
6499   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6500      first try important candidates not based on any memory object.  Only if
6501      this fails, try the specific ones.  Rationale -- in loops with many
6502      variables the best choice often is to use just one generic biv.  If we
6503      added here many ivs specific to the uses, the optimization algorithm later
6504      would be likely to get stuck in a local minimum, thus causing us to create
6505      too many ivs.  The approach from few ivs to more seems more likely to be
6506      successful -- starting from few ivs, replacing an expensive use by a
6507      specific iv should always be a win.  */
6508   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6509     {
6510       cand = data->vcands[i];
6511 
6512       if (originalp && cand->pos != IP_ORIGINAL)
6513 	continue;
6514 
6515       if (!originalp && cand->iv->base_object != NULL_TREE)
6516 	continue;
6517 
6518       if (iv_ca_cand_used_p (ivs, cand))
6519 	continue;
6520 
6521       cp = get_group_iv_cost (data, group, cand);
6522       if (!cp)
6523 	continue;
6524 
6525       iv_ca_set_cp (data, ivs, group, cp);
6526       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6527 			       true);
6528       iv_ca_set_no_cp (data, ivs, group);
6529       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6530 
6531       if (act_cost < best_cost)
6532 	{
6533 	  best_cost = act_cost;
6534 
6535 	  iv_ca_delta_free (&best_delta);
6536 	  best_delta = act_delta;
6537 	}
6538       else
6539 	iv_ca_delta_free (&act_delta);
6540     }
6541 
6542   if (best_cost.infinite_cost_p ())
6543     {
6544       for (i = 0; i < group->n_map_members; i++)
6545 	{
6546 	  cp = group->cost_map + i;
6547 	  cand = cp->cand;
6548 	  if (!cand)
6549 	    continue;
6550 
6551 	  /* Already tried this.  */
6552 	  if (cand->important)
6553 	    {
6554 	      if (originalp && cand->pos == IP_ORIGINAL)
6555 		continue;
6556 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6557 		continue;
6558 	    }
6559 
6560 	  if (iv_ca_cand_used_p (ivs, cand))
6561 	    continue;
6562 
6563 	  act_delta = NULL;
6564 	  iv_ca_set_cp (data, ivs, group, cp);
6565 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6566 	  iv_ca_set_no_cp (data, ivs, group);
6567 	  act_delta = iv_ca_delta_add (group,
6568 				       iv_ca_cand_for_group (ivs, group),
6569 				       cp, act_delta);
6570 
6571 	  if (act_cost < best_cost)
6572 	    {
6573 	      best_cost = act_cost;
6574 
6575 	      if (best_delta)
6576 		iv_ca_delta_free (&best_delta);
6577 	      best_delta = act_delta;
6578 	    }
6579 	  else
6580 	    iv_ca_delta_free (&act_delta);
6581 	}
6582     }
6583 
6584   iv_ca_delta_commit (data, ivs, best_delta, true);
6585   iv_ca_delta_free (&best_delta);
6586 
6587   return !best_cost.infinite_cost_p ();
6588 }
6589 
6590 /* Finds an initial assignment of candidates to uses.  */
6591 
6592 static struct iv_ca *
6593 get_initial_solution (struct ivopts_data *data, bool originalp)
6594 {
6595   unsigned i;
6596   struct iv_ca *ivs = iv_ca_new (data);
6597 
6598   for (i = 0; i < data->vgroups.length (); i++)
6599     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6600       {
6601 	iv_ca_free (&ivs);
6602 	return NULL;
6603       }
6604 
6605   return ivs;
6606 }
6607 
6608 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6609    points to a bool variable; if it is true, this function tries to break
6610    the locally optimal fixed-point by replacing candidates in IVS.  */
6611 
6612 static bool
6613 try_improve_iv_set (struct ivopts_data *data,
6614 		    struct iv_ca *ivs, bool *try_replace_p)
6615 {
6616   unsigned i, n_ivs;
6617   comp_cost acost, best_cost = iv_ca_cost (ivs);
6618   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6619   struct iv_cand *cand;
6620 
6621   /* Try extending the set of induction variables by one.  */
6622   for (i = 0; i < data->vcands.length (); i++)
6623     {
6624       cand = data->vcands[i];
6625 
6626       if (iv_ca_cand_used_p (ivs, cand))
6627 	continue;
6628 
6629       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6630       if (!act_delta)
6631 	continue;
6632 
6633       /* If we successfully added the candidate and the set is small enough,
6634 	 try optimizing it by removing other candidates.  */
6635       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6636 	{
6637 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6638 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6639 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6640 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6641 	}
6642 
6643       if (acost < best_cost)
6644 	{
6645 	  best_cost = acost;
6646 	  iv_ca_delta_free (&best_delta);
6647 	  best_delta = act_delta;
6648 	}
6649       else
6650 	iv_ca_delta_free (&act_delta);
6651     }
6652 
6653   if (!best_delta)
6654     {
6655       /* Try removing the candidates from the set instead.  */
6656       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6657 
6658       if (!best_delta && *try_replace_p)
6659 	{
6660 	  *try_replace_p = false;
6661 	  /* So far the candidate-selection algorithm tends to choose fewer
6662 	     IVs, so that it can handle loops that have many variables but
6663 	     for which the best choice is often a single general biv.  One
6664 	     weakness is that it cannot handle the opposite case, in which
6665 	     different candidates should be chosen for different uses.  To
6666 	     solve that problem, we replace candidates in the manner described
6667 	     in the comments of iv_ca_replace, giving the general algorithm a
6668 	     chance to break out of a locally optimal fixed point in such cases.  */
6669 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6670 	}
6671 
6672       if (!best_delta)
6673 	return false;
6674     }
6675 
6676   iv_ca_delta_commit (data, ivs, best_delta, true);
6677   gcc_assert (best_cost == iv_ca_cost (ivs));
6678   iv_ca_delta_free (&best_delta);
6679   return true;
6680 }
6681 
6682 /* Attempts to find the optimal set of induction variables.  We use a simple
6683    greedy heuristic: we try to replace at most one candidate in the selected
6684    solution and remove the unused ivs while this improves the cost.  */
6685 
6686 static struct iv_ca *
6687 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6688 {
6689   struct iv_ca *set;
6690   bool try_replace_p = true;
6691 
6692   /* Get the initial solution.  */
6693   set = get_initial_solution (data, originalp);
6694   if (!set)
6695     {
6696       if (dump_file && (dump_flags & TDF_DETAILS))
6697 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6698       return NULL;
6699     }
6700 
6701   if (dump_file && (dump_flags & TDF_DETAILS))
6702     {
6703       fprintf (dump_file, "Initial set of candidates:\n");
6704       iv_ca_dump (data, dump_file, set);
6705     }
6706 
6707   while (try_improve_iv_set (data, set, &try_replace_p))
6708     {
6709       if (dump_file && (dump_flags & TDF_DETAILS))
6710 	{
6711 	  fprintf (dump_file, "Improved to:\n");
6712 	  iv_ca_dump (data, dump_file, set);
6713 	}
6714     }
6715 
6716   return set;
6717 }
6718 
6719 static struct iv_ca *
6720 find_optimal_iv_set (struct ivopts_data *data)
6721 {
6722   unsigned i;
6723   comp_cost cost, origcost;
6724   struct iv_ca *set, *origset;
6725 
6726   /* Determine the cost using a strategy that starts with the original IVs,
6727      then try again with a strategy that prefers candidates not based
6728      on any IVs.  */
6729   origset = find_optimal_iv_set_1 (data, true);
6730   set = find_optimal_iv_set_1 (data, false);
6731 
6732   if (!origset && !set)
6733     return NULL;
6734 
6735   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6736   cost = set ? iv_ca_cost (set) : infinite_cost;
6737 
6738   if (dump_file && (dump_flags & TDF_DETAILS))
6739     {
6740       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6741 	       origcost.cost, origcost.complexity);
6742       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6743 	       cost.cost, cost.complexity);
6744     }
6745 
6746   /* Choose the one with the best cost.  */
6747   if (origcost <= cost)
6748     {
6749       if (set)
6750 	iv_ca_free (&set);
6751       set = origset;
6752     }
6753   else if (origset)
6754     iv_ca_free (&origset);
6755 
6756   for (i = 0; i < data->vgroups.length (); i++)
6757     {
6758       struct iv_group *group = data->vgroups[i];
6759       group->selected = iv_ca_cand_for_group (set, group)->cand;
6760     }
6761 
6762   return set;
6763 }
6764 
6765 /* Creates a new induction variable corresponding to CAND.  */
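/* As an illustrative sketch only (invented SSA names): for a candidate with
   base 0 and step 1 positioned at IP_NORMAL, the create_iv call below emits
   roughly

     # ivtmp_1 = PHI <0 (preheader), ivtmp_2 (latch)>
     ...
     ivtmp_2 = ivtmp_1 + 1;

   and stores the two SSA names in cand->var_before and cand->var_after.  */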
6766 
6767 static void
6768 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6769 {
6770   gimple_stmt_iterator incr_pos;
6771   tree base;
6772   struct iv_use *use;
6773   struct iv_group *group;
6774   bool after = false;
6775 
6776   gcc_assert (cand->iv != NULL);
6777 
6778   switch (cand->pos)
6779     {
6780     case IP_NORMAL:
6781       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6782       break;
6783 
6784     case IP_END:
6785       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6786       after = true;
6787       break;
6788 
6789     case IP_AFTER_USE:
6790       after = true;
6791       /* fall through */
6792     case IP_BEFORE_USE:
6793       incr_pos = gsi_for_stmt (cand->incremented_at);
6794       break;
6795 
6796     case IP_ORIGINAL:
6797       /* Mark that the iv is preserved.  */
6798       name_info (data, cand->var_before)->preserve_biv = true;
6799       name_info (data, cand->var_after)->preserve_biv = true;
6800 
6801       /* Rewrite the increment so that it uses var_before directly.  */
6802       use = find_interesting_uses_op (data, cand->var_after);
6803       group = data->vgroups[use->group_id];
6804       group->selected = cand;
6805       return;
6806     }
6807 
6808   gimple_add_tmp_var (cand->var_before);
6809 
6810   base = unshare_expr (cand->iv->base);
6811 
6812   create_iv (base, unshare_expr (cand->iv->step),
6813 	     cand->var_before, data->current_loop,
6814 	     &incr_pos, after, &cand->var_before, &cand->var_after);
6815 }
6816 
6817 /* Creates new induction variables described in SET.  */
6818 
6819 static void
6820 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6821 {
6822   unsigned i;
6823   struct iv_cand *cand;
6824   bitmap_iterator bi;
6825 
6826   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6827     {
6828       cand = data->vcands[i];
6829       create_new_iv (data, cand);
6830     }
6831 
6832   if (dump_file && (dump_flags & TDF_DETAILS))
6833     {
6834       fprintf (dump_file, "Selected IV set for loop %d",
6835 	       data->current_loop->num);
6836       if (data->loop_loc != UNKNOWN_LOCATION)
6837 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6838 		 LOCATION_LINE (data->loop_loc));
6839       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6840 	       avg_loop_niter (data->current_loop));
6841       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6842       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6843 	{
6844 	  cand = data->vcands[i];
6845 	  dump_cand (dump_file, cand);
6846 	}
6847       fprintf (dump_file, "\n");
6848     }
6849 }
6850 
6851 /* Rewrites USE (definition of iv used in a nonlinear expression)
6852    using candidate CAND.  */
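/* As a rough illustration (invented names, not from any testcase): suppose
   the original iv i (base 7, step 1) is needed only by a nonlinear use such
   as "x_1 = i_2 * i_2", and the selected candidate c has base 0 and step 1.
   The statement providing i's value is then rewritten to recompute it from
   the candidate, roughly "i_2 = c_5 + 7", so that the original biv can be
   removed later.  */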
6853 
6854 static void
6855 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6856 			    struct iv_use *use, struct iv_cand *cand)
6857 {
6858   gassign *ass;
6859   gimple_stmt_iterator bsi;
6860   tree comp, type = get_use_type (use), tgt;
6861 
6862   /* An important special case -- if we are asked to express value of
6863      the original iv by itself, just exit; there is no need to
6864      introduce a new computation (that might also need casting the
6865      variable to unsigned and back).  */
6866   if (cand->pos == IP_ORIGINAL
6867       && cand->incremented_at == use->stmt)
6868     {
6869       tree op = NULL_TREE;
6870       enum tree_code stmt_code;
6871 
6872       gcc_assert (is_gimple_assign (use->stmt));
6873       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6874 
6875       /* Check whether we may leave the computation unchanged.
6876 	 This is the case only if it does not rely on other
6877 	 computations in the loop -- otherwise, the computation
6878 	 we rely upon may be removed in remove_unused_ivs,
6879 	 thus leading to ICE.  */
6880       stmt_code = gimple_assign_rhs_code (use->stmt);
6881       if (stmt_code == PLUS_EXPR
6882 	  || stmt_code == MINUS_EXPR
6883 	  || stmt_code == POINTER_PLUS_EXPR)
6884 	{
6885 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6886 	    op = gimple_assign_rhs2 (use->stmt);
6887 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6888 	    op = gimple_assign_rhs1 (use->stmt);
6889 	}
6890 
6891       if (op != NULL_TREE)
6892 	{
6893 	  if (expr_invariant_in_loop_p (data->current_loop, op))
6894 	    return;
6895 	  if (TREE_CODE (op) == SSA_NAME)
6896 	    {
6897 	      struct iv *iv = get_iv (data, op);
6898 	      if (iv != NULL && integer_zerop (iv->step))
6899 		return;
6900 	    }
6901 	}
6902     }
6903 
6904   switch (gimple_code (use->stmt))
6905     {
6906     case GIMPLE_PHI:
6907       tgt = PHI_RESULT (use->stmt);
6908 
6909       /* If we should keep the biv, do not replace it.  */
6910       if (name_info (data, tgt)->preserve_biv)
6911 	return;
6912 
6913       bsi = gsi_after_labels (gimple_bb (use->stmt));
6914       break;
6915 
6916     case GIMPLE_ASSIGN:
6917       tgt = gimple_assign_lhs (use->stmt);
6918       bsi = gsi_for_stmt (use->stmt);
6919       break;
6920 
6921     default:
6922       gcc_unreachable ();
6923     }
6924 
6925   aff_tree aff_inv, aff_var;
6926   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6927 			      use, cand, &aff_inv, &aff_var))
6928     gcc_unreachable ();
6929 
6930   unshare_aff_combination (&aff_inv);
6931   unshare_aff_combination (&aff_var);
6932   /* Prefer CSE opportunities over keeping loop invariants by adding the
6933      offset last, so that iv_uses with different offsets can be CSEd.  */
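  /* Illustrative example (made-up values): for two uses whose affine forms
     are "inv + cand + 4" and "inv + cand + 8", computing "tmp = inv + cand"
     first and only then adding 4 resp. 8 exposes the shared subexpression
     to CSE, whereas folding the constants into the invariant part would
     hide it.  */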
6934   poly_widest_int offset = aff_inv.offset;
6935   aff_inv.offset = 0;
6936 
6937   gimple_seq stmt_list = NULL, seq = NULL;
6938   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6939   tree comp_op2 = aff_combination_to_tree (&aff_var);
6940   gcc_assert (comp_op1 && comp_op2);
6941 
6942   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6943   gimple_seq_add_seq (&stmt_list, seq);
6944   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6945   gimple_seq_add_seq (&stmt_list, seq);
6946 
6947   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6948     std::swap (comp_op1, comp_op2);
6949 
6950   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6951     {
6952       comp = fold_build_pointer_plus (comp_op1,
6953 				      fold_convert (sizetype, comp_op2));
6954       comp = fold_build_pointer_plus (comp,
6955 				      wide_int_to_tree (sizetype, offset));
6956     }
6957   else
6958     {
6959       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6960 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
6961       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6962 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6963     }
6964 
6965   comp = fold_convert (type, comp);
6966   if (!valid_gimple_rhs_p (comp)
6967       || (gimple_code (use->stmt) != GIMPLE_PHI
6968 	  /* We can't allow re-allocating the stmt as it might be pointed
6969 	     to still.  */
6970 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6971 	      >= gimple_num_ops (gsi_stmt (bsi)))))
6972     {
6973       comp = force_gimple_operand (comp, &seq, true, NULL);
6974       gimple_seq_add_seq (&stmt_list, seq);
6975       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6976 	{
6977 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6978 	  /* As this isn't a plain copy we have to reset alignment
6979 	     information.  */
6980 	  if (SSA_NAME_PTR_INFO (comp))
6981 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6982 	}
6983     }
6984 
6985   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6986   if (gimple_code (use->stmt) == GIMPLE_PHI)
6987     {
6988       ass = gimple_build_assign (tgt, comp);
6989       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6990 
6991       bsi = gsi_for_stmt (use->stmt);
6992       remove_phi_node (&bsi, false);
6993     }
6994   else
6995     {
6996       gimple_assign_set_rhs_from_tree (&bsi, comp);
6997       use->stmt = gsi_stmt (bsi);
6998     }
6999 }
7000 
7001 /* Performs a peephole optimization to reorder the iv update statement with
7002    a mem ref to enable instruction combining in later phases. The mem ref uses
7003    the iv value before the update, so the reordering transformation requires
7004    adjustment of the offset. CAND is the selected IV_CAND.
7005 
7006    Example:
7007 
7008    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7009    iv2 = iv1 + 1;
7010 
7011    if (t < val)      (1)
7012      goto L;
7013    goto Head;
7014 
7015 
7016    Directly propagating t to (1) would introduce an overlapping live range
7017    and thus increase register pressure.  This peephole transforms it into:
7018 
7019 
7020    iv2 = iv1 + 1;
7021    t = MEM_REF (base, iv2, 8, 8);
7022    if (t < val)
7023      goto L;
7024    goto Head;
7025 */
7026 
7027 static void
7028 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7029 {
7030   tree var_after;
7031   gimple *iv_update, *stmt;
7032   basic_block bb;
7033   gimple_stmt_iterator gsi, gsi_iv;
7034 
7035   if (cand->pos != IP_NORMAL)
7036     return;
7037 
7038   var_after = cand->var_after;
7039   iv_update = SSA_NAME_DEF_STMT (var_after);
7040 
7041   bb = gimple_bb (iv_update);
7042   gsi = gsi_last_nondebug_bb (bb);
7043   stmt = gsi_stmt (gsi);
7044 
7045   /* Only handle conditional statements for now.  */
7046   if (gimple_code (stmt) != GIMPLE_COND)
7047     return;
7048 
7049   gsi_prev_nondebug (&gsi);
7050   stmt = gsi_stmt (gsi);
7051   if (stmt != iv_update)
7052     return;
7053 
7054   gsi_prev_nondebug (&gsi);
7055   if (gsi_end_p (gsi))
7056     return;
7057 
7058   stmt = gsi_stmt (gsi);
7059   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7060     return;
7061 
7062   if (stmt != use->stmt)
7063     return;
7064 
7065   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7066     return;
7067 
7068   if (dump_file && (dump_flags & TDF_DETAILS))
7069     {
7070       fprintf (dump_file, "Reordering \n");
7071       print_gimple_stmt (dump_file, iv_update, 0);
7072       print_gimple_stmt (dump_file, use->stmt, 0);
7073       fprintf (dump_file, "\n");
7074     }
7075 
7076   gsi = gsi_for_stmt (use->stmt);
7077   gsi_iv = gsi_for_stmt (iv_update);
7078   gsi_move_before (&gsi_iv, &gsi);
7079 
7080   cand->pos = IP_BEFORE_USE;
7081   cand->incremented_at = use->stmt;
7082 }
7083 
7084 /* Return the alias pointer type that should be used for a MEM_REF
7085    associated with USE, which has type USE_PTR_ADDRESS.  */
7086 
7087 static tree
7088 get_alias_ptr_type_for_ptr_address (iv_use *use)
7089 {
7090   gcall *call = as_a <gcall *> (use->stmt);
7091   switch (gimple_call_internal_fn (call))
7092     {
7093     case IFN_MASK_LOAD:
7094     case IFN_MASK_STORE:
7095       /* The second argument contains the correct alias type.  */
7096       gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7097       return TREE_TYPE (gimple_call_arg (call, 1));
7098 
7099     default:
7100       gcc_unreachable ();
7101     }
7102 }
7103 
7104 
7105 /* Rewrites USE (address that is an iv) using candidate CAND.  */
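/* For instance (a sketch with invented names): an address use of a[i_1],
   rewritten against a pointer candidate p_7 that tracks &a[i_1], becomes a
   memory reference of the form MEM[(int *)p_7] built by create_mem_ref
   below, instead of recomputing &a + i_1 * 4 at every use.  */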
7106 
7107 static void
7108 rewrite_use_address (struct ivopts_data *data,
7109 		     struct iv_use *use, struct iv_cand *cand)
7110 {
7111   aff_tree aff;
7112   bool ok;
7113 
7114   adjust_iv_update_pos (cand, use);
7115   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7116   gcc_assert (ok);
7117   unshare_aff_combination (&aff);
7118 
7119   /* To avoid undefined overflow problems, all IV candidates use unsigned
7120      integer types.  The drawback is that this makes it impossible for
7121      create_mem_ref to distinguish an IV that is based on a memory object
7122      from one that represents simply an offset.
7123 
7124      To work around this problem, we pass a hint to create_mem_ref that
7125      indicates which variable (if any) in aff is an IV based on a memory
7126      object.  Note that we only consider the candidate.  If this is not
7127      based on an object, the base of the reference is in some subexpression
7128      of the use -- but these will use pointer types, so they are recognized
7129      by the create_mem_ref heuristics anyway.  */
7130   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7131   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7132   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7133   tree type = use->mem_type;
7134   tree alias_ptr_type;
7135   if (use->type == USE_PTR_ADDRESS)
7136     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7137   else
7138     {
7139       gcc_assert (type == TREE_TYPE (*use->op_p));
7140       unsigned int align = get_object_alignment (*use->op_p);
7141       if (align != TYPE_ALIGN (type))
7142 	type = build_aligned_type (type, align);
7143       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7144     }
7145   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7146 			     iv, base_hint, data->speed);
7147 
7148   if (use->type == USE_PTR_ADDRESS)
7149     {
7150       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7151       ref = fold_convert (get_use_type (use), ref);
7152       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7153 				      true, GSI_SAME_STMT);
7154     }
7155   else
7156     copy_ref_info (ref, *use->op_p);
7157 
7158   *use->op_p = ref;
7159 }
7160 
7161 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7162    candidate CAND.  */
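/* For example (an illustrative sketch, invented names): when the chosen cost
   pair carries a bound, an exit test such as

     if (i_1 < n_2)

   can be replaced by a test against the candidate,

     if (ivtmp_3 != _42)

   where _42 is the bound computed once in the loop preheader.  */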
7163 
7164 static void
7165 rewrite_use_compare (struct ivopts_data *data,
7166 		     struct iv_use *use, struct iv_cand *cand)
7167 {
7168   tree comp, op, bound;
7169   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7170   enum tree_code compare;
7171   struct iv_group *group = data->vgroups[use->group_id];
7172   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7173 
7174   bound = cp->value;
7175   if (bound)
7176     {
7177       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7178       tree var_type = TREE_TYPE (var);
7179       gimple_seq stmts;
7180 
7181       if (dump_file && (dump_flags & TDF_DETAILS))
7182 	{
7183 	  fprintf (dump_file, "Replacing exit test: ");
7184 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7185 	}
7186       compare = cp->comp;
7187       bound = unshare_expr (fold_convert (var_type, bound));
7188       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7189       if (stmts)
7190 	gsi_insert_seq_on_edge_immediate (
7191 		loop_preheader_edge (data->current_loop),
7192 		stmts);
7193 
7194       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7195       gimple_cond_set_lhs (cond_stmt, var);
7196       gimple_cond_set_code (cond_stmt, compare);
7197       gimple_cond_set_rhs (cond_stmt, op);
7198       return;
7199     }
7200 
7201   /* The induction variable elimination failed; just express the original
7202      giv.  */
7203   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7204   gcc_assert (comp != NULL_TREE);
7205   gcc_assert (use->op_p != NULL);
7206   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7207 					 SSA_NAME_VAR (*use->op_p),
7208 					 true, GSI_SAME_STMT);
7209 }
7210 
7211 /* Rewrite the groups using the selected induction variables.  */
7212 
7213 static void
7214 rewrite_groups (struct ivopts_data *data)
7215 {
7216   unsigned i, j;
7217 
7218   for (i = 0; i < data->vgroups.length (); i++)
7219     {
7220       struct iv_group *group = data->vgroups[i];
7221       struct iv_cand *cand = group->selected;
7222 
7223       gcc_assert (cand);
7224 
7225       if (group->type == USE_NONLINEAR_EXPR)
7226 	{
7227 	  for (j = 0; j < group->vuses.length (); j++)
7228 	    {
7229 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7230 	      update_stmt (group->vuses[j]->stmt);
7231 	    }
7232 	}
7233       else if (address_p (group->type))
7234 	{
7235 	  for (j = 0; j < group->vuses.length (); j++)
7236 	    {
7237 	      rewrite_use_address (data, group->vuses[j], cand);
7238 	      update_stmt (group->vuses[j]->stmt);
7239 	    }
7240 	}
7241       else
7242 	{
7243 	  gcc_assert (group->type == USE_COMPARE);
7244 
7245 	  for (j = 0; j < group->vuses.length (); j++)
7246 	    {
7247 	      rewrite_use_compare (data, group->vuses[j], cand);
7248 	      update_stmt (group->vuses[j]->stmt);
7249 	    }
7250 	}
7251     }
7252 }
7253 
7254 /* Removes the ivs that are not used after rewriting.  */
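/* A sketch of the debug handling below (invented SSA names): if a removed iv
   i_3 occurs in a bind like "# DEBUG i => i_3" and the selected candidate
   allows its value to be recomputed as "ivtmp_7 + 1", the bind is rewritten
   to "# DEBUG i => ivtmp_7 + 1"; when i_3 has several debug uses, a shared
   DEBUG_EXPR_DECL temporary is introduced instead.  */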
7255 
7256 static void
7257 remove_unused_ivs (struct ivopts_data *data)
7258 {
7259   unsigned j;
7260   bitmap_iterator bi;
7261   bitmap toremove = BITMAP_ALLOC (NULL);
7262 
7263   /* Figure out an order in which to release SSA DEFs so that we don't
7264      release something that we'd have to propagate into a debug stmt
7265      afterwards.  */
7266   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7267     {
7268       struct version_info *info;
7269 
7270       info = ver_info (data, j);
7271       if (info->iv
7272 	  && !integer_zerop (info->iv->step)
7273 	  && !info->inv_id
7274 	  && !info->iv->nonlin_use
7275 	  && !info->preserve_biv)
7276 	{
7277 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7278 
7279 	  tree def = info->iv->ssa_name;
7280 
7281 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7282 	    {
7283 	      imm_use_iterator imm_iter;
7284 	      use_operand_p use_p;
7285 	      gimple *stmt;
7286 	      int count = 0;
7287 
7288 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7289 		{
7290 		  if (!gimple_debug_bind_p (stmt))
7291 		    continue;
7292 
7293 		  /* We just want to determine whether to do nothing
7294 		     (count == 0), to substitute the computed
7295 		     expression into a single use of the SSA DEF by
7296 		     itself (count == 1), or to use a debug temp
7297 		     because the SSA DEF is used multiple times or as
7298 		     part of a larger expression (count > 1). */
7299 		  count++;
7300 		  if (gimple_debug_bind_get_value (stmt) != def)
7301 		    count++;
7302 
7303 		  if (count > 1)
7304 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7305 		}
7306 
7307 	      if (!count)
7308 		continue;
7309 
7310 	      struct iv_use dummy_use;
7311 	      struct iv_cand *best_cand = NULL, *cand;
7312 	      unsigned i, best_pref = 0, cand_pref;
7313 
7314 	      memset (&dummy_use, 0, sizeof (dummy_use));
7315 	      dummy_use.iv = info->iv;
7316 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7317 		{
7318 		  cand = data->vgroups[i]->selected;
7319 		  if (cand == best_cand)
7320 		    continue;
7321 		  cand_pref = operand_equal_p (cand->iv->step,
7322 					       info->iv->step, 0)
7323 		    ? 4 : 0;
7324 		  cand_pref
7325 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7326 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7327 		    ? 2 : 0;
7328 		  cand_pref
7329 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7330 		    ? 1 : 0;
7331 		  if (best_cand == NULL || best_pref < cand_pref)
7332 		    {
7333 		      best_cand = cand;
7334 		      best_pref = cand_pref;
7335 		    }
7336 		}
7337 
7338 	      if (!best_cand)
7339 		continue;
7340 
7341 	      tree comp = get_computation_at (data->current_loop,
7342 					      SSA_NAME_DEF_STMT (def),
7343 					      &dummy_use, best_cand);
7344 	      if (!comp)
7345 		continue;
7346 
7347 	      if (count > 1)
7348 		{
7349 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7350 		  DECL_ARTIFICIAL (vexpr) = 1;
7351 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7352 		  if (SSA_NAME_VAR (def))
7353 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7354 		  else
7355 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7356 		  gdebug *def_temp
7357 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7358 		  gimple_stmt_iterator gsi;
7359 
7360 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7361 		    gsi = gsi_after_labels (gimple_bb
7362 					    (SSA_NAME_DEF_STMT (def)));
7363 		  else
7364 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7365 
7366 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7367 		  comp = vexpr;
7368 		}
7369 
7370 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7371 		{
7372 		  if (!gimple_debug_bind_p (stmt))
7373 		    continue;
7374 
7375 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7376 		    SET_USE (use_p, comp);
7377 
7378 		  update_stmt (stmt);
7379 		}
7380 	    }
7381 	}
7382     }
7383 
7384   release_defs_bitset (toremove);
7385 
7386   BITMAP_FREE (toremove);
7387 }
7388 
7389 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7390    for hash_map::traverse.  */
7391 
7392 bool
7393 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7394 {
7395   free (value);
7396   return true;
7397 }
7398 
7399 /* Frees data allocated by the optimization of a single loop.  */
7400 
7401 static void
7402 free_loop_data (struct ivopts_data *data)
7403 {
7404   unsigned i, j;
7405   bitmap_iterator bi;
7406   tree obj;
7407 
7408   if (data->niters)
7409     {
7410       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7411       delete data->niters;
7412       data->niters = NULL;
7413     }
7414 
7415   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7416     {
7417       struct version_info *info;
7418 
7419       info = ver_info (data, i);
7420       info->iv = NULL;
7421       info->has_nonlin_use = false;
7422       info->preserve_biv = false;
7423       info->inv_id = 0;
7424     }
7425   bitmap_clear (data->relevant);
7426   bitmap_clear (data->important_candidates);
7427 
7428   for (i = 0; i < data->vgroups.length (); i++)
7429     {
7430       struct iv_group *group = data->vgroups[i];
7431 
7432       for (j = 0; j < group->vuses.length (); j++)
7433 	free (group->vuses[j]);
7434       group->vuses.release ();
7435 
7436       BITMAP_FREE (group->related_cands);
7437       for (j = 0; j < group->n_map_members; j++)
7438 	{
7439 	  if (group->cost_map[j].inv_vars)
7440 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7441 	  if (group->cost_map[j].inv_exprs)
7442 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7443 	}
7444 
7445       free (group->cost_map);
7446       free (group);
7447     }
7448   data->vgroups.truncate (0);
7449 
7450   for (i = 0; i < data->vcands.length (); i++)
7451     {
7452       struct iv_cand *cand = data->vcands[i];
7453 
7454       if (cand->inv_vars)
7455 	BITMAP_FREE (cand->inv_vars);
7456       if (cand->inv_exprs)
7457 	BITMAP_FREE (cand->inv_exprs);
7458       free (cand);
7459     }
7460   data->vcands.truncate (0);
7461 
7462   if (data->version_info_size < num_ssa_names)
7463     {
7464       data->version_info_size = 2 * num_ssa_names;
7465       free (data->version_info);
7466       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7467     }
7468 
7469   data->max_inv_var_id = 0;
7470   data->max_inv_expr_id = 0;
7471 
7472   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7473     SET_DECL_RTL (obj, NULL_RTX);
7474 
7475   decl_rtl_to_reset.truncate (0);
7476 
7477   data->inv_expr_tab->empty ();
7478 
7479   data->iv_common_cand_tab->empty ();
7480   data->iv_common_cands.truncate (0);
7481 }
7482 
7483 /* Finalizes the data structures used by the iv optimization pass,
7484    releasing what was allocated in tree_ssa_iv_optimize_init.  */
7485 
7486 static void
7487 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7488 {
7489   free_loop_data (data);
7490   free (data->version_info);
7491   BITMAP_FREE (data->relevant);
7492   BITMAP_FREE (data->important_candidates);
7493 
7494   decl_rtl_to_reset.release ();
7495   data->vgroups.release ();
7496   data->vcands.release ();
7497   delete data->inv_expr_tab;
7498   data->inv_expr_tab = NULL;
7499   free_affine_expand_cache (&data->name_expansion_cache);
7500   if (data->base_object_map)
7501     delete data->base_object_map;
7502   delete data->iv_common_cand_tab;
7503   data->iv_common_cand_tab = NULL;
7504   data->iv_common_cands.release ();
7505   obstack_free (&data->iv_obstack, NULL);
7506 }
7507 
7508 /* Returns true if the loop body BODY includes any function calls.  */
7509 
7510 static bool
7511 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7512 {
7513   gimple_stmt_iterator gsi;
7514   unsigned i;
7515 
7516   for (i = 0; i < num_nodes; i++)
7517     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7518       {
7519 	gimple *stmt = gsi_stmt (gsi);
7520 	if (is_gimple_call (stmt)
7521 	    && !gimple_call_internal_p (stmt)
7522 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7523 	  return true;
7524       }
7525   return false;
7526 }
7527 
7528 /* Optimizes the LOOP.  Returns true if anything changed.  */
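/* For illustration only (not from any particular testcase), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may, after candidate selection and rewriting, effectively become

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   combining strength reduction of the address with elimination of i.  */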
7529 
7530 static bool
7531 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7532 {
7533   bool changed = false;
7534   struct iv_ca *iv_ca;
7535   edge exit = single_dom_exit (loop);
7536   basic_block *body;
7537 
7538   gcc_assert (!data->niters);
7539   data->current_loop = loop;
7540   data->loop_loc = find_loop_location (loop);
7541   data->speed = optimize_loop_for_speed_p (loop);
7542 
7543   if (dump_file && (dump_flags & TDF_DETAILS))
7544     {
7545       fprintf (dump_file, "Processing loop %d", loop->num);
7546       if (data->loop_loc != UNKNOWN_LOCATION)
7547 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7548 		 LOCATION_LINE (data->loop_loc));
7549       fprintf (dump_file, "\n");
7550 
7551       if (exit)
7552 	{
7553 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7554 		   exit->src->index, exit->dest->index);
7555 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7556 	  fprintf (dump_file, "\n");
7557 	}
7558 
7559       fprintf (dump_file, "\n");
7560     }
7561 
7562   body = get_loop_body (loop);
7563   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7564   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7565   free (body);
7566 
7567   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7568 
7569   /* For each ssa name determines whether it behaves as an induction variable
7570      in some loop.  */
7571   if (!find_induction_variables (data))
7572     goto finish;
7573 
7574   /* Finds interesting uses (item 1).  */
7575   find_interesting_uses (data);
7576   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7577     goto finish;
7578 
7579   /* Finds candidates for the induction variables (item 2).  */
7580   find_iv_candidates (data);
7581 
7582   /* Calculates the costs (item 3, part 1).  */
7583   determine_iv_costs (data);
7584   determine_group_iv_costs (data);
7585   determine_set_costs (data);
7586 
7587   /* Find the optimal set of induction variables (item 3, part 2).  */
7588   iv_ca = find_optimal_iv_set (data);
7589   if (!iv_ca)
7590     goto finish;
7591   changed = true;
7592 
7593   /* Create the new induction variables (item 4, part 1).  */
7594   create_new_ivs (data, iv_ca);
7595   iv_ca_free (&iv_ca);
7596 
7597   /* Rewrite the uses (item 4, part 2).  */
7598   rewrite_groups (data);
7599 
7600   /* Remove the ivs that are unused after rewriting.  */
7601   remove_unused_ivs (data);
7602 
7603   /* We have changed the structure of induction variables; it might happen
7604      that definitions in the scev database refer to some of them that were
7605      eliminated.  */
7606   scev_reset ();
7607 
7608 finish:
7609   free_loop_data (data);
7610 
7611   return changed;
7612 }
7613 
7614 /* Main entry point.  Optimizes induction variables in loops.  */
7615 
7616 void
7617 tree_ssa_iv_optimize (void)
7618 {
7619   struct loop *loop;
7620   struct ivopts_data data;
7621 
7622   tree_ssa_iv_optimize_init (&data);
7623 
7624   /* Optimize the loops starting with the innermost ones.  */
7625   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7626     {
7627       if (dump_file && (dump_flags & TDF_DETAILS))
7628 	flow_loop_dump (loop, dump_file, NULL, 1);
7629 
7630       tree_ssa_iv_optimize_loop (&data, loop);
7631     }
7632 
7633   tree_ssa_iv_optimize_finalize (&data);
7634 }
7635 
7636 #include "gt-tree-ssa-loop-ivopts.h"
7637