1 /* Induction variable optimizations.
2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does it in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only by a constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variable's
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and enable us to decide
66    costs more precisely, but getting all the interactions right would be
67    complicated.  */
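
/* As an illustrative sketch only (the candidates and costs actually chosen
   depend on the target cost model), consider

     for (i = 0; i < n; i++)
       a[i] = 0;

   Step 1 finds an address use &a[i] and a compare use i < n; step 2 adds
   candidates such as the original biv i and a pointer iv p stepping from
   &a[0] by the element size; if step 3 selects the pointer candidate for
   both uses, step 4 rewrites the loop to the equivalent of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   i.e. strength reduction of the address computation combined with
   elimination of the original induction variable.  */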
68 
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111 
112 /* For lang_hooks.types.type_for_mode.  */
113 #include "langhooks.h"
114 
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116    cost of different addressing modes.  This should be moved to a TBD
117    interface between the GIMPLE and RTL worlds.  */
118 
119 /* The infinite cost.  */
120 #define INFTY 10000000
121 
122 /* Returns the expected number of loop iterations for LOOP.
123    The average trip count is computed from profile data if it
124    exists. */
125 
126 static inline HOST_WIDE_INT
127 avg_loop_niter (struct loop *loop)
128 {
129   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
130   if (niter == -1)
131     {
132       niter = likely_max_stmt_executions_int (loop);
133 
134       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
135 	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
136     }
137 
138   return niter;
139 }
140 
141 struct iv_use;
142 
143 /* Representation of the induction variable.  */
144 struct iv
145 {
146   tree base;		/* Initial value of the iv.  */
147   tree base_object;	/* A memory object to which the induction variable points.  */
148   tree step;		/* Step of the iv (constant only).  */
149   tree ssa_name;	/* The ssa name with the value.  */
150   struct iv_use *nonlin_use;	/* The nonlinear iv_use recorded for this iv, if any.  */
151   bool biv_p;		/* Is it a biv?  */
152   bool no_overflow;	/* True if the iv doesn't overflow.  */
153   bool have_address_use;/* For biv, indicate if it's used in any address
154 			   type use.  */
155 };
156 
157 /* Per-ssa version information (induction variable descriptions, etc.).  */
158 struct version_info
159 {
160   tree name;		/* The ssa name.  */
161   struct iv *iv;	/* Induction variable description.  */
162   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
163 			   an expression that is not an induction variable.  */
164   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
165   unsigned inv_id;	/* Id of an invariant.  */
166 };
167 
168 /* Types of uses.  */
169 enum use_type
170 {
171   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
172   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
173 			   reference.  */
174   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
175 			   cases where the expansion of the function
176 			   will turn the argument into a normal address.  */
177   USE_COMPARE		/* Use is a compare.  */
178 };
179 
180 /* Cost of a computation.  */
181 struct comp_cost
182 {
183   comp_cost (): cost (0), complexity (0), scratch (0)
184   {}
185 
186   comp_cost (int cost, unsigned complexity, int scratch = 0)
187     : cost (cost), complexity (complexity), scratch (scratch)
188   {}
189 
190   /* Returns true if COST is infinite.  */
191   bool infinite_cost_p ();
192 
193   /* Adds costs COST1 and COST2.  */
194   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
195 
196   /* Adds COST to the comp_cost.  */
197   comp_cost operator+= (comp_cost cost);
198 
199   /* Adds constant C to this comp_cost.  */
200   comp_cost operator+= (HOST_WIDE_INT c);
201 
202   /* Subtracts constant C from this comp_cost.  */
203   comp_cost operator-= (HOST_WIDE_INT c);
204 
205   /* Divide the comp_cost by constant C.  */
206   comp_cost operator/= (HOST_WIDE_INT c);
207 
208   /* Multiply the comp_cost by constant C.  */
209   comp_cost operator*= (HOST_WIDE_INT c);
210 
211   /* Subtracts costs COST1 and COST2.  */
212   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
213 
214   /* Subtracts COST from this comp_cost.  */
215   comp_cost operator-= (comp_cost cost);
216 
217   /* Returns true if COST1 is smaller than COST2.  */
218   friend bool operator< (comp_cost cost1, comp_cost cost2);
219 
220   /* Returns true if COST1 and COST2 are equal.  */
221   friend bool operator== (comp_cost cost1, comp_cost cost2);
222 
223   /* Returns true if COST1 is smaller than or equal to COST2.  */
224   friend bool operator<= (comp_cost cost1, comp_cost cost2);
225 
226   int cost;		/* The runtime cost.  */
227   unsigned complexity;  /* The estimate of the complexity of the code for
228 			   the computation (in no concrete units --
229 			   complexity field should be larger for more
230 			   complex expressions and addressing modes).  */
231   int scratch;		/* Scratch used during cost computation.  */
232 };
233 
234 static const comp_cost no_cost;
235 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
236 
237 bool
238 comp_cost::infinite_cost_p ()
239 {
240   return cost == INFTY;
241 }
242 
243 comp_cost
244 operator+ (comp_cost cost1, comp_cost cost2)
245 {
246   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
247     return infinite_cost;
248 
249   cost1.cost += cost2.cost;
250   cost1.complexity += cost2.complexity;
251 
252   return cost1;
253 }
254 
255 comp_cost
256 operator- (comp_cost cost1, comp_cost cost2)
257 {
258   if (cost1.infinite_cost_p ())
259     return infinite_cost;
260 
261   gcc_assert (!cost2.infinite_cost_p ());
262 
263   cost1.cost -= cost2.cost;
264   cost1.complexity -= cost2.complexity;
265 
266   return cost1;
267 }
268 
269 comp_cost
270 comp_cost::operator+= (comp_cost cost)
271 {
272   *this = *this + cost;
273   return *this;
274 }
275 
276 comp_cost
277 comp_cost::operator+= (HOST_WIDE_INT c)
278 {
279   if (infinite_cost_p ())
280     return *this;
281 
282   this->cost += c;
283 
284   return *this;
285 }
286 
287 comp_cost
288 comp_cost::operator-= (HOST_WIDE_INT c)
289 {
290   if (infinite_cost_p ())
291     return *this;
292 
293   this->cost -= c;
294 
295   return *this;
296 }
297 
298 comp_cost
299 comp_cost::operator/= (HOST_WIDE_INT c)
300 {
301   if (infinite_cost_p ())
302     return *this;
303 
304   this->cost /= c;
305 
306   return *this;
307 }
308 
309 comp_cost
310 comp_cost::operator*= (HOST_WIDE_INT c)
311 {
312   if (infinite_cost_p ())
313     return *this;
314 
315   this->cost *= c;
316 
317   return *this;
318 }
319 
320 comp_cost
321 comp_cost::operator-= (comp_cost cost)
322 {
323   *this = *this - cost;
324   return *this;
325 }
326 
327 bool
328 operator< (comp_cost cost1, comp_cost cost2)
329 {
330   if (cost1.cost == cost2.cost)
331     return cost1.complexity < cost2.complexity;
332 
333   return cost1.cost < cost2.cost;
334 }
335 
336 bool
337 operator== (comp_cost cost1, comp_cost cost2)
338 {
339   return cost1.cost == cost2.cost
340     && cost1.complexity == cost2.complexity;
341 }
342 
343 bool
344 operator<= (comp_cost cost1, comp_cost cost2)
345 {
346   return cost1 < cost2 || cost1 == cost2;
347 }
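
/* Consequently, costs compare lexicographically: the runtime cost field
   dominates, and complexity only breaks ties between equal costs.  With
   illustrative values, comp_cost (4, 2) < comp_cost (4, 3), while
   comp_cost (4, 9) is still smaller than comp_cost (5, 0).  */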
348 
349 struct iv_inv_expr_ent;
350 
351 /* The candidate - cost pair.  */
352 struct cost_pair
353 {
354   struct iv_cand *cand;	/* The candidate.  */
355   comp_cost cost;	/* The cost.  */
356   enum tree_code comp;	/* For iv elimination, the comparison.  */
357   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
358 			   preserved when representing iv_use with iv_cand.  */
359   bitmap inv_exprs;	/* The list of newly created invariant expressions
360 			   when representing iv_use with iv_cand.  */
361   tree value;		/* For final value elimination, the expression for
362 			   the final value of the iv.  For iv elimination,
363 			   the new bound to compare with.  */
364 };
365 
366 /* Use.  */
367 struct iv_use
368 {
369   unsigned id;		/* The id of the use.  */
370   unsigned group_id;	/* The group id the use belongs to.  */
371   enum use_type type;	/* Type of the use.  */
372   tree mem_type;	/* The memory type to use when testing whether an
373 			   address is legitimate, and what the address's
374 			   cost is.  */
375   struct iv *iv;	/* The induction variable it is based on.  */
376   gimple *stmt;		/* Statement in that it occurs.  */
377   tree *op_p;		/* The place where it occurs.  */
378 
379   tree addr_base;	/* Base address with const offset stripped.  */
380   poly_uint64_pod addr_offset;
381 			/* Const offset stripped from base address.  */
382 };
383 
384 /* Group of uses.  */
385 struct iv_group
386 {
387   /* The id of the group.  */
388   unsigned id;
389   /* Uses of the group are of the same type.  */
390   enum use_type type;
391   /* The set of "related" IV candidates, plus the important ones.  */
392   bitmap related_cands;
393   /* Number of IV candidates in the cost_map.  */
394   unsigned n_map_members;
395   /* The costs wrto the iv candidates.  */
396   struct cost_pair *cost_map;
397   /* The selected candidate for the group.  */
398   struct iv_cand *selected;
399   /* Uses in the group.  */
400   vec<struct iv_use *> vuses;
401 };
402 
403 /* The position where the iv is computed.  */
404 enum iv_position
405 {
406   IP_NORMAL,		/* At the end, just before the exit condition.  */
407   IP_END,		/* At the end of the latch block.  */
408   IP_BEFORE_USE,	/* Immediately before a specific use.  */
409   IP_AFTER_USE,		/* Immediately after a specific use.  */
410   IP_ORIGINAL		/* The original biv.  */
411 };
412 
413 /* The induction variable candidate.  */
414 struct iv_cand
415 {
416   unsigned id;		/* The number of the candidate.  */
417   bool important;	/* Whether this is an "important" candidate, i.e. such
418 			   that it should be considered by all uses.  */
419   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
420   gimple *incremented_at;/* For original biv, the statement where it is
421 			   incremented.  */
422   tree var_before;	/* The variable used for it before increment.  */
423   tree var_after;	/* The variable used for it after increment.  */
424   struct iv *iv;	/* The value of the candidate.  NULL for
425 			   "pseudocandidate" used to indicate the possibility
426 			   to replace the final value of an iv by direct
427 			   computation of the value.  */
428   unsigned cost;	/* Cost of the candidate.  */
429   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
430   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
431 			      where it is incremented.  */
432   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
433 			   iv_cand.  */
434   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
435 			   handle it as a new invariant expression which will
436 			   be hoisted out of loop.  */
437   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
438 			   smaller type.  */
439 };
440 
441 /* Hashtable entry for common candidate derived from iv uses.  */
442 struct iv_common_cand
443 {
444   tree base;
445   tree step;
446   /* IV uses from which this common candidate is derived.  */
447   auto_vec<struct iv_use *> uses;
448   hashval_t hash;
449 };
450 
451 /* Hashtable helpers.  */
452 
453 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
454 {
455   static inline hashval_t hash (const iv_common_cand *);
456   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
457 };
458 
459 /* Hash function for possible common candidates.  */
460 
461 inline hashval_t
462 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
463 {
464   return ccand->hash;
465 }
466 
467 /* Hash table equality function for common candidates.  */
468 
469 inline bool
470 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
471 			      const iv_common_cand *ccand2)
472 {
473   return (ccand1->hash == ccand2->hash
474 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
475 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
476 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
477 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
478 }
479 
480 /* Loop invariant expression hashtable entry.  */
481 
482 struct iv_inv_expr_ent
483 {
484   /* Tree expression of the entry.  */
485   tree expr;
486   /* Unique identifier.  */
487   int id;
488   /* Hash value.  */
489   hashval_t hash;
490 };
491 
492 /* Sort iv_inv_expr_ent pair A and B by id field.  */
493 
494 static int
495 sort_iv_inv_expr_ent (const void *a, const void *b)
496 {
497   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
498   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
499 
500   unsigned id1 = (*e1)->id;
501   unsigned id2 = (*e2)->id;
502 
503   if (id1 < id2)
504     return -1;
505   else if (id1 > id2)
506     return 1;
507   else
508     return 0;
509 }
510 
511 /* Hashtable helpers.  */
512 
513 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
514 {
515   static inline hashval_t hash (const iv_inv_expr_ent *);
516   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
517 };
518 
519 /* Return true if uses of type TYPE represent some form of address.  */
520 
521 inline bool
522 address_p (use_type type)
523 {
524   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
525 }
526 
527 /* Hash function for loop invariant expressions.  */
528 
529 inline hashval_t
530 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
531 {
532   return expr->hash;
533 }
534 
535 /* Hash table equality function for expressions.  */
536 
537 inline bool
538 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
539 			   const iv_inv_expr_ent *expr2)
540 {
541   return expr1->hash == expr2->hash
542 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
543 }
544 
545 struct ivopts_data
546 {
547   /* The currently optimized loop.  */
548   struct loop *current_loop;
549   location_t loop_loc;
550 
551   /* Numbers of iterations for all exits of the current loop.  */
552   hash_map<edge, tree_niter_desc *> *niters;
553 
554   /* Number of registers used in it.  */
555   unsigned regs_used;
556 
557   /* The size of version_info array allocated.  */
558   unsigned version_info_size;
559 
560   /* The array of information for the ssa names.  */
561   struct version_info *version_info;
562 
563   /* The hashtable of loop invariant expressions created
564      by ivopt.  */
565   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
566 
567   /* The bitmap of indices in version_info whose value was changed.  */
568   bitmap relevant;
569 
570   /* The uses of induction variables.  */
571   vec<iv_group *> vgroups;
572 
573   /* The candidates.  */
574   vec<iv_cand *> vcands;
575 
576   /* A bitmap of important candidates.  */
577   bitmap important_candidates;
578 
579   /* Cache used by tree_to_aff_combination_expand.  */
580   hash_map<tree, name_expansion *> *name_expansion_cache;
581 
582   /* The hashtable of common candidates derived from iv uses.  */
583   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
584 
585   /* The common candidates.  */
586   vec<iv_common_cand *> iv_common_cands;
587 
588   /* Hash map recording base object information of tree exp.  */
589   hash_map<tree, tree> *base_object_map;
590 
591   /* The maximum invariant variable id.  */
592   unsigned max_inv_var_id;
593 
594   /* The maximum invariant expression id.  */
595   unsigned max_inv_expr_id;
596 
597   /* Number of no_overflow BIVs which are not used in memory address.  */
598   unsigned bivs_not_used_in_addr;
599 
600   /* Obstack for iv structure.  */
601   struct obstack iv_obstack;
602 
603   /* Whether to consider just related and important candidates when replacing a
604      use.  */
605   bool consider_all_candidates;
606 
607   /* Are we optimizing for speed?  */
608   bool speed;
609 
610   /* Whether the loop body includes any function calls.  */
611   bool body_includes_call;
612 
613   /* Whether the loop body can only be exited via single exit.  */
614   bool loop_single_exit_p;
615 };
616 
617 /* An assignment of iv candidates to uses.  */
618 
619 struct iv_ca
620 {
621   /* The number of uses covered by the assignment.  */
622   unsigned upto;
623 
624   /* Number of uses that cannot be expressed by the candidates in the set.  */
625   unsigned bad_groups;
626 
627   /* Candidate assigned to a use, together with the related costs.  */
628   struct cost_pair **cand_for_group;
629 
630   /* Number of times each candidate is used.  */
631   unsigned *n_cand_uses;
632 
633   /* The candidates used.  */
634   bitmap cands;
635 
636   /* The number of candidates in the set.  */
637   unsigned n_cands;
638 
639   /* The number of invariants needed, including both invariant variables and
640      invariant expressions.  */
641   unsigned n_invs;
642 
643   /* Total cost of expressing uses.  */
644   comp_cost cand_use_cost;
645 
646   /* Total cost of candidates.  */
647   unsigned cand_cost;
648 
649   /* Number of times each invariant variable is used.  */
650   unsigned *n_inv_var_uses;
651 
652   /* Number of times each invariant expression is used.  */
653   unsigned *n_inv_expr_uses;
654 
655   /* Total cost of the assignment.  */
656   comp_cost cost;
657 };
658 
659 /* Difference of two iv candidate assignments.  */
660 
661 struct iv_ca_delta
662 {
663   /* Changed group.  */
664   struct iv_group *group;
665 
666   /* An old assignment (for rollback purposes).  */
667   struct cost_pair *old_cp;
668 
669   /* A new assignment.  */
670   struct cost_pair *new_cp;
671 
672   /* Next change in the list.  */
673   struct iv_ca_delta *next;
674 };
675 
676 /* Bound on number of candidates below which all candidates are considered.  */
677 
678 #define CONSIDER_ALL_CANDIDATES_BOUND \
679   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
680 
681 /* If there are more iv occurrences, we just give up (it is quite unlikely that
682    optimizing such a loop would help, and it would take ages).  */
683 
684 #define MAX_CONSIDERED_GROUPS \
685   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
686 
687 /* If there are at most this number of ivs in the set, always try removing
688    unnecessary ivs from the set.  */
689 
690 #define ALWAYS_PRUNE_CAND_SET_BOUND \
691   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
692 
693 /* The list of trees for which the decl_rtl field must be reset is stored
694    here.  */
695 
696 static vec<tree> decl_rtl_to_reset;
697 
698 static comp_cost force_expr_to_var_cost (tree, bool);
699 
700 /* The single loop exit if it dominates the latch, NULL otherwise.  */
701 
702 edge
703 single_dom_exit (struct loop *loop)
704 {
705   edge exit = single_exit (loop);
706 
707   if (!exit)
708     return NULL;
709 
710   if (!just_once_each_iteration_p (loop, exit->src))
711     return NULL;
712 
713   return exit;
714 }
715 
716 /* Dumps information about the induction variable IV to FILE.  Don't dump
717    the variable's name if DUMP_NAME is FALSE.  The information is dumped with
718    preceding spaces indicated by INDENT_LEVEL.  */
719 
720 void
721 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
722 {
723   const char *p;
724   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
725 
726   if (indent_level > 4)
727     indent_level = 4;
728   p = spaces + 8 - (indent_level << 1);
729 
730   fprintf (file, "%sIV struct:\n", p);
731   if (iv->ssa_name && dump_name)
732     {
733       fprintf (file, "%s  SSA_NAME:\t", p);
734       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
735       fprintf (file, "\n");
736     }
737 
738   fprintf (file, "%s  Type:\t", p);
739   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
740   fprintf (file, "\n");
741 
742   fprintf (file, "%s  Base:\t", p);
743   print_generic_expr (file, iv->base, TDF_SLIM);
744   fprintf (file, "\n");
745 
746   fprintf (file, "%s  Step:\t", p);
747   print_generic_expr (file, iv->step, TDF_SLIM);
748   fprintf (file, "\n");
749 
750   if (iv->base_object)
751     {
752       fprintf (file, "%s  Object:\t", p);
753       print_generic_expr (file, iv->base_object, TDF_SLIM);
754       fprintf (file, "\n");
755     }
756 
757   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
758 
759   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
760 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
761 }
762 
763 /* Dumps information about the USE to FILE.  */
764 
765 void
766 dump_use (FILE *file, struct iv_use *use)
767 {
768   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
769   fprintf (file, "    At stmt:\t");
770   print_gimple_stmt (file, use->stmt, 0);
771   fprintf (file, "    At pos:\t");
772   if (use->op_p)
773     print_generic_expr (file, *use->op_p, TDF_SLIM);
774   fprintf (file, "\n");
775   dump_iv (file, use->iv, false, 2);
776 }
777 
778 /* Dumps information about the uses to FILE.  */
779 
780 void
781 dump_groups (FILE *file, struct ivopts_data *data)
782 {
783   unsigned i, j;
784   struct iv_group *group;
785 
786   for (i = 0; i < data->vgroups.length (); i++)
787     {
788       group = data->vgroups[i];
789       fprintf (file, "Group %d:\n", group->id);
790       if (group->type == USE_NONLINEAR_EXPR)
791 	fprintf (file, "  Type:\tGENERIC\n");
792       else if (group->type == USE_REF_ADDRESS)
793 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
794       else if (group->type == USE_PTR_ADDRESS)
795 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
796       else
797 	{
798 	  gcc_assert (group->type == USE_COMPARE);
799 	  fprintf (file, "  Type:\tCOMPARE\n");
800 	}
801       for (j = 0; j < group->vuses.length (); j++)
802 	dump_use (file, group->vuses[j]);
803     }
804 }
805 
806 /* Dumps information about induction variable candidate CAND to FILE.  */
807 
808 void
809 dump_cand (FILE *file, struct iv_cand *cand)
810 {
811   struct iv *iv = cand->iv;
812 
813   fprintf (file, "Candidate %d:\n", cand->id);
814   if (cand->inv_vars)
815     {
816       fprintf (file, "  Depend on inv.vars: ");
817       dump_bitmap (file, cand->inv_vars);
818     }
819   if (cand->inv_exprs)
820     {
821       fprintf (file, "  Depend on inv.exprs: ");
822       dump_bitmap (file, cand->inv_exprs);
823     }
824 
825   if (cand->var_before)
826     {
827       fprintf (file, "  Var before: ");
828       print_generic_expr (file, cand->var_before, TDF_SLIM);
829       fprintf (file, "\n");
830     }
831   if (cand->var_after)
832     {
833       fprintf (file, "  Var after: ");
834       print_generic_expr (file, cand->var_after, TDF_SLIM);
835       fprintf (file, "\n");
836     }
837 
838   switch (cand->pos)
839     {
840     case IP_NORMAL:
841       fprintf (file, "  Incr POS: before exit test\n");
842       break;
843 
844     case IP_BEFORE_USE:
845       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
846       break;
847 
848     case IP_AFTER_USE:
849       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
850       break;
851 
852     case IP_END:
853       fprintf (file, "  Incr POS: at end\n");
854       break;
855 
856     case IP_ORIGINAL:
857       fprintf (file, "  Incr POS: orig biv\n");
858       break;
859     }
860 
861   dump_iv (file, iv, false, 1);
862 }
863 
864 /* Returns the info for ssa version VER.  */
865 
866 static inline struct version_info *
867 ver_info (struct ivopts_data *data, unsigned ver)
868 {
869   return data->version_info + ver;
870 }
871 
872 /* Returns the info for ssa name NAME.  */
873 
874 static inline struct version_info *
875 name_info (struct ivopts_data *data, tree name)
876 {
877   return ver_info (data, SSA_NAME_VERSION (name));
878 }
879 
880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
881    emitted in LOOP.  */
882 
883 static bool
884 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
885 {
886   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
887 
888   gcc_assert (bb);
889 
890   if (sbb == loop->latch)
891     return true;
892 
893   if (sbb != bb)
894     return false;
895 
896   return stmt == last_stmt (bb);
897 }
898 
899 /* Returns true if STMT is after the place where the original induction
900    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
901    if the positions are identical.  */
902 
903 static bool
904 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
905 {
906   basic_block cand_bb = gimple_bb (cand->incremented_at);
907   basic_block stmt_bb = gimple_bb (stmt);
908 
909   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
910     return false;
911 
912   if (stmt_bb != cand_bb)
913     return true;
914 
915   if (true_if_equal
916       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
917     return true;
918   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
919 }
920 
921 /* Returns true if STMT is after the place where the induction variable
922    CAND is incremented in LOOP.  */
923 
924 static bool
925 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
926 {
927   switch (cand->pos)
928     {
929     case IP_END:
930       return false;
931 
932     case IP_NORMAL:
933       return stmt_after_ip_normal_pos (loop, stmt);
934 
935     case IP_ORIGINAL:
936     case IP_AFTER_USE:
937       return stmt_after_inc_pos (cand, stmt, false);
938 
939     case IP_BEFORE_USE:
940       return stmt_after_inc_pos (cand, stmt, true);
941 
942     default:
943       gcc_unreachable ();
944     }
945 }
946 
947 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
948 
949 static bool
950 abnormal_ssa_name_p (tree exp)
951 {
952   if (!exp)
953     return false;
954 
955   if (TREE_CODE (exp) != SSA_NAME)
956     return false;
957 
958   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
959 }
960 
961 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
962    abnormal phi node.  Callback for for_each_index.  */
963 
964 static bool
965 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
966 				  void *data ATTRIBUTE_UNUSED)
967 {
968   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
969     {
970       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
971 	return false;
972       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
973 	return false;
974     }
975 
976   return !abnormal_ssa_name_p (*index);
977 }
978 
979 /* Returns true if EXPR contains a ssa name that occurs in an
980    abnormal phi node.  */
981 
982 bool
983 contains_abnormal_ssa_name_p (tree expr)
984 {
985   enum tree_code code;
986   enum tree_code_class codeclass;
987 
988   if (!expr)
989     return false;
990 
991   code = TREE_CODE (expr);
992   codeclass = TREE_CODE_CLASS (code);
993 
994   if (code == CALL_EXPR)
995     {
996       tree arg;
997       call_expr_arg_iterator iter;
998       FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
999 	if (contains_abnormal_ssa_name_p (arg))
1000 	  return true;
1001       return false;
1002     }
1003 
1004   if (code == SSA_NAME)
1005     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1006 
1007   if (code == INTEGER_CST
1008       || is_gimple_min_invariant (expr))
1009     return false;
1010 
1011   if (code == ADDR_EXPR)
1012     return !for_each_index (&TREE_OPERAND (expr, 0),
1013 			    idx_contains_abnormal_ssa_name_p,
1014 			    NULL);
1015 
1016   if (code == COND_EXPR)
1017     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1018       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1019       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1020 
1021   switch (codeclass)
1022     {
1023     case tcc_binary:
1024     case tcc_comparison:
1025       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1026 	return true;
1027 
1028       /* Fallthru.  */
1029     case tcc_unary:
1030       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1031 	return true;
1032 
1033       break;
1034 
1035     default:
1036       gcc_unreachable ();
1037     }
1038 
1039   return false;
1040 }
1041 
1042 /*  Returns the structure describing number of iterations determined from
1043     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1044 
1045 static struct tree_niter_desc *
1046 niter_for_exit (struct ivopts_data *data, edge exit)
1047 {
1048   struct tree_niter_desc *desc;
1049   tree_niter_desc **slot;
1050 
1051   if (!data->niters)
1052     {
1053       data->niters = new hash_map<edge, tree_niter_desc *>;
1054       slot = NULL;
1055     }
1056   else
1057     slot = data->niters->get (exit);
1058 
1059   if (!slot)
1060     {
1061       /* Try to determine number of iterations.  We cannot safely work with ssa
1062 	 names that appear in phi nodes on abnormal edges, so that we do not
1063 	 create overlapping life ranges for them (PR 27283).  */
1064       desc = XNEW (struct tree_niter_desc);
1065       if (!number_of_iterations_exit (data->current_loop,
1066 				      exit, desc, true)
1067      	  || contains_abnormal_ssa_name_p (desc->niter))
1068 	{
1069 	  XDELETE (desc);
1070 	  desc = NULL;
1071 	}
1072       data->niters->put (exit, desc);
1073     }
1074   else
1075     desc = *slot;
1076 
1077   return desc;
1078 }
1079 
1080 /* Returns the structure describing number of iterations determined from
1081    single dominating exit of DATA->current_loop, or NULL if something
1082    goes wrong.  */
1083 
1084 static struct tree_niter_desc *
1085 niter_for_single_dom_exit (struct ivopts_data *data)
1086 {
1087   edge exit = single_dom_exit (data->current_loop);
1088 
1089   if (!exit)
1090     return NULL;
1091 
1092   return niter_for_exit (data, exit);
1093 }
1094 
1095 /* Initializes data structures used by the iv optimization pass, stored
1096    in DATA.  */
1097 
1098 static void
1099 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1100 {
1101   data->version_info_size = 2 * num_ssa_names;
1102   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1103   data->relevant = BITMAP_ALLOC (NULL);
1104   data->important_candidates = BITMAP_ALLOC (NULL);
1105   data->max_inv_var_id = 0;
1106   data->max_inv_expr_id = 0;
1107   data->niters = NULL;
1108   data->vgroups.create (20);
1109   data->vcands.create (20);
1110   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1111   data->name_expansion_cache = NULL;
1112   data->base_object_map = NULL;
1113   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1114   data->iv_common_cands.create (20);
1115   decl_rtl_to_reset.create (20);
1116   gcc_obstack_init (&data->iv_obstack);
1117 }
1118 
1119 /* walk_tree callback for determine_base_object.  */
1120 
1121 static tree
1122 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1123 {
1124   tree_code code = TREE_CODE (*tp);
1125   tree obj = NULL_TREE;
1126   if (code == ADDR_EXPR)
1127     {
1128       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1129       if (!base)
1130 	obj = *tp;
1131       else if (TREE_CODE (base) != MEM_REF)
1132 	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1133     }
1134   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1135 	obj = fold_convert (ptr_type_node, *tp);
1136 
1137   if (!obj)
1138     {
1139       if (!EXPR_P (*tp))
1140 	*walk_subtrees = 0;
1141 
1142       return NULL_TREE;
1143     }
1144   /* Record special node for multiple base objects and stop.  */
1145   if (*static_cast<tree *> (wdata))
1146     {
1147       *static_cast<tree *> (wdata) = integer_zero_node;
1148       return integer_zero_node;
1149     }
1150   /* Record the base object and continue looking.  */
1151   *static_cast<tree *> (wdata) = obj;
1152   return NULL_TREE;
1153 }
1154 
1155 /* Returns a memory object to which EXPR points, with caching.  Return NULL if we
1156    are able to determine that it does not point to any such object; specially
1157    return integer_zero_node if EXPR contains multiple base objects.  */
1158 
1159 static tree
1160 determine_base_object (struct ivopts_data *data, tree expr)
1161 {
1162   tree *slot, obj = NULL_TREE;
1163   if (data->base_object_map)
1164     {
1165       if ((slot = data->base_object_map->get(expr)) != NULL)
1166 	return *slot;
1167     }
1168   else
1169     data->base_object_map = new hash_map<tree, tree>;
1170 
1171   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1172   data->base_object_map->put (expr, obj);
1173   return obj;
1174 }
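
/* For example (illustrative cases): for &a[i] with a declared array a, the
   recorded base object is &a; for a pointer ssa name p, or for p + 4 * i,
   it is p; and an expression that refers to two different base objects is
   recorded as the special integer_zero_node.  */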
1175 
1176 /* Return true if an address expression with a non-DECL_P operand appears
1177    in EXPR.  */
1178 
1179 static bool
1180 contain_complex_addr_expr (tree expr)
1181 {
1182   bool res = false;
1183 
1184   STRIP_NOPS (expr);
1185   switch (TREE_CODE (expr))
1186     {
1187     case POINTER_PLUS_EXPR:
1188     case PLUS_EXPR:
1189     case MINUS_EXPR:
1190       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1191       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1192       break;
1193 
1194     case ADDR_EXPR:
1195       return (!DECL_P (TREE_OPERAND (expr, 0)));
1196 
1197     default:
1198       return false;
1199     }
1200 
1201   return res;
1202 }
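
/* For instance, "&a" for a declaration a is not complex (its operand is
   DECL_P), whereas "&a[i]" is, and so is any sum or difference containing
   such an address, e.g. "&a[0] + i * 4".  */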
1203 
1204 /* Allocates an induction variable with given initial value BASE and step STEP
1205    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1206 
1207 static struct iv *
1208 alloc_iv (struct ivopts_data *data, tree base, tree step,
1209 	  bool no_overflow = false)
1210 {
1211   tree expr = base;
1212   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1213 					      sizeof (struct iv));
1214   gcc_assert (step != NULL_TREE);
1215 
1216   /* Lower address expression in base except ones with DECL_P as operand.
1217      By doing this:
1218        1) More accurate cost can be computed for address expressions;
1219        2) Duplicate candidates won't be created for bases in different
1220 	  forms, like &a[0] and &a.  */
1221   STRIP_NOPS (expr);
1222   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1223       || contain_complex_addr_expr (expr))
1224     {
1225       aff_tree comb;
1226       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1227       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1228     }
1229 
1230   iv->base = base;
1231   iv->base_object = determine_base_object (data, base);
1232   iv->step = step;
1233   iv->biv_p = false;
1234   iv->nonlin_use = NULL;
1235   iv->ssa_name = NULL_TREE;
1236   if (!no_overflow
1237        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1238 			      base, step))
1239     no_overflow = true;
1240   iv->no_overflow = no_overflow;
1241   iv->have_address_use = false;
1242 
1243   return iv;
1244 }
1245 
1246 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1247    doesn't overflow.  */
1248 
1249 static void
1250 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1251 	bool no_overflow)
1252 {
1253   struct version_info *info = name_info (data, iv);
1254 
1255   gcc_assert (!info->iv);
1256 
1257   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1258   info->iv = alloc_iv (data, base, step, no_overflow);
1259   info->iv->ssa_name = iv;
1260 }
1261 
1262 /* Finds induction variable declaration for VAR.  */
1263 
1264 static struct iv *
1265 get_iv (struct ivopts_data *data, tree var)
1266 {
1267   basic_block bb;
1268   tree type = TREE_TYPE (var);
1269 
1270   if (!POINTER_TYPE_P (type)
1271       && !INTEGRAL_TYPE_P (type))
1272     return NULL;
1273 
1274   if (!name_info (data, var)->iv)
1275     {
1276       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1277 
1278       if (!bb
1279 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1280 	set_iv (data, var, var, build_int_cst (type, 0), true);
1281     }
1282 
1283   return name_info (data, var)->iv;
1284 }
1285 
1286 /* Return the first non-invariant ssa var found in EXPR.  */
1287 
1288 static tree
1289 extract_single_var_from_expr (tree expr)
1290 {
1291   int i, n;
1292   tree tmp;
1293   enum tree_code code;
1294 
1295   if (!expr || is_gimple_min_invariant (expr))
1296     return NULL;
1297 
1298   code = TREE_CODE (expr);
1299   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1300     {
1301       n = TREE_OPERAND_LENGTH (expr);
1302       for (i = 0; i < n; i++)
1303 	{
1304 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1305 
1306 	  if (tmp)
1307 	    return tmp;
1308 	}
1309     }
1310   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1311 }
1312 
1313 /* Finds basic ivs.  */
1314 
1315 static bool
1316 find_bivs (struct ivopts_data *data)
1317 {
1318   gphi *phi;
1319   affine_iv iv;
1320   tree step, type, base, stop;
1321   bool found = false;
1322   struct loop *loop = data->current_loop;
1323   gphi_iterator psi;
1324 
1325   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1326     {
1327       phi = psi.phi ();
1328 
1329       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1330 	continue;
1331 
1332       if (virtual_operand_p (PHI_RESULT (phi)))
1333 	continue;
1334 
1335       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1336 	continue;
1337 
1338       if (integer_zerop (iv.step))
1339 	continue;
1340 
1341       step = iv.step;
1342       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1343       /* Stop expanding the iv base at the first ssa var referred to by the
1344 	 iv step.  Ideally we should stop at any such ssa var, but since that
1345 	 is expensive and such cases are unusual, we just do it for the first one.
1346 
1347 	 See PR64705 for the rationale.  */
1348       stop = extract_single_var_from_expr (step);
1349       base = expand_simple_operations (base, stop);
1350       if (contains_abnormal_ssa_name_p (base)
1351 	  || contains_abnormal_ssa_name_p (step))
1352 	continue;
1353 
1354       type = TREE_TYPE (PHI_RESULT (phi));
1355       base = fold_convert (type, base);
1356       if (step)
1357 	{
1358 	  if (POINTER_TYPE_P (type))
1359 	    step = convert_to_ptrofftype (step);
1360 	  else
1361 	    step = fold_convert (type, step);
1362 	}
1363 
1364       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1365       found = true;
1366     }
1367 
1368   return found;
1369 }
1370 
1371 /* Marks basic ivs.  */
1372 
1373 static void
1374 mark_bivs (struct ivopts_data *data)
1375 {
1376   gphi *phi;
1377   gimple *def;
1378   tree var;
1379   struct iv *iv, *incr_iv;
1380   struct loop *loop = data->current_loop;
1381   basic_block incr_bb;
1382   gphi_iterator psi;
1383 
1384   data->bivs_not_used_in_addr = 0;
1385   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1386     {
1387       phi = psi.phi ();
1388 
1389       iv = get_iv (data, PHI_RESULT (phi));
1390       if (!iv)
1391 	continue;
1392 
1393       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1394       def = SSA_NAME_DEF_STMT (var);
1395       /* Don't mark iv peeled from other one as biv.  */
1396       if (def
1397 	  && gimple_code (def) == GIMPLE_PHI
1398 	  && gimple_bb (def) == loop->header)
1399 	continue;
1400 
1401       incr_iv = get_iv (data, var);
1402       if (!incr_iv)
1403 	continue;
1404 
1405       /* If the increment is in the subloop, ignore it.  */
1406       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1407       if (incr_bb->loop_father != data->current_loop
1408 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1409 	continue;
1410 
1411       iv->biv_p = true;
1412       incr_iv->biv_p = true;
1413       if (iv->no_overflow)
1414 	data->bivs_not_used_in_addr++;
1415       if (incr_iv->no_overflow)
1416 	data->bivs_not_used_in_addr++;
1417     }
1418 }
1419 
1420 /* Checks whether STMT defines a linear induction variable and stores its
1421    parameters to IV.  */
1422 
1423 static bool
1424 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1425 {
1426   tree lhs, stop;
1427   struct loop *loop = data->current_loop;
1428 
1429   iv->base = NULL_TREE;
1430   iv->step = NULL_TREE;
1431 
1432   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1433     return false;
1434 
1435   lhs = gimple_assign_lhs (stmt);
1436   if (TREE_CODE (lhs) != SSA_NAME)
1437     return false;
1438 
1439   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1440     return false;
1441 
1442   /* Stop expanding the iv base at the first ssa var referred to by the
1443      iv step.  Ideally we should stop at any such ssa var, but since that
1444      is expensive and such cases are unusual, we just do it for the first one.
1445 
1446      See PR64705 for the rationale.  */
1447   stop = extract_single_var_from_expr (iv->step);
1448   iv->base = expand_simple_operations (iv->base, stop);
1449   if (contains_abnormal_ssa_name_p (iv->base)
1450       || contains_abnormal_ssa_name_p (iv->step))
1451     return false;
1452 
1453   /* If STMT could throw, then do not consider STMT as defining a GIV.
1454      While this will suppress optimizations, we cannot safely delete this
1455      GIV and associated statements, even if it appears it is not used.  */
1456   if (stmt_could_throw_p (cfun, stmt))
1457     return false;
1458 
1459   return true;
1460 }
1461 
1462 /* Finds general ivs in statement STMT.  */
1463 
1464 static void
1465 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1466 {
1467   affine_iv iv;
1468 
1469   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1470     return;
1471 
1472   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1473 }
1474 
1475 /* Finds general ivs in basic block BB.  */
1476 
1477 static void
1478 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1479 {
1480   gimple_stmt_iterator bsi;
1481 
1482   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1483     find_givs_in_stmt (data, gsi_stmt (bsi));
1484 }
1485 
1486 /* Finds general ivs.  */
1487 
1488 static void
1489 find_givs (struct ivopts_data *data)
1490 {
1491   struct loop *loop = data->current_loop;
1492   basic_block *body = get_loop_body_in_dom_order (loop);
1493   unsigned i;
1494 
1495   for (i = 0; i < loop->num_nodes; i++)
1496     find_givs_in_bb (data, body[i]);
1497   free (body);
1498 }
1499 
1500 /* For each ssa name defined in LOOP determines whether it is an induction
1501    variable and if so, its initial value and step.  */
1502 
1503 static bool
1504 find_induction_variables (struct ivopts_data *data)
1505 {
1506   unsigned i;
1507   bitmap_iterator bi;
1508 
1509   if (!find_bivs (data))
1510     return false;
1511 
1512   find_givs (data);
1513   mark_bivs (data);
1514 
1515   if (dump_file && (dump_flags & TDF_DETAILS))
1516     {
1517       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1518 
1519       if (niter)
1520 	{
1521 	  fprintf (dump_file, "  number of iterations ");
1522 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1523 	  if (!integer_zerop (niter->may_be_zero))
1524 	    {
1525 	      fprintf (dump_file, "; zero if ");
1526 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1527 	    }
1528 	  fprintf (dump_file, "\n");
1529 	}
1530 
1531       fprintf (dump_file, "\n<Induction Vars>:\n");
1532       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1533 	{
1534 	  struct version_info *info = ver_info (data, i);
1535 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1536 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1537 	}
1538     }
1539 
1540   return true;
1541 }
1542 
1543 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1544    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1545    is the const offset stripped from IV base and MEM_TYPE is the type
1546    of the memory being addressed.  For uses of other types, ADDR_BASE
1547    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1548 
1549 static struct iv_use *
1550 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1551 	    gimple *stmt, enum use_type type, tree mem_type,
1552 	    tree addr_base, poly_uint64 addr_offset)
1553 {
1554   struct iv_use *use = XCNEW (struct iv_use);
1555 
1556   use->id = group->vuses.length ();
1557   use->group_id = group->id;
1558   use->type = type;
1559   use->mem_type = mem_type;
1560   use->iv = iv;
1561   use->stmt = stmt;
1562   use->op_p = use_p;
1563   use->addr_base = addr_base;
1564   use->addr_offset = addr_offset;
1565 
1566   group->vuses.safe_push (use);
1567   return use;
1568 }
1569 
1570 /* Checks whether OP is a loop-level invariant and if so, records it.
1571    NONLINEAR_USE is true if the invariant is used in a way we do not
1572    handle specially.  */
1573 
1574 static void
1575 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1576 {
1577   basic_block bb;
1578   struct version_info *info;
1579 
1580   if (TREE_CODE (op) != SSA_NAME
1581       || virtual_operand_p (op))
1582     return;
1583 
1584   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1585   if (bb
1586       && flow_bb_inside_loop_p (data->current_loop, bb))
1587     return;
1588 
1589   info = name_info (data, op);
1590   info->name = op;
1591   info->has_nonlin_use |= nonlinear_use;
1592   if (!info->inv_id)
1593     info->inv_id = ++data->max_inv_var_id;
1594   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1595 }
1596 
1597 /* Record a group of TYPE.  */
1598 
1599 static struct iv_group *
1600 record_group (struct ivopts_data *data, enum use_type type)
1601 {
1602   struct iv_group *group = XCNEW (struct iv_group);
1603 
1604   group->id = data->vgroups.length ();
1605   group->type = type;
1606   group->related_cands = BITMAP_ALLOC (NULL);
1607   group->vuses.create (1);
1608 
1609   data->vgroups.safe_push (group);
1610   return group;
1611 }
1612 
1613 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1614    A new group will be created if there is no existing group for the use.
1615    MEM_TYPE is the type of memory being addressed, or NULL if this
1616    isn't an address reference.  */
1617 
1618 static struct iv_use *
1619 record_group_use (struct ivopts_data *data, tree *use_p,
1620 		  struct iv *iv, gimple *stmt, enum use_type type,
1621 		  tree mem_type)
1622 {
1623   tree addr_base = NULL;
1624   struct iv_group *group = NULL;
1625   poly_uint64 addr_offset = 0;
1626 
1627   /* Record non address type use in a new group.  */
1628   if (address_p (type))
1629     {
1630       unsigned int i;
1631 
1632       addr_base = strip_offset (iv->base, &addr_offset);
1633       for (i = 0; i < data->vgroups.length (); i++)
1634 	{
1635 	  struct iv_use *use;
1636 
1637 	  group = data->vgroups[i];
1638 	  use = group->vuses[0];
1639 	  if (!address_p (use->type))
1640 	    continue;
1641 
1642 	  /* Check if it has the same stripped base and step.  */
1643 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1644 	      && operand_equal_p (iv->step, use->iv->step, 0)
1645 	      && operand_equal_p (addr_base, use->addr_base, 0))
1646 	    break;
1647 	}
1648       if (i == data->vgroups.length ())
1649 	group = NULL;
1650     }
1651 
1652   if (!group)
1653     group = record_group (data, type);
1654 
1655   return record_use (group, use_p, iv, stmt, type, mem_type,
1656 		     addr_base, addr_offset);
1657 }
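
/* For instance, address uses a[i] and a[i + 3] share the same base object,
   step and stripped base, differing only in the constant offset, so the
   second lands in the first one's group; uses of a different array b get
   a group of their own.  */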
1658 
1659 /* Checks whether the use OP is interesting and if so, records it.  */
1660 
1661 static struct iv_use *
1662 find_interesting_uses_op (struct ivopts_data *data, tree op)
1663 {
1664   struct iv *iv;
1665   gimple *stmt;
1666   struct iv_use *use;
1667 
1668   if (TREE_CODE (op) != SSA_NAME)
1669     return NULL;
1670 
1671   iv = get_iv (data, op);
1672   if (!iv)
1673     return NULL;
1674 
1675   if (iv->nonlin_use)
1676     {
1677       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1678       return iv->nonlin_use;
1679     }
1680 
1681   if (integer_zerop (iv->step))
1682     {
1683       record_invariant (data, op, true);
1684       return NULL;
1685     }
1686 
1687   stmt = SSA_NAME_DEF_STMT (op);
1688   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1689 
1690   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1691   iv->nonlin_use = use;
1692   return use;
1693 }
1694 
1695 /* Indicate how compare type iv_use can be handled.  */
1696 enum comp_iv_rewrite
1697 {
1698   COMP_IV_NA,
1699   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1700   COMP_IV_EXPR,
1701   /* We may rewrite compare type iv_uses on both sides of comparison by
1702      expressing value of each iv_use.  */
1703   COMP_IV_EXPR_2,
1704   /* We may rewrite compare type iv_use by expressing value of the iv_use
1705      or by eliminating it with other iv_cand.  */
1706   COMP_IV_ELIM
1707 };
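
/* For example (illustrative conditions): with ivs i and j and a loop
   invariant n, "i < j" can be handled as COMP_IV_EXPR_2; "i < n" as
   COMP_IV_ELIM (or by expressing i); and "i < x", where x is defined
   inside the loop but is not an iv, only as COMP_IV_EXPR.  */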
1708 
1709 /* Given a condition in statement STMT, checks whether it is a compare
1710    of an induction variable and an invariant.  If this is the case,
1711    CONTROL_VAR is set to location of the iv, BOUND to the location of
1712    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1713    induction variable descriptions, and the way the condition can be
1714    rewritten is returned.  Otherwise, CONTROL_VAR and BOUND are set to
1715    the arguments of the condition and COMP_IV_NA is returned.  */
1716 
1717 static enum comp_iv_rewrite
1718 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1719 		       tree **control_var, tree **bound,
1720 		       struct iv **iv_var, struct iv **iv_bound)
1721 {
1722   /* The objects returned when COND has constant operands.  */
1723   static struct iv const_iv;
1724   static tree zero;
1725   tree *op0 = &zero, *op1 = &zero;
1726   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1727   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1728 
1729   if (gimple_code (stmt) == GIMPLE_COND)
1730     {
1731       gcond *cond_stmt = as_a <gcond *> (stmt);
1732       op0 = gimple_cond_lhs_ptr (cond_stmt);
1733       op1 = gimple_cond_rhs_ptr (cond_stmt);
1734     }
1735   else
1736     {
1737       op0 = gimple_assign_rhs1_ptr (stmt);
1738       op1 = gimple_assign_rhs2_ptr (stmt);
1739     }
1740 
1741   zero = integer_zero_node;
1742   const_iv.step = integer_zero_node;
1743 
1744   if (TREE_CODE (*op0) == SSA_NAME)
1745     iv0 = get_iv (data, *op0);
1746   if (TREE_CODE (*op1) == SSA_NAME)
1747     iv1 = get_iv (data, *op1);
1748 
1749 	  /* If both sides of the comparison are IVs, we can express them on both ends.  */
1750   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1751     {
1752       rewrite_type = COMP_IV_EXPR_2;
1753       goto end;
1754     }
1755 
1756 	  /* If neither side of the comparison is an IV.  */
1757   if ((!iv0 || integer_zerop (iv0->step))
1758       && (!iv1 || integer_zerop (iv1->step)))
1759     goto end;
1760 
1761   /* Control variable may be on the other side.  */
1762   if (!iv0 || integer_zerop (iv0->step))
1763     {
1764       std::swap (op0, op1);
1765       std::swap (iv0, iv1);
1766     }
1767   /* If one side is IV and the other side isn't loop invariant.  */
1768   if (!iv1)
1769     rewrite_type = COMP_IV_EXPR;
1770   /* If one side is IV and the other side is loop invariant.  */
1771   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1772     rewrite_type = COMP_IV_ELIM;
1773 
1774 end:
1775   if (control_var)
1776     *control_var = op0;
1777   if (iv_var)
1778     *iv_var = iv0;
1779   if (bound)
1780     *bound = op1;
1781   if (iv_bound)
1782     *iv_bound = iv1;
1783 
1784   return rewrite_type;
1785 }
1786 
1787 /* Checks whether the condition in STMT is interesting and if so,
1788    records it.  */
1789 
1790 static void
1791 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1792 {
1793   tree *var_p, *bound_p;
1794   struct iv *var_iv, *bound_iv;
1795   enum comp_iv_rewrite ret;
1796 
1797   ret = extract_cond_operands (data, stmt,
1798 			       &var_p, &bound_p, &var_iv, &bound_iv);
1799   if (ret == COMP_IV_NA)
1800     {
1801       find_interesting_uses_op (data, *var_p);
1802       find_interesting_uses_op (data, *bound_p);
1803       return;
1804     }
1805 
1806   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1807   /* Record compare type iv_use for iv on the other side of comparison.  */
1808   if (ret == COMP_IV_EXPR_2)
1809     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1810 }
1811 
1812 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR
1813    is obviously invariant, i.e. such that all operands of EXPR are
1814    defined outside of the returned loop.  Returns NULL if EXPR is not
1815    even obviously invariant in LOOP.  */
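
/* For instance, in the hypothetical nest below, relative to the inner
   loop, the expression x + i is invariant in the inner loop only (i is
   defined by the outer loop), whereas x alone is invariant even in the
   outer loop, so the root of the loop tree is returned for it:

     for (i = 0; i < n; i++)	// outer loop
       for (j = 0; j < m; j++)	// inner loop
	 use (x + i);  */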
1816 
1817 struct loop *
1818 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1819 {
1820   basic_block def_bb;
1821   unsigned i, len;
1822 
1823   if (is_gimple_min_invariant (expr))
1824     return current_loops->tree_root;
1825 
1826   if (TREE_CODE (expr) == SSA_NAME)
1827     {
1828       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1829       if (def_bb)
1830 	{
1831 	  if (flow_bb_inside_loop_p (loop, def_bb))
1832 	    return NULL;
1833 	  return superloop_at_depth (loop,
1834 				     loop_depth (def_bb->loop_father) + 1);
1835 	}
1836 
1837       return current_loops->tree_root;
1838     }
1839 
1840   if (!EXPR_P (expr))
1841     return NULL;
1842 
1843   unsigned maxdepth = 0;
1844   len = TREE_OPERAND_LENGTH (expr);
1845   for (i = 0; i < len; i++)
1846     {
1847       struct loop *ivloop;
1848       if (!TREE_OPERAND (expr, i))
1849 	continue;
1850 
1851       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1852       if (!ivloop)
1853 	return NULL;
1854       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1855     }
1856 
1857   return superloop_at_depth (loop, maxdepth);
1858 }
1859 
1860 /* Returns true if expression EXPR is obviously invariant in LOOP,
1861    i.e. if all its operands are defined outside of the LOOP.  LOOP
1862    should not be the function body.  */
1863 
1864 bool
1865 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1866 {
1867   basic_block def_bb;
1868   unsigned i, len;
1869 
1870   gcc_assert (loop_depth (loop) > 0);
1871 
1872   if (is_gimple_min_invariant (expr))
1873     return true;
1874 
1875   if (TREE_CODE (expr) == SSA_NAME)
1876     {
1877       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1878       if (def_bb
1879 	  && flow_bb_inside_loop_p (loop, def_bb))
1880 	return false;
1881 
1882       return true;
1883     }
1884 
1885   if (!EXPR_P (expr))
1886     return false;
1887 
1888   len = TREE_OPERAND_LENGTH (expr);
1889   for (i = 0; i < len; i++)
1890     if (TREE_OPERAND (expr, i)
1891 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1892       return false;
1893 
1894   return true;
1895 }
1896 
1897 /* Given expression EXPR which computes inductive values with respect
1898    to the loop recorded in DATA, returns the biv from which EXPR is
1899    derived, by tracing the definition chains of ssa variables in EXPR.  */
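
/* E.g., given a biv I and (hypothetical) statements

     tmp_1 = (sizetype) i_2;
     off_3 = tmp_1 * 4;
     p_4 = base_5 + off_3;

   tracing the definition chain of p_4 (through off_3 and tmp_1)
   arrives at i_2, and the descriptor of biv I is returned.  */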
1900 
1901 static struct iv*
1902 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1903 {
1904   struct iv *iv;
1905   unsigned i, n;
1906   tree e2, e1;
1907   enum tree_code code;
1908   gimple *stmt;
1909 
1910   if (expr == NULL_TREE)
1911     return NULL;
1912 
1913   if (is_gimple_min_invariant (expr))
1914     return NULL;
1915 
1916   code = TREE_CODE (expr);
1917   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1918     {
1919       n = TREE_OPERAND_LENGTH (expr);
1920       for (i = 0; i < n; i++)
1921 	{
1922 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1923 	  if (iv)
1924 	    return iv;
1925 	}
1926     }
1927 
1928   /* Stop if it's not ssa name.  */
1929   /* Stop if it's not an ssa name.  */
1930     return NULL;
1931 
1932   iv = get_iv (data, expr);
1933   if (!iv || integer_zerop (iv->step))
1934     return NULL;
1935   else if (iv->biv_p)
1936     return iv;
1937 
1938   stmt = SSA_NAME_DEF_STMT (expr);
1939   if (gphi *phi = dyn_cast <gphi *> (stmt))
1940     {
1941       ssa_op_iter iter;
1942       use_operand_p use_p;
1943       basic_block phi_bb = gimple_bb (phi);
1944 
1945       /* Skip loop header PHI that doesn't define biv.  */
1946       if (phi_bb->loop_father == data->current_loop)
1947 	return NULL;
1948 
1949       if (virtual_operand_p (gimple_phi_result (phi)))
1950 	return NULL;
1951 
1952       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1953 	{
1954 	  tree use = USE_FROM_PTR (use_p);
1955 	  iv = find_deriving_biv_for_expr (data, use);
1956 	  if (iv)
1957 	    return iv;
1958 	}
1959       return NULL;
1960     }
1961   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1962     return NULL;
1963 
1964   e1 = gimple_assign_rhs1 (stmt);
1965   code = gimple_assign_rhs_code (stmt);
1966   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1967     return find_deriving_biv_for_expr (data, e1);
1968 
1969   switch (code)
1970     {
1971     case MULT_EXPR:
1972     case PLUS_EXPR:
1973     case MINUS_EXPR:
1974     case POINTER_PLUS_EXPR:
1975       /* Increments, decrements and multiplications by a constant
1976 	 are simple.  */
1977       e2 = gimple_assign_rhs2 (stmt);
1978       iv = find_deriving_biv_for_expr (data, e2);
1979       if (iv)
1980 	return iv;
1981       gcc_fallthrough ();
1982 
1983     CASE_CONVERT:
1984       /* Casts are simple.  */
1985       return find_deriving_biv_for_expr (data, e1);
1986 
1987     default:
1988       break;
1989     }
1990 
1991   return NULL;
1992 }
1993 
1994 /* Record that BIV, and any biv differing from it by one step (its
1995    predecessor or successor), are used in address type uses.  */
1996 
1997 static void
1998 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1999 {
2000   unsigned i;
2001   tree type, base_1, base_2;
2002   bitmap_iterator bi;
2003 
2004   if (!biv || !biv->biv_p || integer_zerop (biv->step)
2005       || biv->have_address_use || !biv->no_overflow)
2006     return;
2007 
2008   type = TREE_TYPE (biv->base);
2009   if (!INTEGRAL_TYPE_P (type))
2010     return;
2011 
2012   biv->have_address_use = true;
2013   data->bivs_not_used_in_addr--;
2014   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2015   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2016     {
2017       struct iv *iv = ver_info (data, i)->iv;
2018 
2019       if (!iv || !iv->biv_p || integer_zerop (iv->step)
2020 	  || iv->have_address_use || !iv->no_overflow)
2021 	continue;
2022 
2023       if (type != TREE_TYPE (iv->base)
2024 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2025 	continue;
2026 
2027       if (!operand_equal_p (biv->step, iv->step, 0))
2028 	continue;
2029 
2030       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2031       if (operand_equal_p (base_1, iv->base, 0)
2032 	  || operand_equal_p (base_2, biv->base, 0))
2033 	{
2034 	  iv->have_address_use = true;
2035 	  data->bivs_not_used_in_addr--;
2036 	}
2037     }
2038 }
2039 
2040 /* Accumulates the steps of indices into DATA and replaces their values
2041    with the initial ones.  Returns false when the value of the index
2042    cannot be determined.  Callback for for_each_index.  */
2043 
2044 struct ifs_ivopts_data
2045 {
2046   struct ivopts_data *ivopts_data;
2047   gimple *stmt;
2048   tree step;
2049 };
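
/* E.g., for a reference a[2 * i + 7], where i is an iv with base 0 and
   step 1 and the array elements are (hypothetically) 4 bytes wide, the
   index iv has base 7 and step 2, so it contributes 2 * 4 = 8 bytes to
   DTA->step and *IDX is replaced by 7.  */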
2050 
2051 static bool
2052 idx_find_step (tree base, tree *idx, void *data)
2053 {
2054   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2055   struct iv *iv;
2056   bool use_overflow_semantics = false;
2057   tree step, iv_base, iv_step, lbound, off;
2058   struct loop *loop = dta->ivopts_data->current_loop;
2059 
2060   /* If base is a component ref, require that the offset of the reference
2061      be invariant.  */
2062   if (TREE_CODE (base) == COMPONENT_REF)
2063     {
2064       off = component_ref_field_offset (base);
2065       return expr_invariant_in_loop_p (loop, off);
2066     }
2067 
2068   /* If base is array, first check whether we will be able to move the
2069      reference out of the loop (in order to take its address in strength
2070      reduction).  In order for this to work we need both lower bound
2071      and step to be loop invariants.  */
2072   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2073     {
2074       /* Moreover, for a range, the size needs to be invariant as well.  */
2075       if (TREE_CODE (base) == ARRAY_RANGE_REF
2076 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2077 	return false;
2078 
2079       step = array_ref_element_size (base);
2080       lbound = array_ref_low_bound (base);
2081 
2082       if (!expr_invariant_in_loop_p (loop, step)
2083 	  || !expr_invariant_in_loop_p (loop, lbound))
2084 	return false;
2085     }
2086 
2087   if (TREE_CODE (*idx) != SSA_NAME)
2088     return true;
2089 
2090   iv = get_iv (dta->ivopts_data, *idx);
2091   if (!iv)
2092     return false;
2093 
2094   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2095 	  *&x[0], which is not folded and does not trigger the
2096 	  ARRAY_REF path below.  */
2097   *idx = iv->base;
2098 
2099   if (integer_zerop (iv->step))
2100     return true;
2101 
2102   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2103     {
2104       step = array_ref_element_size (base);
2105 
2106       /* We only handle addresses whose step is an integer constant.  */
2107       if (TREE_CODE (step) != INTEGER_CST)
2108 	return false;
2109     }
2110   else
2111     /* The step for pointer arithmetic is already 1 byte.  */
2112     step = size_one_node;
2113 
2114   iv_base = iv->base;
2115   iv_step = iv->step;
2116   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2117     use_overflow_semantics = true;
2118 
2119   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2120 			    sizetype, &iv_base, &iv_step, dta->stmt,
2121 			    use_overflow_semantics))
2122     {
2123       /* The index might wrap.  */
2124       return false;
2125     }
2126 
2127   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2128   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2129 
2130   if (dta->ivopts_data->bivs_not_used_in_addr)
2131     {
2132       if (!iv->biv_p)
2133 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2134 
2135       record_biv_for_address_use (dta->ivopts_data, iv);
2136     }
2137   return true;
2138 }
2139 
2140 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2141    object is passed to it in VDATA.  */
2142 
2143 static bool
2144 idx_record_use (tree base, tree *idx,
2145 		void *vdata)
2146 {
2147   struct ivopts_data *data = (struct ivopts_data *) vdata;
2148   find_interesting_uses_op (data, *idx);
2149   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2150     {
2151       find_interesting_uses_op (data, array_ref_element_size (base));
2152       find_interesting_uses_op (data, array_ref_low_bound (base));
2153     }
2154   return true;
2155 }
2156 
2157 /* If we can prove that TOP = cst * BOT for some constant cst,
2158    store cst to MUL and return true.  Otherwise return false.
2159    The returned value is always sign-extended, regardless of the
2160    signedness of TOP and BOT.  */
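
/* E.g., for TOP = b * 4 - b and BOT = b, *MUL is set to 3; for TOP = 12
   and BOT = -4, *MUL is set to -3.  For TOP = b + 1 and BOT = b the
   function fails, since 1 is not a constant multiple of b.  */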
2161 
2162 static bool
2163 constant_multiple_of (tree top, tree bot, widest_int *mul)
2164 {
2165   tree mby;
2166   enum tree_code code;
2167   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2168   widest_int res, p0, p1;
2169 
2170   STRIP_NOPS (top);
2171   STRIP_NOPS (bot);
2172 
2173   if (operand_equal_p (top, bot, 0))
2174     {
2175       *mul = 1;
2176       return true;
2177     }
2178 
2179   code = TREE_CODE (top);
2180   switch (code)
2181     {
2182     case MULT_EXPR:
2183       mby = TREE_OPERAND (top, 1);
2184       if (TREE_CODE (mby) != INTEGER_CST)
2185 	return false;
2186 
2187       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2188 	return false;
2189 
2190       *mul = wi::sext (res * wi::to_widest (mby), precision);
2191       return true;
2192 
2193     case PLUS_EXPR:
2194     case MINUS_EXPR:
2195       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2196 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2197 	return false;
2198 
2199       if (code == MINUS_EXPR)
2200 	p1 = -p1;
2201       *mul = wi::sext (p0 + p1, precision);
2202       return true;
2203 
2204     case INTEGER_CST:
2205       if (TREE_CODE (bot) != INTEGER_CST)
2206 	return false;
2207 
2208       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2209       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2210       if (p1 == 0)
2211 	return false;
2212       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2213       return res == 0;
2214 
2215     default:
2216       if (POLY_INT_CST_P (top)
2217 	  && POLY_INT_CST_P (bot)
2218 	  && constant_multiple_p (wi::to_poly_widest (top),
2219 				  wi::to_poly_widest (bot), mul))
2220 	return true;
2221 
2222       return false;
2223     }
2224 }
2225 
2226 /* Return true if memory reference REF with step STEP may be unaligned.  */
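
/* E.g., a 4-byte access whose address advances by STEP = 2 bytes each
   iteration cannot stay 4-byte aligned; the tree_ctz (STEP) check below
   catches this by computing the alignment the step is guaranteed to
   preserve.  */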
2227 
2228 static bool
2229 may_be_unaligned_p (tree ref, tree step)
2230 {
2231   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2232      thus they are not misaligned.  */
2233   if (TREE_CODE (ref) == TARGET_MEM_REF)
2234     return false;
2235 
2236   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2237   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2238     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2239 
2240   unsigned HOST_WIDE_INT bitpos;
2241   unsigned int ref_align;
2242   get_object_alignment_1 (ref, &ref_align, &bitpos);
2243   if (ref_align < align
2244       || (bitpos % align) != 0
2245       || (bitpos % BITS_PER_UNIT) != 0)
2246     return true;
2247 
2248   unsigned int trailing_zeros = tree_ctz (step);
2249   if (trailing_zeros < HOST_BITS_PER_INT
2250       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2251     return true;
2252 
2253   return false;
2254 }
2255 
2256 /* Return true if EXPR may be non-addressable.   */
2257 
2258 bool
2259 may_be_nonaddressable_p (tree expr)
2260 {
2261   switch (TREE_CODE (expr))
2262     {
2263     case VAR_DECL:
2264       /* Check if it's a register variable.  */
2265       return DECL_HARD_REGISTER (expr);
2266 
2267     case TARGET_MEM_REF:
2268       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2269 	 target, thus they are always addressable.  */
2270       return false;
2271 
2272     case MEM_REF:
2273       /* Likewise for MEM_REFs, modulo the storage order.  */
2274       return REF_REVERSE_STORAGE_ORDER (expr);
2275 
2276     case BIT_FIELD_REF:
2277       if (REF_REVERSE_STORAGE_ORDER (expr))
2278 	return true;
2279       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2280 
2281     case COMPONENT_REF:
2282       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2283 	return true;
2284       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2285 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2286 
2287     case ARRAY_REF:
2288     case ARRAY_RANGE_REF:
2289       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2290 	return true;
2291       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2292 
2293     case VIEW_CONVERT_EXPR:
2294       /* This kind of view-conversion may wrap non-addressable objects
2295 	 and make them look addressable.  After some processing the
2296 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2297 	 of inappropriate objects to be built.  */
2298       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2299 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2300 	return true;
2301       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2302 
2303     CASE_CONVERT:
2304       return true;
2305 
2306     default:
2307       break;
2308     }
2309 
2310   return false;
2311 }
2312 
2313 /* Finds addresses in *OP_P inside STMT.  */
2314 
2315 static void
2316 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2317 			       tree *op_p)
2318 {
2319   tree base = *op_p, step = size_zero_node;
2320   struct iv *civ;
2321   struct ifs_ivopts_data ifs_ivopts_data;
2322 
2323   /* Do not play with volatile memory references.  A bit too conservative,
2324      perhaps, but safe.  */
2325   if (gimple_has_volatile_ops (stmt))
2326     goto fail;
2327 
2328   /* Ignore bitfields for now.  Not really something terribly complicated
2329      to handle.  TODO.  */
2330   if (TREE_CODE (base) == BIT_FIELD_REF)
2331     goto fail;
2332 
2333   base = unshare_expr (base);
2334 
2335   if (TREE_CODE (base) == TARGET_MEM_REF)
2336     {
2337       tree type = build_pointer_type (TREE_TYPE (base));
2338       tree astep;
2339 
2340       if (TMR_BASE (base)
2341 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2342 	{
2343 	  civ = get_iv (data, TMR_BASE (base));
2344 	  if (!civ)
2345 	    goto fail;
2346 
2347 	  TMR_BASE (base) = civ->base;
2348 	  step = civ->step;
2349 	}
2350       if (TMR_INDEX2 (base)
2351 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2352 	{
2353 	  civ = get_iv (data, TMR_INDEX2 (base));
2354 	  if (!civ)
2355 	    goto fail;
2356 
2357 	  TMR_INDEX2 (base) = civ->base;
2358 	  step = civ->step;
2359 	}
2360       if (TMR_INDEX (base)
2361 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2362 	{
2363 	  civ = get_iv (data, TMR_INDEX (base));
2364 	  if (!civ)
2365 	    goto fail;
2366 
2367 	  TMR_INDEX (base) = civ->base;
2368 	  astep = civ->step;
2369 
2370 	  if (astep)
2371 	    {
2372 	      if (TMR_STEP (base))
2373 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2374 
2375 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2376 	    }
2377 	}
2378 
2379       if (integer_zerop (step))
2380 	goto fail;
2381       base = tree_mem_ref_addr (type, base);
2382     }
2383   else
2384     {
2385       ifs_ivopts_data.ivopts_data = data;
2386       ifs_ivopts_data.stmt = stmt;
2387       ifs_ivopts_data.step = size_zero_node;
2388       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2389 	  || integer_zerop (ifs_ivopts_data.step))
2390 	goto fail;
2391       step = ifs_ivopts_data.step;
2392 
2393       /* Check that the base expression is addressable.  This needs
2394 	 to be done after substituting bases of IVs into it.  */
2395       if (may_be_nonaddressable_p (base))
2396 	goto fail;
2397 
2398       /* Moreover, on strict alignment platforms, check that it is
2399 	 sufficiently aligned.  */
2400       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2401 	goto fail;
2402 
2403       base = build_fold_addr_expr (base);
2404 
2405       /* Substituting bases of IVs into the base expression might
2406 	 have caused folding opportunities.  */
2407       if (TREE_CODE (base) == ADDR_EXPR)
2408 	{
2409 	  tree *ref = &TREE_OPERAND (base, 0);
2410 	  while (handled_component_p (*ref))
2411 	    ref = &TREE_OPERAND (*ref, 0);
2412 	  if (TREE_CODE (*ref) == MEM_REF)
2413 	    {
2414 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2415 				      TREE_OPERAND (*ref, 0),
2416 				      TREE_OPERAND (*ref, 1));
2417 	      if (tem)
2418 		*ref = tem;
2419 	    }
2420 	}
2421     }
2422 
2423   civ = alloc_iv (data, base, step);
2424   /* Fail if base object of this memory reference is unknown.  */
2425   if (civ->base_object == NULL_TREE)
2426     goto fail;
2427 
2428   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2429   return;
2430 
2431 fail:
2432   for_each_index (op_p, idx_record_use, data);
2433 }
2434 
2435 /* Finds and records invariants used in STMT.  */
2436 
2437 static void
2438 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2439 {
2440   ssa_op_iter iter;
2441   use_operand_p use_p;
2442   tree op;
2443 
2444   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2445     {
2446       op = USE_FROM_PTR (use_p);
2447       record_invariant (data, op, false);
2448     }
2449 }
2450 
2451 /* CALL calls an internal function.  If operand *OP_P will become an
2452    address when the call is expanded, return the type of the memory
2453    being addressed, otherwise return null.  */
2454 
2455 static tree
2456 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2457 {
2458   switch (gimple_call_internal_fn (call))
2459     {
2460     case IFN_MASK_LOAD:
2461       if (op_p == gimple_call_arg_ptr (call, 0))
2462 	return TREE_TYPE (gimple_call_lhs (call));
2463       return NULL_TREE;
2464 
2465     case IFN_MASK_STORE:
2466       if (op_p == gimple_call_arg_ptr (call, 0))
2467 	return TREE_TYPE (gimple_call_arg (call, 3));
2468       return NULL_TREE;
2469 
2470     default:
2471       return NULL_TREE;
2472     }
2473 }
2474 
2475 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2476    Return true if the operand will become an address when STMT
2477    is expanded and record the associated address use if so.  */
2478 
2479 static bool
2480 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2481 		       struct iv *iv)
2482 {
2483   /* Fail if base object of this memory reference is unknown.  */
2484   if (iv->base_object == NULL_TREE)
2485     return false;
2486 
2487   tree mem_type = NULL_TREE;
2488   if (gcall *call = dyn_cast <gcall *> (stmt))
2489     if (gimple_call_internal_p (call))
2490       mem_type = get_mem_type_for_internal_fn (call, op_p);
2491   if (mem_type)
2492     {
2493       iv = alloc_iv (data, iv->base, iv->step);
2494       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2495       return true;
2496     }
2497   return false;
2498 }
2499 
2500 /* Finds interesting uses of induction variables in the statement STMT.  */
2501 
2502 static void
2503 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2504 {
2505   struct iv *iv;
2506   tree op, *lhs, *rhs;
2507   ssa_op_iter iter;
2508   use_operand_p use_p;
2509   enum tree_code code;
2510 
2511   find_invariants_stmt (data, stmt);
2512 
2513   if (gimple_code (stmt) == GIMPLE_COND)
2514     {
2515       find_interesting_uses_cond (data, stmt);
2516       return;
2517     }
2518 
2519   if (is_gimple_assign (stmt))
2520     {
2521       lhs = gimple_assign_lhs_ptr (stmt);
2522       rhs = gimple_assign_rhs1_ptr (stmt);
2523 
2524       if (TREE_CODE (*lhs) == SSA_NAME)
2525 	{
2526 	  /* If the statement defines an induction variable, the uses are not
2527 	     interesting by themselves.  */
2528 
2529 	  iv = get_iv (data, *lhs);
2530 
2531 	  if (iv && !integer_zerop (iv->step))
2532 	    return;
2533 	}
2534 
2535       code = gimple_assign_rhs_code (stmt);
2536       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2537 	  && (REFERENCE_CLASS_P (*rhs)
2538 	      || is_gimple_val (*rhs)))
2539 	{
2540 	  if (REFERENCE_CLASS_P (*rhs))
2541 	    find_interesting_uses_address (data, stmt, rhs);
2542 	  else
2543 	    find_interesting_uses_op (data, *rhs);
2544 
2545 	  if (REFERENCE_CLASS_P (*lhs))
2546 	    find_interesting_uses_address (data, stmt, lhs);
2547 	  return;
2548 	}
2549       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2550 	{
2551 	  find_interesting_uses_cond (data, stmt);
2552 	  return;
2553 	}
2554 
2555       /* TODO -- we should also handle address uses of type
2556 
2557 	 memory = call (whatever);
2558 
2559 	 and
2560 
2561 	 call (memory).  */
2562     }
2563 
2564   if (gimple_code (stmt) == GIMPLE_PHI
2565       && gimple_bb (stmt) == data->current_loop->header)
2566     {
2567       iv = get_iv (data, PHI_RESULT (stmt));
2568 
2569       if (iv && !integer_zerop (iv->step))
2570 	return;
2571     }
2572 
2573   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2574     {
2575       op = USE_FROM_PTR (use_p);
2576 
2577       if (TREE_CODE (op) != SSA_NAME)
2578 	continue;
2579 
2580       iv = get_iv (data, op);
2581       if (!iv)
2582 	continue;
2583 
2584       if (!find_address_like_use (data, stmt, use_p->use, iv))
2585 	find_interesting_uses_op (data, op);
2586     }
2587 }
2588 
2589 /* Finds interesting uses of induction variables outside of loops
2590    on loop exit edge EXIT.  */
2591 
2592 static void
2593 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2594 {
2595   gphi *phi;
2596   gphi_iterator psi;
2597   tree def;
2598 
2599   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2600     {
2601       phi = psi.phi ();
2602       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2603       if (!virtual_operand_p (def))
2604 	find_interesting_uses_op (data, def);
2605     }
2606 }
2607 
2608 /* Return TRUE if OFFSET is within the range of the [base + offset]
2609    addressing mode for the memory reference represented by USE.  */
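
/* E.g., on a target whose [base + offset] addressing accepts only
   (hypothetically) 12-bit signed offsets, an OFFSET of 1 << 13 is
   rejected.  The check constructs a (reg + offset) RTX, cached per
   address space and access mode, and asks the backend whether it is a
   valid memory address.  */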
2610 
2611 static GTY (()) vec<rtx, va_gc> *addr_list;
2612 
2613 static bool
2614 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2615 {
2616   rtx reg, addr;
2617   unsigned list_index;
2618   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2619   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2620 
2621   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2622   if (list_index >= vec_safe_length (addr_list))
2623     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2624 
2625   addr = (*addr_list)[list_index];
2626   if (!addr)
2627     {
2628       addr_mode = targetm.addr_space.address_mode (as);
2629       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2630       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2631       (*addr_list)[list_index] = addr;
2632     }
2633   else
2634     addr_mode = GET_MODE (addr);
2635 
2636   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2637   return (memory_address_addr_space_p (mem_mode, addr, as));
2638 }
2639 
2640 /* Comparison function to sort group in ascending order of addr_offset.  */
2641 
2642 static int
2643 group_compare_offset (const void *a, const void *b)
2644 {
2645   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2646   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2647 
2648   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2649 }
2650 
2651 /* Check if small groups should be split.  Return true if no group
2652    contains more than two uses with distinct addr_offsets.  Return
2653    false otherwise.  We want to split such groups because:
2654 
2655      1) Small groups don't have much benefit and may interfere with
2656 	general candidate selection.
2657      2) The problem size with only small groups is usually small, and
2658 	the general algorithm can handle it well.
2659 
2660    TODO -- The above claim may not hold when we want to merge memory
2661    accesses with consecutive addresses.  */
2662 
2663 static bool
2664 split_small_address_groups_p (struct ivopts_data *data)
2665 {
2666   unsigned int i, j, distinct = 1;
2667   struct iv_use *pre;
2668   struct iv_group *group;
2669 
2670   for (i = 0; i < data->vgroups.length (); i++)
2671     {
2672       group = data->vgroups[i];
2673       if (group->vuses.length () == 1)
2674 	continue;
2675 
2676       gcc_assert (address_p (group->type));
2677       if (group->vuses.length () == 2)
2678 	{
2679 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2680 				      group->vuses[1]->addr_offset) > 0)
2681 	    std::swap (group->vuses[0], group->vuses[1]);
2682 	}
2683       else
2684 	group->vuses.qsort (group_compare_offset);
2685 
2686       if (distinct > 2)
2687 	continue;
2688 
2689       distinct = 1;
2690       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2691 	{
2692 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2693 	    {
2694 	      pre = group->vuses[j];
2695 	      distinct++;
2696 	    }
2697 
2698 	  if (distinct > 2)
2699 	    break;
2700 	}
2701     }
2702 
2703   return (distinct <= 2);
2704 }
2705 
2706 /* For each group of address type uses, this function further groups
2707    these uses according to the maximum offset supported by target's
2708    [base + offset] addressing mode.  */
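
/* E.g., a group whose uses have offsets {0, 4, 40000} may
   (hypothetically) be split into {0, 4} and {40000} when the target
   cannot encode the offset 40000 in its [base + offset] addressing
   mode.  */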
2709 
2710 static void
2711 split_address_groups (struct ivopts_data *data)
2712 {
2713   unsigned int i, j;
2714   /* Whether small groups should be split unconditionally.  */
2715   bool split_p = split_small_address_groups_p (data);
2716 
2717   for (i = 0; i < data->vgroups.length (); i++)
2718     {
2719       struct iv_group *new_group = NULL;
2720       struct iv_group *group = data->vgroups[i];
2721       struct iv_use *use = group->vuses[0];
2722 
2723       use->id = 0;
2724       use->group_id = group->id;
2725       if (group->vuses.length () == 1)
2726 	continue;
2727 
2728       gcc_assert (address_p (use->type));
2729 
2730       for (j = 1; j < group->vuses.length ();)
2731 	{
2732 	  struct iv_use *next = group->vuses[j];
2733 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2734 
2735 	  /* Split the group if asked to, or if the offset against the first
2736 	     use can't fit in the offset part of the addressing mode.  IV
2737 	     uses having the same offset are still kept in one group.  */
2738 	  if (maybe_ne (offset, 0)
2739 	      && (split_p || !addr_offset_valid_p (use, offset)))
2740 	    {
2741 	      if (!new_group)
2742 		new_group = record_group (data, group->type);
2743 	      group->vuses.ordered_remove (j);
2744 	      new_group->vuses.safe_push (next);
2745 	      continue;
2746 	    }
2747 
2748 	  next->id = j;
2749 	  next->group_id = group->id;
2750 	  j++;
2751 	}
2752     }
2753 }
2754 
2755 /* Finds uses of the induction variables that are interesting.  */
2756 
2757 static void
2758 find_interesting_uses (struct ivopts_data *data)
2759 {
2760   basic_block bb;
2761   gimple_stmt_iterator bsi;
2762   basic_block *body = get_loop_body (data->current_loop);
2763   unsigned i;
2764   edge e;
2765 
2766   for (i = 0; i < data->current_loop->num_nodes; i++)
2767     {
2768       edge_iterator ei;
2769       bb = body[i];
2770 
2771       FOR_EACH_EDGE (e, ei, bb->succs)
2772 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2773 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2774 	  find_interesting_uses_outside (data, e);
2775 
2776       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2777 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2778       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2779 	if (!is_gimple_debug (gsi_stmt (bsi)))
2780 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2781     }
2782   free (body);
2783 
2784   split_address_groups (data);
2785 
2786   if (dump_file && (dump_flags & TDF_DETAILS))
2787     {
2788       fprintf (dump_file, "\n<IV Groups>:\n");
2789       dump_groups (dump_file, data);
2790       fprintf (dump_file, "\n");
2791     }
2792 }
2793 
2794 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2795    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2796    we are at the top-level of the processed address.  */
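
/* E.g., for EXPR = &a[i + 3] with (hypothetically) 4-byte array
   elements, the result is &a[i] and *OFFSET is set to 12.  */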
2797 
2798 static tree
2799 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2800 		poly_int64 *offset)
2801 {
2802   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2803   enum tree_code code;
2804   tree type, orig_type = TREE_TYPE (expr);
2805   poly_int64 off0, off1;
2806   HOST_WIDE_INT st;
2807   tree orig_expr = expr;
2808 
2809   STRIP_NOPS (expr);
2810 
2811   type = TREE_TYPE (expr);
2812   code = TREE_CODE (expr);
2813   *offset = 0;
2814 
2815   switch (code)
2816     {
2817     case POINTER_PLUS_EXPR:
2818     case PLUS_EXPR:
2819     case MINUS_EXPR:
2820       op0 = TREE_OPERAND (expr, 0);
2821       op1 = TREE_OPERAND (expr, 1);
2822 
2823       op0 = strip_offset_1 (op0, false, false, &off0);
2824       op1 = strip_offset_1 (op1, false, false, &off1);
2825 
2826       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2827       if (op0 == TREE_OPERAND (expr, 0)
2828 	  && op1 == TREE_OPERAND (expr, 1))
2829 	return orig_expr;
2830 
2831       if (integer_zerop (op1))
2832 	expr = op0;
2833       else if (integer_zerop (op0))
2834 	{
2835 	  if (code == MINUS_EXPR)
2836 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2837 	  else
2838 	    expr = op1;
2839 	}
2840       else
2841 	expr = fold_build2 (code, type, op0, op1);
2842 
2843       return fold_convert (orig_type, expr);
2844 
2845     case MULT_EXPR:
2846       op1 = TREE_OPERAND (expr, 1);
2847       if (!cst_and_fits_in_hwi (op1))
2848 	return orig_expr;
2849 
2850       op0 = TREE_OPERAND (expr, 0);
2851       op0 = strip_offset_1 (op0, false, false, &off0);
2852       if (op0 == TREE_OPERAND (expr, 0))
2853 	return orig_expr;
2854 
2855       *offset = off0 * int_cst_value (op1);
2856       if (integer_zerop (op0))
2857 	expr = op0;
2858       else
2859 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2860 
2861       return fold_convert (orig_type, expr);
2862 
2863     case ARRAY_REF:
2864     case ARRAY_RANGE_REF:
2865       if (!inside_addr)
2866 	return orig_expr;
2867 
2868       step = array_ref_element_size (expr);
2869       if (!cst_and_fits_in_hwi (step))
2870 	break;
2871 
2872       st = int_cst_value (step);
2873       op1 = TREE_OPERAND (expr, 1);
2874       op1 = strip_offset_1 (op1, false, false, &off1);
2875       *offset = off1 * st;
2876 
2877       if (top_compref
2878 	  && integer_zerop (op1))
2879 	{
2880 	  /* Strip the component reference completely.  */
2881 	  op0 = TREE_OPERAND (expr, 0);
2882 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2883 	  *offset += off0;
2884 	  return op0;
2885 	}
2886       break;
2887 
2888     case COMPONENT_REF:
2889       {
2890 	tree field;
2891 
2892 	if (!inside_addr)
2893 	  return orig_expr;
2894 
2895 	tmp = component_ref_field_offset (expr);
2896 	field = TREE_OPERAND (expr, 1);
2897 	if (top_compref
2898 	    && cst_and_fits_in_hwi (tmp)
2899 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2900 	  {
2901 	    HOST_WIDE_INT boffset, abs_off;
2902 
2903 	    /* Strip the component reference completely.  */
2904 	    op0 = TREE_OPERAND (expr, 0);
2905 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2906 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2907 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2908 	    if (boffset < 0)
2909 	      abs_off = -abs_off;
2910 
2911 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2912 	    return op0;
2913 	  }
2914       }
2915       break;
2916 
2917     case ADDR_EXPR:
2918       op0 = TREE_OPERAND (expr, 0);
2919       op0 = strip_offset_1 (op0, true, true, &off0);
2920       *offset += off0;
2921 
2922       if (op0 == TREE_OPERAND (expr, 0))
2923 	return orig_expr;
2924 
2925       expr = build_fold_addr_expr (op0);
2926       return fold_convert (orig_type, expr);
2927 
2928     case MEM_REF:
2929       /* ???  Offset operand?  */
2930       inside_addr = false;
2931       break;
2932 
2933     default:
2934       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2935 	return build_int_cst (orig_type, 0);
2936       return orig_expr;
2937     }
2938 
2939   /* Default handling of expressions for which we want to recurse into
2940      the first operand.  */
2941   op0 = TREE_OPERAND (expr, 0);
2942   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2943   *offset += off0;
2944 
2945   if (op0 == TREE_OPERAND (expr, 0)
2946       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2947     return orig_expr;
2948 
2949   expr = copy_node (expr);
2950   TREE_OPERAND (expr, 0) = op0;
2951   if (op1)
2952     TREE_OPERAND (expr, 1) = op1;
2953 
2954   /* Inside address, we might strip the top level component references,
2955      thus changing type of the expression.  Handling of ADDR_EXPR
2956      will fix that.  */
2957   expr = fold_convert (orig_type, expr);
2958 
2959   return expr;
2960 }
2961 
2962 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2963 
2964 tree
2965 strip_offset (tree expr, poly_uint64_pod *offset)
2966 {
2967   poly_int64 off;
2968   tree core = strip_offset_1 (expr, false, false, &off);
2969   *offset = off;
2970   return core;
2971 }
2972 
2973 /* Returns variant of TYPE that can be used as base for different uses.
2974    We return unsigned type with the same precision, which avoids problems
2975    with overflows.  */
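
/* E.g., int maps to unsigned int, and a pointer type maps to the
   unsigned integer type of the same precision.  */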
2976 
2977 static tree
2978 generic_type_for (tree type)
2979 {
2980   if (POINTER_TYPE_P (type))
2981     return unsigned_type_for (type);
2982 
2983   if (TYPE_UNSIGNED (type))
2984     return type;
2985 
2986   return unsigned_type_for (type);
2987 }
2988 
2989 /* Private data for walk_tree.  */
2990 
2991 struct walk_tree_data
2992 {
2993   bitmap *inv_vars;
2994   struct ivopts_data *idata;
2995 };
2996 
2997 /* Callback function for walk_tree; it records invariants and symbol
2998    references in *EXPR_P.  DATA is the structure storing result info.  */
2999 
3000 static tree
3001 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
3002 {
3003   tree op = *expr_p;
3004   struct version_info *info;
3005   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3006 
3007   if (TREE_CODE (op) != SSA_NAME)
3008     return NULL_TREE;
3009 
3010   info = name_info (wdata->idata, op);
3011   /* Because we expand simple operations when finding IVs, a loop invariant
3012      variable that isn't referred to by the original loop could be used now.
3013      Record such invariant variables here.  */
3014   if (!info->iv)
3015     {
3016       struct ivopts_data *idata = wdata->idata;
3017       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3018 
3019       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3020 	{
3021 	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3022 	  record_invariant (idata, op, false);
3023 	}
3024     }
3025   if (!info->inv_id || info->has_nonlin_use)
3026     return NULL_TREE;
3027 
3028   if (!*wdata->inv_vars)
3029     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3030   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3031 
3032   return NULL_TREE;
3033 }
3034 
3035 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
3036    should store them.  */
3037 
3038 static inline void
3039 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3040 {
3041   struct walk_tree_data wdata;
3042 
3043   if (!inv_vars)
3044     return;
3045 
3046   wdata.idata = data;
3047   wdata.inv_vars = inv_vars;
3048   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3049 }
3050 
3051 /* Get the entry from the invariant expr hash table for INV_EXPR.  A new
3052    entry will be recorded if one doesn't exist yet.  Given the two exprs
3053      inv_expr + cst1, inv_expr + cst2
3054    it is hard to decide whether the constant part should be stripped or
3055    not.  We choose not to strip it, based on the facts below:
3056      1) We need to count the ADD cost for the constant part if it is
3057 	stripped, which isn't always trivial where this function is called.
3058      2) Stripping the constant away may conflict with the subsequent loop
3059 	invariant hoisting pass.
3060      3) Not stripping the constant results in more invariant exprs, which
3061 	usually leads to decisions preferring lower register pressure.  */
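
/* E.g., inv_expr + 4 and inv_expr + 8 are therefore recorded as two
   distinct entries, instead of both sharing a single stripped
   inv_expr.  */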
3062 
3063 static iv_inv_expr_ent *
3064 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3065 {
3066   STRIP_NOPS (inv_expr);
3067 
3068   if (poly_int_tree_p (inv_expr)
3069       || TREE_CODE (inv_expr) == SSA_NAME)
3070     return NULL;
3071 
3072   /* Don't strip constant part away as we used to.  */
3073 
3074   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3075   struct iv_inv_expr_ent ent;
3076   ent.expr = inv_expr;
3077   ent.hash = iterative_hash_expr (inv_expr, 0);
3078   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3079 
3080   if (!*slot)
3081     {
3082       *slot = XNEW (struct iv_inv_expr_ent);
3083       (*slot)->expr = inv_expr;
3084       (*slot)->hash = ent.hash;
3085       (*slot)->id = ++data->max_inv_expr_id;
3086     }
3087 
3088   return *slot;
3089 }
3090 
3091 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3092    position to POS.  If USE is not NULL, the candidate is set as related to
3093    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3094    replacement of the final value of the iv by a direct computation.  */
3095 
3096 static struct iv_cand *
3097 add_candidate_1 (struct ivopts_data *data,
3098 		 tree base, tree step, bool important, enum iv_position pos,
3099 		 struct iv_use *use, gimple *incremented_at,
3100 		 struct iv *orig_iv = NULL)
3101 {
3102   unsigned i;
3103   struct iv_cand *cand = NULL;
3104   tree type, orig_type;
3105 
3106   gcc_assert (base && step);
3107 
3108   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3109      live, but the ivopts code may replace a real pointer with one
3110      pointing before or after the memory block that is then adjusted
3111      into the memory block during the loop.  FIXME: It would likely be
3112      better to actually force the pointer live and still use ivopts;
3113      for example, it would be enough to write the pointer into memory
3114      and keep it there until after the loop.  */
3115   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3116     return NULL;
3117 
3118   /* For non-original variables, make sure their values are computed in a type
3119      that does not invoke undefined behavior on overflows (since in general,
3120      we cannot prove that these induction variables are non-wrapping).  */
3121   if (pos != IP_ORIGINAL)
3122     {
3123       orig_type = TREE_TYPE (base);
3124       type = generic_type_for (orig_type);
3125       if (type != orig_type)
3126 	{
3127 	  base = fold_convert (type, base);
3128 	  step = fold_convert (type, step);
3129 	}
3130     }
3131 
3132   for (i = 0; i < data->vcands.length (); i++)
3133     {
3134       cand = data->vcands[i];
3135 
3136       if (cand->pos != pos)
3137 	continue;
3138 
3139       if (cand->incremented_at != incremented_at
3140 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3141 	      && cand->ainc_use != use))
3142 	continue;
3143 
3144       if (operand_equal_p (base, cand->iv->base, 0)
3145 	  && operand_equal_p (step, cand->iv->step, 0)
3146 	  && (TYPE_PRECISION (TREE_TYPE (base))
3147 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3148 	break;
3149     }
3150 
3151   if (i == data->vcands.length ())
3152     {
3153       cand = XCNEW (struct iv_cand);
3154       cand->id = i;
3155       cand->iv = alloc_iv (data, base, step);
3156       cand->pos = pos;
3157       if (pos != IP_ORIGINAL)
3158 	{
3159 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3160 	  cand->var_after = cand->var_before;
3161 	}
3162       cand->important = important;
3163       cand->incremented_at = incremented_at;
3164       data->vcands.safe_push (cand);
3165 
3166       if (!poly_int_tree_p (step))
3167 	{
3168 	  find_inv_vars (data, &step, &cand->inv_vars);
3169 
3170 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3171 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3172 	  if (inv_expr != NULL)
3173 	    {
3174 	      cand->inv_exprs = cand->inv_vars;
3175 	      cand->inv_vars = NULL;
3176 	      if (cand->inv_exprs)
3177 		bitmap_clear (cand->inv_exprs);
3178 	      else
3179 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3180 
3181 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3182 	    }
3183 	}
3184 
3185       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3186 	cand->ainc_use = use;
3187       else
3188 	cand->ainc_use = NULL;
3189 
3190       cand->orig_iv = orig_iv;
3191       if (dump_file && (dump_flags & TDF_DETAILS))
3192 	dump_cand (dump_file, cand);
3193     }
3194 
3195   cand->important |= important;
3196 
3197   /* Relate candidate to the group for which it is added.  */
3198   if (use)
3199     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3200 
3201   return cand;
3202 }
3203 
3204 /* Returns true if incrementing the induction variable at the end of the LOOP
3205    is allowed.
3206 
3207    The purpose is to avoid splitting latch edge with a biv increment, thus
3208    creating a jump, possibly confusing other optimization passes and leaving
3209    less freedom to the scheduler.  So we allow IP_END only if IP_NORMAL is not
3210    available (so we do not have a better alternative), or if the latch edge
3211    is already nonempty.  */
3212 
3213 static bool
3214 allow_ip_end_pos_p (struct loop *loop)
3215 {
3216   if (!ip_normal_pos (loop))
3217     return true;
3218 
3219   if (!empty_block_p (ip_end_pos (loop)))
3220     return true;
3221 
3222   return false;
3223 }
3224 
3225 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3226    Important field is set to IMPORTANT.  */
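
/* E.g., if USE is a load from *p and STEP equals the access size, a
   candidate incremented right after the use corresponds to a
   post-increment load (*p++) on targets that support it.  */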
3227 
3228 static void
3229 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3230 			bool important, struct iv_use *use)
3231 {
3232   basic_block use_bb = gimple_bb (use->stmt);
3233   machine_mode mem_mode;
3234   unsigned HOST_WIDE_INT cstepi;
3235 
3236   /* If we insert the increment in any position other than the standard
3237      ones, we must ensure that it is incremented once per iteration.
3238      It must not be in an inner nested loop, or one side of an if
3239      statement.  */
3240   if (use_bb->loop_father != data->current_loop
3241       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3242       || stmt_can_throw_internal (cfun, use->stmt)
3243       || !cst_and_fits_in_hwi (step))
3244     return;
3245 
3246   cstepi = int_cst_value (step);
3247 
3248   mem_mode = TYPE_MODE (use->mem_type);
3249   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3250 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3251        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3252       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3253 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3254 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3255     {
3256       enum tree_code code = MINUS_EXPR;
3257       tree new_base;
3258       tree new_step = step;
3259 
3260       if (POINTER_TYPE_P (TREE_TYPE (base)))
3261 	{
3262 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3263 	  code = POINTER_PLUS_EXPR;
3264 	}
3265       else
3266 	new_step = fold_convert (TREE_TYPE (base), new_step);
3267       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3268       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3269 		       use->stmt);
3270     }
3271   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3272 	|| USE_STORE_POST_INCREMENT (mem_mode))
3273        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3274       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3275 	   || USE_STORE_POST_DECREMENT (mem_mode))
3276 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3277     {
3278       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3279 		       use->stmt);
3280     }
3281 }
3282 
3283 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3284    position to POS.  If USE is not NULL, the candidate is set as related to
3285    it.  The candidate computation is scheduled before the exit condition
3286    and at the end of the loop.  */
3287 
3288 static void
3289 add_candidate (struct ivopts_data *data,
3290 	       tree base, tree step, bool important, struct iv_use *use,
3291 	       struct iv *orig_iv = NULL)
3292 {
3293   if (ip_normal_pos (data->current_loop))
3294     add_candidate_1 (data, base, step, important,
3295 		     IP_NORMAL, use, NULL, orig_iv);
3296   if (ip_end_pos (data->current_loop)
3297       && allow_ip_end_pos_p (data->current_loop))
3298     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3299 }
3300 
3301 /* Adds standard iv candidates.  */
3302 
3303 static void
3304 add_standard_iv_candidates (struct ivopts_data *data)
3305 {
3306   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3307 
3308   /* The same for a double-integer type if it is still fast enough.  */
3309   if (TYPE_PRECISION
3310 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3311       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3312     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3313 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3314 
3315   /* The same for a double-integer type if it is still fast enough.  */
3316   if (TYPE_PRECISION
3317 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3318       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3319     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3320 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3321 }
3322 
3323 
3324 /* Adds candidates based on the old induction variable IV.  */
3325 
3326 static void
3327 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3328 {
3329   gimple *phi;
3330   tree def;
3331   struct iv_cand *cand;
3332 
3333   /* Check if this biv is used in address type use.  */
3334   if (iv->no_overflow  && iv->have_address_use
3335       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3336       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3337     {
3338       tree base = fold_convert (sizetype, iv->base);
3339       tree step = fold_convert (sizetype, iv->step);
3340 
3341       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3342       add_candidate (data, base, step, true, NULL, iv);
3343       /* Add iv cand of the original type only if it has nonlinear use.  */
3344       if (iv->nonlin_use)
3345 	add_candidate (data, iv->base, iv->step, true, NULL);
3346     }
3347   else
3348     add_candidate (data, iv->base, iv->step, true, NULL);
3349 
3350   /* The same, but with initial value zero.  */
3351   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3352     add_candidate (data, size_int (0), iv->step, true, NULL);
3353   else
3354     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3355 		   iv->step, true, NULL);
3356 
3357   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3358   if (gimple_code (phi) == GIMPLE_PHI)
3359     {
3360       /* Additionally record the possibility of leaving the original iv
3361 	 untouched.  */
3362       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3363       /* Don't add candidate if it's from another PHI node because
3364 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3365       phi = SSA_NAME_DEF_STMT (def);
3366       if (gimple_code (phi) != GIMPLE_PHI)
3367 	{
3368 	  cand = add_candidate_1 (data,
3369 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3370 				  SSA_NAME_DEF_STMT (def));
3371 	  if (cand)
3372 	    {
3373 	      cand->var_before = iv->ssa_name;
3374 	      cand->var_after = def;
3375 	    }
3376 	}
3377       else
3378 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3379     }
3380 }
3381 
3382 /* Adds candidates based on the old induction variables.  */
3383 
3384 static void
3385 add_iv_candidate_for_bivs (struct ivopts_data *data)
3386 {
3387   unsigned i;
3388   struct iv *iv;
3389   bitmap_iterator bi;
3390 
3391   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3392     {
3393       iv = ver_info (data, i)->iv;
3394       if (iv && iv->biv_p && !integer_zerop (iv->step))
3395 	add_iv_candidate_for_biv (data, iv);
3396     }
3397 }
3398 
3399 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3400 
3401 static void
3402 record_common_cand (struct ivopts_data *data, tree base,
3403 		    tree step, struct iv_use *use)
3404 {
3405   struct iv_common_cand ent;
3406   struct iv_common_cand **slot;
3407 
3408   ent.base = base;
3409   ent.step = step;
3410   ent.hash = iterative_hash_expr (base, 0);
3411   ent.hash = iterative_hash_expr (step, ent.hash);
3412 
3413   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3414   if (*slot == NULL)
3415     {
3416       *slot = new iv_common_cand ();
3417       (*slot)->base = base;
3418       (*slot)->step = step;
3419       (*slot)->uses.create (8);
3420       (*slot)->hash = ent.hash;
3421       data->iv_common_cands.safe_push ((*slot));
3422     }
3423 
3424   gcc_assert (use != NULL);
3425   (*slot)->uses.safe_push (use);
3426   return;
3427 }
3428 
3429 /* Comparison function used to sort common candidates.  */
3430 
3431 static int
3432 common_cand_cmp (const void *p1, const void *p2)
3433 {
3434   unsigned n1, n2;
3435   const struct iv_common_cand *const *const ccand1
3436     = (const struct iv_common_cand *const *)p1;
3437   const struct iv_common_cand *const *const ccand2
3438     = (const struct iv_common_cand *const *)p2;
3439 
3440   n1 = (*ccand1)->uses.length ();
3441   n2 = (*ccand2)->uses.length ();
3442   return n2 - n1;
3443 }
3444 
3445 /* Adds IV candidates based on the common candidates recorded.  */
3446 
3447 static void
3448 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3449 {
3450   unsigned i, j;
3451   struct iv_cand *cand_1, *cand_2;
3452 
3453   data->iv_common_cands.qsort (common_cand_cmp);
3454   for (i = 0; i < data->iv_common_cands.length (); i++)
3455     {
3456       struct iv_common_cand *ptr = data->iv_common_cands[i];
3457 
3458       /* Only add IV candidate if it's derived from multiple uses.  */
3459       if (ptr->uses.length () <= 1)
3460 	break;
3461 
3462       cand_1 = NULL;
3463       cand_2 = NULL;
3464       if (ip_normal_pos (data->current_loop))
3465 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3466 				  false, IP_NORMAL, NULL, NULL);
3467 
3468       if (ip_end_pos (data->current_loop)
3469 	  && allow_ip_end_pos_p (data->current_loop))
3470 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3471 				  false, IP_END, NULL, NULL);
3472 
3473       /* Bind deriving uses and the new candidates.  */
3474       for (j = 0; j < ptr->uses.length (); j++)
3475 	{
3476 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3477 	  if (cand_1)
3478 	    bitmap_set_bit (group->related_cands, cand_1->id);
3479 	  if (cand_2)
3480 	    bitmap_set_bit (group->related_cands, cand_2->id);
3481 	}
3482     }
3483 
3484   /* Release the data, since it is no longer needed from this point on.  */
3485   data->iv_common_cand_tab->empty ();
3486   data->iv_common_cands.truncate (0);
3487 }
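
/* Illustrative sketch (hypothetical uses): suppose two address uses a[i]
   and b[i] are driven by ivs {&a + i*4, 4} and {&b + i*4, 4}.  Each use
   records the zero-based common candidate {(sizetype) 0, 4}, so the entry
   in IV_COMMON_CAND_TAB accumulates two uses, a single shared candidate is
   added for it, and both groups get that candidate in related_cands.  */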
3488 
3489 /* Adds candidates based on the value of USE's iv.  */
3490 
3491 static void
3492 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3493 {
3494   poly_uint64 offset;
3495   tree base;
3496   struct iv *iv = use->iv;
3497   tree basetype = TREE_TYPE (iv->base);
3498 
3499   /* Don't add a candidate for an iv_use whose type is neither integer nor
3500      pointer, or lacks mode precision; instead, add a candidate for the
3501      corresponding scev in an unsigned type of the same precision (PR93674).  */
3502   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3503       || !type_has_mode_precision_p (basetype))
3504     {
3505       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3506 						 TYPE_UNSIGNED (basetype));
3507       add_candidate (data, fold_convert (basetype, iv->base),
3508 		     fold_convert (basetype, iv->step), false, NULL);
3509       return;
3510     }
3511 
3512   add_candidate (data, iv->base, iv->step, false, use);
3513 
3514   /* Record common candidate for use in case it can be shared by others.  */
3515   record_common_cand (data, iv->base, iv->step, use);
3516 
3517   /* Record common candidate with initial value zero.  */
3518   basetype = TREE_TYPE (iv->base);
3519   if (POINTER_TYPE_P (basetype))
3520     basetype = sizetype;
3521   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3522 
3523   /* Record common candidate with constant offset stripped in base.
3524      Like the use itself, we also add a candidate directly for it.  */
3525   base = strip_offset (iv->base, &offset);
3526   if (maybe_ne (offset, 0U) || base != iv->base)
3527     {
3528       record_common_cand (data, base, iv->step, use);
3529       add_candidate (data, base, iv->step, false, use);
3530     }
3531 
3532   /* Record common candidate with base_object removed in base.  */
3533   base = iv->base;
3534   STRIP_NOPS (base);
3535   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3536     {
3537       tree step = iv->step;
3538 
3539       STRIP_NOPS (step);
3540       base = TREE_OPERAND (base, 1);
3541       step = fold_convert (sizetype, step);
3542       record_common_cand (data, base, step, use);
3543       /* Also record common candidate with offset stripped.  */
3544       base = strip_offset (base, &offset);
3545       if (maybe_ne (offset, 0U))
3546 	record_common_cand (data, base, step, use);
3547     }
3548 
3549   /* Finally, add auto-increment candidates.  Make such variables
3550      important, since other iv uses with the same base object may be
3551      based on them.  */
3552   if (use != NULL && address_p (use->type))
3553     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3554 }
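
/* A hedged example of the offset stripping above (hypothetical use): for
   an iv with base &a[4] (i.e. a + 16, element size 4) and step 4,
   strip_offset yields base a and offset 16, so both {a + 16, 4} and
   {a, 4} are added; the latter can then be shared with a sibling use
   such as &a[6] through the common candidate table.  */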
3555 
3556 /* Adds candidates based on the uses.  */
3557 
3558 static void
3559 add_iv_candidate_for_groups (struct ivopts_data *data)
3560 {
3561   unsigned i;
3562 
3563   /* Only add a candidate for the first use in each group.  */
3564   for (i = 0; i < data->vgroups.length (); i++)
3565     {
3566       struct iv_group *group = data->vgroups[i];
3567 
3568       gcc_assert (group->vuses[0] != NULL);
3569       add_iv_candidate_for_use (data, group->vuses[0]);
3570     }
3571   add_iv_candidate_derived_from_uses (data);
3572 }
3573 
3574 /* Record important candidates and add them to related_cands bitmaps.  */
3575 
3576 static void
3577 record_important_candidates (struct ivopts_data *data)
3578 {
3579   unsigned i;
3580   struct iv_group *group;
3581 
3582   for (i = 0; i < data->vcands.length (); i++)
3583     {
3584       struct iv_cand *cand = data->vcands[i];
3585 
3586       if (cand->important)
3587 	bitmap_set_bit (data->important_candidates, i);
3588     }
3589 
3590   data->consider_all_candidates = (data->vcands.length ()
3591 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3592 
3593   /* Add important candidates to groups' related_cands bitmaps.  */
3594   for (i = 0; i < data->vgroups.length (); i++)
3595     {
3596       group = data->vgroups[i];
3597       bitmap_ior_into (group->related_cands, data->important_candidates);
3598     }
3599 }
3600 
3601 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3602    If consider_all_candidates is true, we use an array indexed by candidate id,
3603    otherwise we allocate a small open-addressed hash table for every group.  */
3604 
3605 static void
3606 alloc_use_cost_map (struct ivopts_data *data)
3607 {
3608   unsigned i, size, s;
3609 
3610   for (i = 0; i < data->vgroups.length (); i++)
3611     {
3612       struct iv_group *group = data->vgroups[i];
3613 
3614       if (data->consider_all_candidates)
3615 	size = data->vcands.length ();
3616       else
3617 	{
3618 	  s = bitmap_count_bits (group->related_cands);
3619 
3620 	  /* Round up to a power of two, so that computing the modulus by it is fast.  */
3621 	  size = s ? (1 << ceil_log2 (s)) : 1;
3622 	}
3623 
3624       group->n_map_members = size;
3625       group->cost_map = XCNEWVEC (struct cost_pair, size);
3626     }
3627 }
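
/* Sizing example (hypothetical counts): a group related to 5 candidates
   gets a cost map of size 8, the next power of two, so set_group_iv_cost
   can reduce a candidate id with "id & 7" instead of a division; e.g. the
   candidate with id 13 starts probing at slot 13 & 7 == 5.  */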
3628 
3629 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3630    on invariants INV_VARS and that the value used in expressing it is
3631    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3632 
3633 static void
3634 set_group_iv_cost (struct ivopts_data *data,
3635 		   struct iv_group *group, struct iv_cand *cand,
3636 		   comp_cost cost, bitmap inv_vars, tree value,
3637 		   enum tree_code comp, bitmap inv_exprs)
3638 {
3639   unsigned i, s;
3640 
3641   if (cost.infinite_cost_p ())
3642     {
3643       BITMAP_FREE (inv_vars);
3644       BITMAP_FREE (inv_exprs);
3645       return;
3646     }
3647 
3648   if (data->consider_all_candidates)
3649     {
3650       group->cost_map[cand->id].cand = cand;
3651       group->cost_map[cand->id].cost = cost;
3652       group->cost_map[cand->id].inv_vars = inv_vars;
3653       group->cost_map[cand->id].inv_exprs = inv_exprs;
3654       group->cost_map[cand->id].value = value;
3655       group->cost_map[cand->id].comp = comp;
3656       return;
3657     }
3658 
3659   /* n_map_members is a power of two, so this computes modulo.  */
3660   s = cand->id & (group->n_map_members - 1);
3661   for (i = s; i < group->n_map_members; i++)
3662     if (!group->cost_map[i].cand)
3663       goto found;
3664   for (i = 0; i < s; i++)
3665     if (!group->cost_map[i].cand)
3666       goto found;
3667 
3668   gcc_unreachable ();
3669 
3670 found:
3671   group->cost_map[i].cand = cand;
3672   group->cost_map[i].cost = cost;
3673   group->cost_map[i].inv_vars = inv_vars;
3674   group->cost_map[i].inv_exprs = inv_exprs;
3675   group->cost_map[i].value = value;
3676   group->cost_map[i].comp = comp;
3677 }
3678 
3679 /* Gets cost of (GROUP, CAND) pair.  */
3680 
3681 static struct cost_pair *
3682 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3683 		   struct iv_cand *cand)
3684 {
3685   unsigned i, s;
3686   struct cost_pair *ret;
3687 
3688   if (!cand)
3689     return NULL;
3690 
3691   if (data->consider_all_candidates)
3692     {
3693       ret = group->cost_map + cand->id;
3694       if (!ret->cand)
3695 	return NULL;
3696 
3697       return ret;
3698     }
3699 
3700   /* n_map_members is a power of two, so this computes modulo.  */
3701   s = cand->id & (group->n_map_members - 1);
3702   for (i = s; i < group->n_map_members; i++)
3703     if (group->cost_map[i].cand == cand)
3704       return group->cost_map + i;
3705     else if (group->cost_map[i].cand == NULL)
3706       return NULL;
3707   for (i = 0; i < s; i++)
3708     if (group->cost_map[i].cand == cand)
3709       return group->cost_map + i;
3710     else if (group->cost_map[i].cand == NULL)
3711       return NULL;
3712 
3713   return NULL;
3714 }
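
/* Probing sketch for the two functions above (hypothetical ids): with
   n_map_members == 8, candidate id 10 starts at slot 10 & 7 == 2;
   occupied slots holding other candidates are skipped, the scan wraps
   from slot 7 back to slot 0, and the first NULL slot terminates the
   lookup, meaning the pair was never recorded.  */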
3715 
3716 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
3717 static rtx
3718 produce_memory_decl_rtl (tree obj, int *regno)
3719 {
3720   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3721   machine_mode address_mode = targetm.addr_space.address_mode (as);
3722   rtx x;
3723 
3724   gcc_assert (obj);
3725   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3726     {
3727       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3728       x = gen_rtx_SYMBOL_REF (address_mode, name);
3729       SET_SYMBOL_REF_DECL (x, obj);
3730       x = gen_rtx_MEM (DECL_MODE (obj), x);
3731       set_mem_addr_space (x, as);
3732       targetm.encode_section_info (obj, x, true);
3733     }
3734   else
3735     {
3736       x = gen_raw_REG (address_mode, (*regno)++);
3737       x = gen_rtx_MEM (DECL_MODE (obj), x);
3738       set_mem_addr_space (x, as);
3739     }
3740 
3741   return x;
3742 }
3743 
3744 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3745    walk_tree.  DATA contains the actual fake register number.  */
3746 
3747 static tree
3748 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3749 {
3750   tree obj = NULL_TREE;
3751   rtx x = NULL_RTX;
3752   int *regno = (int *) data;
3753 
3754   switch (TREE_CODE (*expr_p))
3755     {
3756     case ADDR_EXPR:
3757       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3758 	   handled_component_p (*expr_p);
3759 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3760 	continue;
3761       obj = *expr_p;
3762       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3763 	x = produce_memory_decl_rtl (obj, regno);
3764       break;
3765 
3766     case SSA_NAME:
3767       *ws = 0;
3768       obj = SSA_NAME_VAR (*expr_p);
3769       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3770       if (!obj)
3771 	return NULL_TREE;
3772       if (!DECL_RTL_SET_P (obj))
3773 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3774       break;
3775 
3776     case VAR_DECL:
3777     case PARM_DECL:
3778     case RESULT_DECL:
3779       *ws = 0;
3780       obj = *expr_p;
3781 
3782       if (DECL_RTL_SET_P (obj))
3783 	break;
3784 
3785       if (DECL_MODE (obj) == BLKmode)
3786 	x = produce_memory_decl_rtl (obj, regno);
3787       else
3788 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3789 
3790       break;
3791 
3792     default:
3793       break;
3794     }
3795 
3796   if (x)
3797     {
3798       decl_rtl_to_reset.safe_push (obj);
3799       SET_DECL_RTL (obj, x);
3800     }
3801 
3802   return NULL_TREE;
3803 }
3804 
3805 /* Determines cost of the computation of EXPR.  */
3806 
3807 static unsigned
3808 computation_cost (tree expr, bool speed)
3809 {
3810   rtx_insn *seq;
3811   rtx rslt;
3812   tree type = TREE_TYPE (expr);
3813   unsigned cost;
3814   /* Avoid using hard regs in ways which may be unsupported.  */
3815   int regno = LAST_VIRTUAL_REGISTER + 1;
3816   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3817   enum node_frequency real_frequency = node->frequency;
3818 
3819   node->frequency = NODE_FREQUENCY_NORMAL;
3820   crtl->maybe_hot_insn_p = speed;
3821   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3822   start_sequence ();
3823   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3824   seq = get_insns ();
3825   end_sequence ();
3826   default_rtl_profile ();
3827   node->frequency = real_frequency;
3828 
3829   cost = seq_cost (seq, speed);
3830   if (MEM_P (rslt))
3831     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3832 			  TYPE_ADDR_SPACE (type), speed);
3833   else if (!REG_P (rslt))
3834     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3835 
3836   return cost;
3837 }
3838 
3839 /* Returns variable containing the value of candidate CAND at statement AT.  */
3840 
3841 static tree
3842 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3843 {
3844   if (stmt_after_increment (loop, cand, stmt))
3845     return cand->var_after;
3846   else
3847     return cand->var_before;
3848 }
3849 
3850 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3851    same precision that is at least as wide as the precision of TYPE, stores
3852    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3853    type of A and B.  */
3854 
3855 static tree
3856 determine_common_wider_type (tree *a, tree *b)
3857 {
3858   tree wider_type = NULL;
3859   tree suba, subb;
3860   tree atype = TREE_TYPE (*a);
3861 
3862   if (CONVERT_EXPR_P (*a))
3863     {
3864       suba = TREE_OPERAND (*a, 0);
3865       wider_type = TREE_TYPE (suba);
3866       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3867 	return atype;
3868     }
3869   else
3870     return atype;
3871 
3872   if (CONVERT_EXPR_P (*b))
3873     {
3874       subb = TREE_OPERAND (*b, 0);
3875       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3876 	return atype;
3877     }
3878   else
3879     return atype;
3880 
3881   *a = suba;
3882   *b = subb;
3883   return wider_type;
3884 }
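
/* Example (hypothetical operands): if *a is (unsigned int) x and *b is
   (unsigned int) y, where x and y are both of type long (at least as wide
   as unsigned int), then x and y are stored back through A and B and
   "long" is returned, so the caller can fold the difference x - y in the
   wider type before truncating to unsigned int.  */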
3885 
3886 /* Determines the expression by which USE is expressed from induction variable
3887    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3888    decomposed form: the invariant part is stored in AFF_INV, the variant
3889    part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if it
3890    is non-null.  Returns false if USE cannot be expressed using CAND.  */
3891 
3892 static bool
3893 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3894 		       struct iv_cand *cand, struct aff_tree *aff_inv,
3895 		       struct aff_tree *aff_var, widest_int *prat = NULL)
3896 {
3897   tree ubase = use->iv->base, ustep = use->iv->step;
3898   tree cbase = cand->iv->base, cstep = cand->iv->step;
3899   tree common_type, uutype, var, cstep_common;
3900   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3901   aff_tree aff_cbase;
3902   widest_int rat;
3903 
3904   /* We must have enough precision to express the values of the use.  */
3905   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3906     return false;
3907 
3908   var = var_at_stmt (loop, cand, at);
3909   uutype = unsigned_type_for (utype);
3910 
3911   /* If the conversion is not noop, perform it.  */
3912   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3913     {
3914       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3915 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3916 	{
3917 	  tree inner_base, inner_step, inner_type;
3918 	  inner_base = TREE_OPERAND (cbase, 0);
3919 	  if (CONVERT_EXPR_P (cstep))
3920 	    inner_step = TREE_OPERAND (cstep, 0);
3921 	  else
3922 	    inner_step = cstep;
3923 
3924 	  inner_type = TREE_TYPE (inner_base);
3925 	  /* If the candidate is added from a biv whose type is smaller than
3926 	     ctype, we know that neither the candidate nor the biv overflows.
3927 	     In this case, it's safe to skip the conversion in the candidate.
3928 	     As an example, (unsigned short)((unsigned long)A) equals
3929 	     (unsigned short)A, if A has a type no larger than short.  */
3930 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3931 	    {
3932 	      cbase = inner_base;
3933 	      cstep = inner_step;
3934 	    }
3935 	}
3936       cbase = fold_convert (uutype, cbase);
3937       cstep = fold_convert (uutype, cstep);
3938       var = fold_convert (uutype, var);
3939     }
3940 
3941   /* Ratio is 1 when computing the value of biv cand by itself.
3942      We can't rely on constant_multiple_of in this case because the
3943      use is created after the original biv is selected.  The call
3944      could fail because of inconsistent fold behavior.  See PR68021
3945      for more information.  */
3946   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3947     {
3948       gcc_assert (is_gimple_assign (use->stmt));
3949       gcc_assert (use->iv->ssa_name == cand->var_after);
3950       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3951       rat = 1;
3952     }
3953   else if (!constant_multiple_of (ustep, cstep, &rat))
3954     return false;
3955 
3956   if (prat)
3957     *prat = rat;
3958 
3959   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3960      type, we achieve better folding by computing their difference in this
3961      wider type, and cast the result to UUTYPE.  We do not need to worry about
3962      overflows, as all the arithmetic will in the end be performed in UUTYPE
3963      anyway.  */
3964   common_type = determine_common_wider_type (&ubase, &cbase);
3965 
3966   /* use = ubase - ratio * cbase + ratio * var.  */
3967   tree_to_aff_combination (ubase, common_type, aff_inv);
3968   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3969   tree_to_aff_combination (var, uutype, aff_var);
3970 
3971   /* We need to shift the value if we are after the increment.  */
3972   if (stmt_after_increment (loop, cand, at))
3973     {
3974       aff_tree cstep_aff;
3975 
3976       if (common_type != uutype)
3977 	cstep_common = fold_convert (common_type, cstep);
3978       else
3979 	cstep_common = cstep;
3980 
3981       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3982       aff_combination_add (&aff_cbase, &cstep_aff);
3983     }
3984 
3985   aff_combination_scale (&aff_cbase, -rat);
3986   aff_combination_add (aff_inv, &aff_cbase);
3987   if (common_type != uutype)
3988     aff_combination_convert (aff_inv, uutype);
3989 
3990   aff_combination_scale (aff_var, rat);
3991   return true;
3992 }
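
/* A small worked instance of "use = ubase - ratio * cbase + ratio * var"
   (the values are only hypothetical): expressing use {4, 4} (base 4,
   step 4) by candidate {0, 1} gives rat == 4 from
   constant_multiple_of (4, 1, &rat), hence AFF_INV = {4} and
   AFF_VAR = {4 * var}.  */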
3993 
3994 /* Determines the expression by which USE is expressed from induction variable
3995    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3996    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3997 
3998 static bool
3999 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
4000 		     struct iv_cand *cand, struct aff_tree *aff)
4001 {
4002   aff_tree aff_var;
4003 
4004   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4005     return false;
4006 
4007   aff_combination_add (aff, &aff_var);
4008   return true;
4009 }
4010 
4011 /* Return the type of USE.  */
4012 
4013 static tree
4014 get_use_type (struct iv_use *use)
4015 {
4016   tree base_type = TREE_TYPE (use->iv->base);
4017   tree type;
4018 
4019   if (use->type == USE_REF_ADDRESS)
4020     {
4021       /* The base_type may be a void pointer.  Create a pointer type based on
4022 	 the mem_ref instead.  */
4023       type = build_pointer_type (TREE_TYPE (*use->op_p));
4024       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4025 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4026     }
4027   else
4028     type = base_type;
4029 
4030   return type;
4031 }
4032 
4033 /* Determines the expression by which USE is expressed from induction variable
4034    CAND at statement AT in LOOP.  The computation is unshared.  */
4035 
4036 static tree
4037 get_computation_at (struct loop *loop, gimple *at,
4038 		    struct iv_use *use, struct iv_cand *cand)
4039 {
4040   aff_tree aff;
4041   tree type = get_use_type (use);
4042 
4043   if (!get_computation_aff (loop, at, use, cand, &aff))
4044     return NULL_TREE;
4045   unshare_aff_combination (&aff);
4046   return fold_convert (type, aff_combination_to_tree (&aff));
4047 }
4048 
4049 /* Adjust the cost COST for being in loop setup rather than loop body.
4050    If we're optimizing for space, the loop setup overhead is constant;
4051    if we're optimizing for speed, amortize it over the per-iteration cost.
4052    If ROUND_UP_P is true, the result is rounded up rather than truncated
4053    toward zero when optimizing for speed.  */
4054 static unsigned
4055 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4056 		   bool round_up_p = false)
4057 {
4058   if (cost == INFTY)
4059     return cost;
4060   else if (optimize_loop_for_speed_p (data->current_loop))
4061     {
4062       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4063       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4064     }
4065   else
4066     return cost;
4067 }
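
/* Numeric sketch (made-up numbers): a setup cost of 10 in a loop with
   avg_loop_niter == 4 is amortized to 10 / 4 == 2 when optimizing for
   speed, or to (10 + 3) / 4 == 3 with ROUND_UP_P; when optimizing for
   size the full cost of 10 is kept.  */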
4068 
4069 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4070    EXPR operand holding the multiplication that becomes the shift.  COST0 and
4071    COST1 are the costs for calculating the operands of EXPR.  Returns true if
4072    successful, and returns the cost in COST.  */
4073 
4074 static bool
4075 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4076 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4077 {
4078   comp_cost res;
4079   tree op1 = TREE_OPERAND (expr, 1);
4080   tree cst = TREE_OPERAND (mult, 1);
4081   tree multop = TREE_OPERAND (mult, 0);
4082   int m = exact_log2 (int_cst_value (cst));
4083   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4084   int as_cost, sa_cost;
4085   bool mult_in_op1;
4086 
4087   if (!(m >= 0 && m < maxm))
4088     return false;
4089 
4090   STRIP_NOPS (op1);
4091   mult_in_op1 = operand_equal_p (op1, mult, 0);
4092 
4093   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4094 
4095   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4096      use that in preference to a shift insn followed by an add insn.  */
4097   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4098 	     ? shiftadd_cost (speed, mode, m)
4099 	     : (mult_in_op1
4100 		? shiftsub1_cost (speed, mode, m)
4101 		: shiftsub0_cost (speed, mode, m)));
4102 
4103   res = comp_cost (MIN (as_cost, sa_cost), 0);
4104   res += (mult_in_op1 ? cost0 : cost1);
4105 
4106   STRIP_NOPS (multop);
4107   if (!is_gimple_val (multop))
4108     res += force_expr_to_var_cost (multop, speed);
4109 
4110   *cost = res;
4111   return true;
4112 }
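
/* Example (hypothetical expression): for expr == b + a * 4 we have m == 2,
   so the choice is between as_cost (an add plus a shift by 2) and sa_cost
   (a single shift-and-add, where the target provides one); targets with
   scaled-addressing style instructions typically make sa_cost cheaper.  */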
4113 
4114 /* Estimates cost of forcing expression EXPR into a variable.  */
4115 
4116 static comp_cost
4117 force_expr_to_var_cost (tree expr, bool speed)
4118 {
4119   static bool costs_initialized = false;
4120   static unsigned integer_cost [2];
4121   static unsigned symbol_cost [2];
4122   static unsigned address_cost [2];
4123   tree op0, op1;
4124   comp_cost cost0, cost1, cost;
4125   machine_mode mode;
4126   scalar_int_mode int_mode;
4127 
4128   if (!costs_initialized)
4129     {
4130       tree type = build_pointer_type (integer_type_node);
4131       tree var, addr;
4132       rtx x;
4133       int i;
4134 
4135       var = create_tmp_var_raw (integer_type_node, "test_var");
4136       TREE_STATIC (var) = 1;
4137       x = produce_memory_decl_rtl (var, NULL);
4138       SET_DECL_RTL (var, x);
4139 
4140       addr = build1 (ADDR_EXPR, type, var);
4141 
4142 
4143       for (i = 0; i < 2; i++)
4144 	{
4145 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4146 							     2000), i);
4147 
4148 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4149 
4150 	  address_cost[i]
4151 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4152 	  if (dump_file && (dump_flags & TDF_DETAILS))
4153 	    {
4154 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4155 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4156 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4157 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4158 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4159 	      fprintf (dump_file, "\n");
4160 	    }
4161 	}
4162 
4163       costs_initialized = true;
4164     }
4165 
4166   STRIP_NOPS (expr);
4167 
4168   if (SSA_VAR_P (expr))
4169     return no_cost;
4170 
4171   if (is_gimple_min_invariant (expr))
4172     {
4173       if (poly_int_tree_p (expr))
4174 	return comp_cost (integer_cost [speed], 0);
4175 
4176       if (TREE_CODE (expr) == ADDR_EXPR)
4177 	{
4178 	  tree obj = TREE_OPERAND (expr, 0);
4179 
4180 	  if (VAR_P (obj)
4181 	      || TREE_CODE (obj) == PARM_DECL
4182 	      || TREE_CODE (obj) == RESULT_DECL)
4183 	    return comp_cost (symbol_cost [speed], 0);
4184 	}
4185 
4186       return comp_cost (address_cost [speed], 0);
4187     }
4188 
4189   switch (TREE_CODE (expr))
4190     {
4191     case POINTER_PLUS_EXPR:
4192     case PLUS_EXPR:
4193     case MINUS_EXPR:
4194     case MULT_EXPR:
4195     case TRUNC_DIV_EXPR:
4196     case BIT_AND_EXPR:
4197     case BIT_IOR_EXPR:
4198     case LSHIFT_EXPR:
4199     case RSHIFT_EXPR:
4200       op0 = TREE_OPERAND (expr, 0);
4201       op1 = TREE_OPERAND (expr, 1);
4202       STRIP_NOPS (op0);
4203       STRIP_NOPS (op1);
4204       break;
4205 
4206     CASE_CONVERT:
4207     case NEGATE_EXPR:
4208     case BIT_NOT_EXPR:
4209       op0 = TREE_OPERAND (expr, 0);
4210       STRIP_NOPS (op0);
4211       op1 = NULL_TREE;
4212       break;
4213 
4214     default:
4215       /* Just an arbitrary value, FIXME.  */
4216       return comp_cost (target_spill_cost[speed], 0);
4217     }
4218 
4219   if (op0 == NULL_TREE
4220       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4221     cost0 = no_cost;
4222   else
4223     cost0 = force_expr_to_var_cost (op0, speed);
4224 
4225   if (op1 == NULL_TREE
4226       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4227     cost1 = no_cost;
4228   else
4229     cost1 = force_expr_to_var_cost (op1, speed);
4230 
4231   mode = TYPE_MODE (TREE_TYPE (expr));
4232   switch (TREE_CODE (expr))
4233     {
4234     case POINTER_PLUS_EXPR:
4235     case PLUS_EXPR:
4236     case MINUS_EXPR:
4237     case NEGATE_EXPR:
4238       cost = comp_cost (add_cost (speed, mode), 0);
4239       if (TREE_CODE (expr) != NEGATE_EXPR)
4240 	{
4241 	  tree mult = NULL_TREE;
4242 	  comp_cost sa_cost;
4243 	  if (TREE_CODE (op1) == MULT_EXPR)
4244 	    mult = op1;
4245 	  else if (TREE_CODE (op0) == MULT_EXPR)
4246 	    mult = op0;
4247 
4248 	  if (mult != NULL_TREE
4249 	      && is_a <scalar_int_mode> (mode, &int_mode)
4250 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4251 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4252 				    speed, &sa_cost))
4253 	    return sa_cost;
4254 	}
4255       break;
4256 
4257     CASE_CONVERT:
4258       {
4259 	tree inner_mode, outer_mode;
4260 	outer_mode = TREE_TYPE (expr);
4261 	inner_mode = TREE_TYPE (op0);
4262 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4263 				       TYPE_MODE (inner_mode), speed), 0);
4264       }
4265       break;
4266 
4267     case MULT_EXPR:
4268       if (cst_and_fits_in_hwi (op0))
4269 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4270 					     mode, speed), 0);
4271       else if (cst_and_fits_in_hwi (op1))
4272 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4273 					     mode, speed), 0);
4274       else
4275 	return comp_cost (target_spill_cost [speed], 0);
4276       break;
4277 
4278     case TRUNC_DIV_EXPR:
4279       /* Division by power of two is usually cheap, so we allow it.  Forbid
4280 	 anything else.  */
4281       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4282 	cost = comp_cost (add_cost (speed, mode), 0);
4283       else
4284 	cost = comp_cost (target_spill_cost[speed], 0);
4285       break;
4286 
4287     case BIT_AND_EXPR:
4288     case BIT_IOR_EXPR:
4289     case BIT_NOT_EXPR:
4290     case LSHIFT_EXPR:
4291     case RSHIFT_EXPR:
4292       cost = comp_cost (add_cost (speed, mode), 0);
4293       break;
4294 
4295     default:
4296       gcc_unreachable ();
4297     }
4298 
4299   cost += cost0;
4300   cost += cost1;
4301   return cost;
4302 }
4303 
4304 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4305    invariants the computation depends on.  */
4306 
4307 static comp_cost
4308 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4309 {
4310   if (!expr)
4311     return no_cost;
4312 
4313   find_inv_vars (data, &expr, inv_vars);
4314   return force_expr_to_var_cost (expr, data->speed);
4315 }
4316 
4317 /* Returns cost of auto-modifying address expression in shape base + offset.
4318    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4319    address expression.  The address expression has ADDR_MODE in addr space
4320    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4321    speed or size.  */
4322 
4323 enum ainc_type
4324 {
4325   AINC_PRE_INC,		/* Pre increment.  */
4326   AINC_PRE_DEC,		/* Pre decrement.  */
4327   AINC_POST_INC,	/* Post increment.  */
4328   AINC_POST_DEC,	/* Post decrement.  */
4329   AINC_NONE		/* Also the number of auto increment types.  */
4330 };
4331 
4332 struct ainc_cost_data
4333 {
4334   unsigned costs[AINC_NONE];
4335 };
4336 
4337 static comp_cost
4338 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4339 		       machine_mode addr_mode, machine_mode mem_mode,
4340 		       addr_space_t as, bool speed)
4341 {
4342   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4343       && !USE_STORE_PRE_DECREMENT (mem_mode)
4344       && !USE_LOAD_POST_DECREMENT (mem_mode)
4345       && !USE_STORE_POST_DECREMENT (mem_mode)
4346       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4347       && !USE_STORE_PRE_INCREMENT (mem_mode)
4348       && !USE_LOAD_POST_INCREMENT (mem_mode)
4349       && !USE_STORE_POST_INCREMENT (mem_mode))
4350     return infinite_cost;
4351 
4352   static vec<ainc_cost_data *> ainc_cost_data_list;
4353   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4354   if (idx >= ainc_cost_data_list.length ())
4355     {
4356       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4357 
4358       gcc_assert (nsize > idx);
4359       ainc_cost_data_list.safe_grow_cleared (nsize);
4360     }
4361 
4362   ainc_cost_data *data = ainc_cost_data_list[idx];
4363   if (data == NULL)
4364     {
4365       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4366 
4367       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4368       data->costs[AINC_PRE_DEC] = INFTY;
4369       data->costs[AINC_POST_DEC] = INFTY;
4370       data->costs[AINC_PRE_INC] = INFTY;
4371       data->costs[AINC_POST_INC] = INFTY;
4372       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4373 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4374 	{
4375 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4376 
4377 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4378 	    data->costs[AINC_PRE_DEC]
4379 	      = address_cost (addr, mem_mode, as, speed);
4380 	}
4381       if (USE_LOAD_POST_DECREMENT (mem_mode)
4382 	  || USE_STORE_POST_DECREMENT (mem_mode))
4383 	{
4384 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4385 
4386 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4387 	    data->costs[AINC_POST_DEC]
4388 	      = address_cost (addr, mem_mode, as, speed);
4389 	}
4390       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4391 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4392 	{
4393 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4394 
4395 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4396 	    data->costs[AINC_PRE_INC]
4397 	      = address_cost (addr, mem_mode, as, speed);
4398 	}
4399       if (USE_LOAD_POST_INCREMENT (mem_mode)
4400 	  || USE_STORE_POST_INCREMENT (mem_mode))
4401 	{
4402 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4403 
4404 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4405 	    data->costs[AINC_POST_INC]
4406 	      = address_cost (addr, mem_mode, as, speed);
4407 	}
4408       ainc_cost_data_list[idx] = data;
4409     }
4410 
4411   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4412   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4413     return comp_cost (data->costs[AINC_POST_INC], 0);
4414   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4415     return comp_cost (data->costs[AINC_POST_DEC], 0);
4416   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4417     return comp_cost (data->costs[AINC_PRE_INC], 0);
4418   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4419     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4420 
4421   return infinite_cost;
4422 }
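
/* Matching example: loading a 4-byte value (msize == 4) through an iv
   with ainc_step == 4 and ainc_offset == 0 corresponds to *p++ and yields
   the AINC_POST_INC cost; ainc_offset == 4 would instead match
   AINC_PRE_INC, and negating the step selects the decrement forms.  */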
4423 
4424 /* Return cost of computing USE's address expression by using CAND.
4425    AFF_INV and AFF_VAR represent invariant and variant parts of the
4426    address expression, respectively.  If AFF_INV is simple, store the
4427    loop invariant variables on which it depends in INV_VARS; if AFF_INV
4428    is complicated, handle it as a new invariant expression and record it
4429    in INV_EXPR.  RATIO is the ratio between the steps of USE and CAND.
4430    If CAN_AUTOINC is non-NULL, store a boolean value to it indicating
4431    whether this is an auto-increment address.  */
4432 
4433 static comp_cost
4434 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4435 		  struct iv_cand *cand, aff_tree *aff_inv,
4436 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4437 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4438 		  bool *can_autoinc, bool speed)
4439 {
4440   rtx addr;
4441   bool simple_inv = true;
4442   tree comp_inv = NULL_TREE, type = aff_var->type;
4443   comp_cost var_cost = no_cost, cost = no_cost;
4444   struct mem_address parts = {NULL_TREE, integer_one_node,
4445 			      NULL_TREE, NULL_TREE, NULL_TREE};
4446   machine_mode addr_mode = TYPE_MODE (type);
4447   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4448   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4449   /* Only true if ratio != 1.  */
4450   bool ok_with_ratio_p = false;
4451   bool ok_without_ratio_p = false;
4452 
4453   if (!aff_combination_const_p (aff_inv))
4454     {
4455       parts.index = integer_one_node;
4456       /* Addressing mode "base + index".  */
4457       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4458       if (ratio != 1)
4459 	{
4460 	  parts.step = wide_int_to_tree (type, ratio);
4461 	  /* Addressing mode "base + index << scale".  */
4462 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4463 	  if (!ok_with_ratio_p)
4464 	    parts.step = NULL_TREE;
4465 	}
4466       if (ok_with_ratio_p || ok_without_ratio_p)
4467 	{
4468 	  if (maybe_ne (aff_inv->offset, 0))
4469 	    {
4470 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4471 	      /* Addressing mode "base + index [<< scale] + offset".  */
4472 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4473 		parts.offset = NULL_TREE;
4474 	      else
4475 		aff_inv->offset = 0;
4476 	    }
4477 
4478 	  move_fixed_address_to_symbol (&parts, aff_inv);
4479 	  /* Base is fixed address and is moved to symbol part.  */
4480 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4481 	    parts.base = NULL_TREE;
4482 
4483 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4484 	  if (parts.symbol != NULL_TREE
4485 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4486 	    {
4487 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4488 	      parts.symbol = NULL_TREE;
4489 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4490 		 outside of address expression in this case.  */
4491 	      simple_inv = false;
4492 	      /* Symbol part is moved back to base part, it can't be NULL.  */
4493 	      parts.base = integer_one_node;
4494 	    }
4495 	}
4496       else
4497 	parts.index = NULL_TREE;
4498     }
4499   else
4500     {
4501       poly_int64 ainc_step;
4502       if (can_autoinc
4503 	  && ratio == 1
4504 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4505 	{
4506 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4507 
4508 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4509 	    ainc_offset += ainc_step;
4510 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4511 					addr_mode, mem_mode, as, speed);
4512 	  if (!cost.infinite_cost_p ())
4513 	    {
4514 	      *can_autoinc = true;
4515 	      return cost;
4516 	    }
4517 	  cost = no_cost;
4518 	}
4519       if (!aff_combination_zero_p (aff_inv))
4520 	{
4521 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4522 	  /* Addressing mode "base + offset".  */
4523 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4524 	    parts.offset = NULL_TREE;
4525 	  else
4526 	    aff_inv->offset = 0;
4527 	}
4528     }
4529 
4530   if (simple_inv)
4531     simple_inv = (aff_inv == NULL
4532 		  || aff_combination_const_p (aff_inv)
4533 		  || aff_combination_singleton_var_p (aff_inv));
4534   if (!aff_combination_zero_p (aff_inv))
4535     comp_inv = aff_combination_to_tree (aff_inv);
4536   if (comp_inv != NULL_TREE)
4537     cost = force_var_cost (data, comp_inv, inv_vars);
4538   if (ratio != 1 && parts.step == NULL_TREE)
4539     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4540   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4541     var_cost += add_cost (speed, addr_mode);
4542 
4543   if (comp_inv && inv_expr && !simple_inv)
4544     {
4545       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4546       /* Dependence is now tracked via *INV_EXPR; clear *INV_VARS.  */
4547       if (*inv_expr != NULL && inv_vars && *inv_vars)
4548 	bitmap_clear (*inv_vars);
4549 
4550       /* The cost of a small invariant expression adjusted against loop
4551 	 niters is usually zero, which makes it difficult to distinguish
4552 	 from a candidate based on loop invariant variables.  Moreover, the
4553 	 generated invariant expression may not be hoisted out of the loop
4554 	 by a following pass.  We penalize the cost by rounding up in order
4555 	 to neutralize such effects.  */
4556       cost.cost = adjust_setup_cost (data, cost.cost, true);
4557       cost.scratch = cost.cost;
4558     }
4559 
4560   cost += var_cost;
4561   addr = addr_for_mem_ref (&parts, as, false);
4562   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4563   cost += address_cost (addr, mem_mode, as, speed);
4564 
4565   if (parts.symbol != NULL_TREE)
4566     cost.complexity += 1;
4567   /* Don't increase the complexity of adding a scaled index if it's
4568      the only kind of index that the target allows.  */
4569   if (parts.step != NULL_TREE && ok_without_ratio_p)
4570     cost.complexity += 1;
4571   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4572     cost.complexity += 1;
4573   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4574     cost.complexity += 1;
4575 
4576   return cost;
4577 }
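
/* Trial-order sketch for a hypothetical target: with aff_inv == &sym + 8
   and ratio == 4, the code above first tries "base + index", then
   "base + index * 4", then folding in the offset 8, then moving &sym to
   the symbol part, dropping each piece whenever valid_mem_ref_p rejects
   the resulting form; whatever cannot be encoded in the address is costed
   separately as invariant computation.  */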
4578 
4579 /* Scale (multiply) the computed COST (except the scratch part, which should
4580    be hoisted out of the loop) by AT->frequency / header->frequency, which
4581    makes the expected cost more accurate.  */
4582 
4583 static comp_cost
4584 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4585 {
4586    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4587    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4588    if (loop_freq != 0)
4589      {
4590        gcc_assert (cost.scratch <= cost.cost);
4591        int scaled_cost
4592 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4593 
4594        if (dump_file && (dump_flags & TDF_DETAILS))
4595 	 fprintf (dump_file, "Scaling cost based on bb prob "
4596 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4597 		  1.0f * bb_freq / loop_freq, cost.cost,
4598 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4599 
4600        cost.cost = scaled_cost;
4601      }
4602 
4603   return cost;
4604 }
4605 
4606 /* Determines the cost of the computation by which USE is expressed
4607    from induction variable CAND.  If ADDRESS_P is true, we just need
4608    to create an address from it, otherwise we want to get it into a
4609    register.  The set of invariants we depend on is stored in INV_VARS.
4610    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4611    addressing is likely.  If INV_EXPR is nonnull, record invariant
4612    expr entry in it.  */
4613 
4614 static comp_cost
4615 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4616 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4617 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4618 {
4619   gimple *at = use->stmt;
4620   tree ubase = use->iv->base, cbase = cand->iv->base;
4621   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4622   tree comp_inv = NULL_TREE;
4623   HOST_WIDE_INT ratio, aratio;
4624   comp_cost cost;
4625   widest_int rat;
4626   aff_tree aff_inv, aff_var;
4627   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4628 
4629   if (inv_vars)
4630     *inv_vars = NULL;
4631   if (can_autoinc)
4632     *can_autoinc = false;
4633   if (inv_expr)
4634     *inv_expr = NULL;
4635 
4636   /* Check if we have enough precision to express the values of use.  */
4637   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4638     return infinite_cost;
4639 
4640   if (address_p
4641       || (use->iv->base_object
4642 	  && cand->iv->base_object
4643 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4644 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4645     {
4646       /* Do not try to express address of an object with computation based
4647 	 on address of a different object.  This may cause problems in rtl
4648 	 level alias analysis (that does not expect this to be happening,
4649 	 as this is illegal in C), and would be unlikely to be useful
4650 	 anyway.  */
4651       if (use->iv->base_object
4652 	  && cand->iv->base_object
4653 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4654 	return infinite_cost;
4655     }
4656 
4657   if (!get_computation_aff_1 (data->current_loop, at, use,
4658 			      cand, &aff_inv, &aff_var, &rat)
4659       || !wi::fits_shwi_p (rat))
4660     return infinite_cost;
4661 
4662   ratio = rat.to_shwi ();
4663   if (address_p)
4664     {
4665       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4666 			       inv_vars, inv_expr, can_autoinc, speed);
4667       return get_scaled_computation_cost_at (data, at, cost);
4668     }
4669 
4670   bool simple_inv = (aff_combination_const_p (&aff_inv)
4671 		     || aff_combination_singleton_var_p (&aff_inv));
4672   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4673   aff_combination_convert (&aff_inv, signed_type);
4674   if (!aff_combination_zero_p (&aff_inv))
4675     comp_inv = aff_combination_to_tree (&aff_inv);
4676 
4677   cost = force_var_cost (data, comp_inv, inv_vars);
4678   if (comp_inv && inv_expr && !simple_inv)
4679     {
4680       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4681       /* Dependence is now tracked via *INV_EXPR; clear *INV_VARS.  */
4682       if (*inv_expr != NULL && inv_vars && *inv_vars)
4683 	bitmap_clear (*inv_vars);
4684 
4685       cost.cost = adjust_setup_cost (data, cost.cost);
4686       /* Record setup cost in scratch field.  */
4687       cost.scratch = cost.cost;
4688     }
4689   /* The cost of a constant integer can be covered when adding the invariant
4690      part to the variant part.  */
4691   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4692     cost = no_cost;
4693 
4694   /* Need type narrowing to represent use with cand.  */
4695   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4696     {
4697       machine_mode outer_mode = TYPE_MODE (utype);
4698       machine_mode inner_mode = TYPE_MODE (ctype);
4699       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4700     }
4701 
4702   /* Turn a + i * (-c) into a - i * c.  */
4703   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4704     aratio = -ratio;
4705   else
4706     aratio = ratio;
4707 
4708   if (ratio != 1)
4709     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4710 
4711   /* TODO: We may also need to check if we can compute a + i * 4 in one
4712      instruction.  */
4713   /* Need to add up the invariant and variant parts.  */
4714   if (comp_inv && !integer_zerop (comp_inv))
4715     cost += add_cost (speed, TYPE_MODE (utype));
4716 
4717   return get_scaled_computation_cost_at (data, at, cost);
4718 }
4719 
4720 /* Determines cost of computing the use in GROUP with CAND in a generic
4721    expression.  */
4722 
4723 static bool
4724 determine_group_iv_cost_generic (struct ivopts_data *data,
4725 				 struct iv_group *group, struct iv_cand *cand)
4726 {
4727   comp_cost cost;
4728   iv_inv_expr_ent *inv_expr = NULL;
4729   bitmap inv_vars = NULL, inv_exprs = NULL;
4730   struct iv_use *use = group->vuses[0];
4731 
4732   /* The simple case first -- if we need to express value of the preserved
4733      original biv, the cost is 0.  This also prevents us from counting the
4734      cost of increment twice -- once at this use and once in the cost of
4735      the candidate.  */
4736   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4737     cost = no_cost;
4738   else
4739     cost = get_computation_cost (data, use, cand, false,
4740 				 &inv_vars, NULL, &inv_expr);
4741 
4742   if (inv_expr)
4743     {
4744       inv_exprs = BITMAP_ALLOC (NULL);
4745       bitmap_set_bit (inv_exprs, inv_expr->id);
4746     }
4747   set_group_iv_cost (data, group, cand, cost, inv_vars,
4748 		     NULL_TREE, ERROR_MARK, inv_exprs);
4749   return !cost.infinite_cost_p ();
4750 }
4751 
4752 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4753 
4754 static bool
4755 determine_group_iv_cost_address (struct ivopts_data *data,
4756 				 struct iv_group *group, struct iv_cand *cand)
4757 {
4758   unsigned i;
4759   bitmap inv_vars = NULL, inv_exprs = NULL;
4760   bool can_autoinc;
4761   iv_inv_expr_ent *inv_expr = NULL;
4762   struct iv_use *use = group->vuses[0];
4763   comp_cost sum_cost = no_cost, cost;
4764 
4765   cost = get_computation_cost (data, use, cand, true,
4766 			       &inv_vars, &can_autoinc, &inv_expr);
4767 
4768   if (inv_expr)
4769     {
4770       inv_exprs = BITMAP_ALLOC (NULL);
4771       bitmap_set_bit (inv_exprs, inv_expr->id);
4772     }
4773   sum_cost = cost;
4774   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4775     {
4776       if (can_autoinc)
4777 	sum_cost -= cand->cost_step;
4778       /* If we generated the candidate solely for exploiting autoincrement
4779 	 opportunities, and it turns out it can't be used, set the cost to
4780 	 infinity to make sure we ignore it.  */
4781       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4782 	sum_cost = infinite_cost;
4783     }
4784 
4785   /* Uses in a group can share setup code, so only add setup cost once.  */
4786   cost -= cost.scratch;
4787   /* Compute and add costs for the rest of the uses in this group.  */
4788   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4789     {
4790       struct iv_use *next = group->vuses[i];
4791 
4792       /* TODO: We could skip computing cost for sub iv_use when it has the
4793 	 same cost as the first iv_use, but the cost really depends on the
4794 	 offset and where the iv_use is.  */
4795       cost = get_computation_cost (data, next, cand, true,
4796 				   NULL, &can_autoinc, &inv_expr);
4797       if (inv_expr)
4798 	{
4799 	  if (!inv_exprs)
4800 	    inv_exprs = BITMAP_ALLOC (NULL);
4801 
4802 	  bitmap_set_bit (inv_exprs, inv_expr->id);
4803 	}
4804       sum_cost += cost;
4805     }
4806   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4807 		     NULL_TREE, ERROR_MARK, inv_exprs);
4808 
4809   return !sum_cost.infinite_cost_p ();
4810 }
4811 
4812 /* Computes value of candidate CAND at position AT in iteration NITER, and
4813    stores it to VAL.  */
4814 
4815 static void
4816 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4817 	       aff_tree *val)
4818 {
4819   aff_tree step, delta, nit;
4820   struct iv *iv = cand->iv;
4821   tree type = TREE_TYPE (iv->base);
4822   tree steptype;
4823   if (POINTER_TYPE_P (type))
4824     steptype = sizetype;
4825   else
4826     steptype = unsigned_type_for (type);
4827 
4828   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4829   aff_combination_convert (&step, steptype);
4830   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4831   aff_combination_convert (&nit, steptype);
4832   aff_combination_mult (&nit, &step, &delta);
4833   if (stmt_after_increment (loop, cand, at))
4834     aff_combination_add (&delta, &step);
4835 
4836   tree_to_aff_combination (iv->base, type, val);
4837   if (!POINTER_TYPE_P (type))
4838     aff_combination_convert (val, steptype);
4839   aff_combination_add (val, &delta);
4840 }
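
/* In effect VAL = BASE + NITER * STEP before the increment, or
   BASE + (NITER + 1) * STEP after it, computed in an unsigned type so any
   wrapping is well defined; e.g. for the hypothetical iv {16, 4} and
   NITER == n, VAL is 16 + 4 * n (or 16 + 4 * (n + 1)).  */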
4841 
4842 /* Returns period of induction variable iv.  */
4843 
4844 static tree
4845 iv_period (struct iv *iv)
4846 {
4847   tree step = iv->step, period, type;
4848   tree pow2div;
4849 
4850   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4851 
4852   type = unsigned_type_for (TREE_TYPE (step));
4853   /* The period of the iv is lcm (step, type_range) / step - 1,
4854      i.e., N * type_range / step - 1.  Since the type range is a power
4855      of two, N == step >> num_of_ending_zeros_binary (step),
4856      so the final result is
4857 
4858        (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4861   pow2div = num_ending_zeros (step);
4862 
4863   period = build_low_bits_mask (type,
4864 				(TYPE_PRECISION (type)
4865 				 - tree_to_uhwi (pow2div)));
4866 
4867   return period;
4868 }
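
/* Worked example with 8-bit precision (hypothetical iv): for step == 12
   there are two trailing zero bits, so period == (256 >> 2) - 1 == 63;
   the iv walks through 64 distinct values before repeating, hence 63
   increments from any starting point cannot revisit a value.  */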
4869 
4870 /* Returns the comparison operator used when eliminating the iv USE.  */
4871 
4872 static enum tree_code
4873 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4874 {
4875   struct loop *loop = data->current_loop;
4876   basic_block ex_bb;
4877   edge exit;
4878 
4879   ex_bb = gimple_bb (use->stmt);
4880   exit = EDGE_SUCC (ex_bb, 0);
4881   if (flow_bb_inside_loop_p (loop, exit->dest))
4882     exit = EDGE_SUCC (ex_bb, 1);
4883 
4884   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4885 }
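
/* For instance (hypothetical GIMPLE): for an exit condition shaped like

     if (i_1 < n_2)
       goto <continue in loop>;
     else
       goto <exit>;

   the exit edge carries EDGE_FALSE_VALUE, so the eliminated iv use is
   rewritten with NE_EXPR; with the branch polarity reversed, the exit
   edge would be the true edge and EQ_EXPR would be used instead.  */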
4886 
4887 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4888    we only detect the situation where BASE = SOMETHING + OFFSET, with the
4889    calculation performed in a non-wrapping type.
4890 
4891    TODO: More generally, we could test for the situation that
4892 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4893 	 This would require knowing the sign of OFFSET.  */
4894 
4895 static bool
4896 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4897 {
4898   enum tree_code code;
4899   tree e1, e2;
4900   aff_tree aff_e1, aff_e2, aff_offset;
4901 
4902   if (!nowrap_type_p (TREE_TYPE (base)))
4903     return false;
4904 
4905   base = expand_simple_operations (base);
4906 
4907   if (TREE_CODE (base) == SSA_NAME)
4908     {
4909       gimple *stmt = SSA_NAME_DEF_STMT (base);
4910 
4911       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4912 	return false;
4913 
4914       code = gimple_assign_rhs_code (stmt);
4915       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4916 	return false;
4917 
4918       e1 = gimple_assign_rhs1 (stmt);
4919       e2 = gimple_assign_rhs2 (stmt);
4920     }
4921   else
4922     {
4923       code = TREE_CODE (base);
4924       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4925 	return false;
4926       e1 = TREE_OPERAND (base, 0);
4927       e2 = TREE_OPERAND (base, 1);
4928     }
4929 
4930   /* Use affine expansion as deeper inspection to prove the equality.  */
4931   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4932 				  &aff_e2, &data->name_expansion_cache);
4933   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4934 				  &aff_offset, &data->name_expansion_cache);
4935   aff_combination_scale (&aff_offset, -1);
4936   switch (code)
4937     {
4938     case PLUS_EXPR:
4939       aff_combination_add (&aff_e2, &aff_offset);
4940       if (aff_combination_zero_p (&aff_e2))
4941 	return true;
4942 
4943       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4944 				      &aff_e1, &data->name_expansion_cache);
4945       aff_combination_add (&aff_e1, &aff_offset);
4946       return aff_combination_zero_p (&aff_e1);
4947 
4948     case POINTER_PLUS_EXPR:
4949       aff_combination_add (&aff_e2, &aff_offset);
4950       return aff_combination_zero_p (&aff_e2);
4951 
4952     default:
4953       return false;
4954     }
4955 }
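
/* Sketch of the pattern this recognizes (hypothetical names): if BASE
   expands to q + a, computed in a non-wrapping type, and OFFSET expands
   to the same a, then the affine difference of the matching operand and
   OFFSET is zero and BASE - OFFSET is exactly q, so the subtraction
   cannot overflow.  */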
4956 
4957 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4958    comparison with CAND.  NITER describes the number of iterations of
4959    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4960 
4961    We aim to handle the following situation:
4962 
4963    sometype *base, *p;
4964    int a, b, i;
4965 
4966    i = a;
4967    p = p_0 = base + a;
4968 
4969    do
4970      {
4971        bla (*p);
4972        p++;
4973        i++;
4974      }
4975    while (i < b);
4976 
4977    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4978    We aim to optimize this to
4979 
4980    p = p_0 = base + a;
4981    do
4982      {
4983        bla (*p);
4984        p++;
4985      }
4986    while (p < p_0 - a + b);
4987 
4988    This preserves correctness, since the pointer arithmetic does not
4989    overflow.  More precisely:
4990 
4991    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4992       overflow in computing it or the values of p.
4993    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4994       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4995 
4996 static bool
4997 iv_elimination_compare_lt (struct ivopts_data *data,
4998 			   struct iv_cand *cand, enum tree_code *comp_p,
4999 			   struct tree_niter_desc *niter)
5000 {
5001   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5002   struct aff_tree nit, tmpa, tmpb;
5003   enum tree_code comp;
5004   HOST_WIDE_INT step;
5005 
5006   /* We need to know that the candidate induction variable does not overflow.
5007      While more complex analysis may be used to prove this, for now just
5008      check that the variable appears in the original program and that it
5009      is computed in a type that guarantees no overflows.  */
5010   cand_type = TREE_TYPE (cand->iv->base);
5011   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5012     return false;
5013 
5014   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5015      the calculation of the BOUND could overflow, making the comparison
5016      invalid.  */
5017   if (!data->loop_single_exit_p)
5018     return false;
5019 
5020   /* We need to be able to decide whether the candidate is increasing or
5021      decreasing in order to choose the right comparison operator.  */
5022   if (!cst_and_fits_in_hwi (cand->iv->step))
5023     return false;
5024   step = int_cst_value (cand->iv->step);
5025 
5026   /* Check that the number of iterations matches the expected pattern:
5027      a + 1 > b ? 0 : b - a - 1.  */
5028   mbz = niter->may_be_zero;
5029   if (TREE_CODE (mbz) == GT_EXPR)
5030     {
5031       /* Handle a + 1 > b.  */
5032       tree op0 = TREE_OPERAND (mbz, 0);
5033       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5034 	{
5035 	  a = TREE_OPERAND (op0, 0);
5036 	  b = TREE_OPERAND (mbz, 1);
5037 	}
5038       else
5039 	return false;
5040     }
5041   else if (TREE_CODE (mbz) == LT_EXPR)
5042     {
5043       tree op1 = TREE_OPERAND (mbz, 1);
5044 
5045       /* Handle b < a + 1.  */
5046       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5047 	{
5048 	  a = TREE_OPERAND (op1, 0);
5049 	  b = TREE_OPERAND (mbz, 0);
5050 	}
5051       else
5052 	return false;
5053     }
5054   else
5055     return false;
5056 
5057   /* Expected number of iterations is B - A - 1.  Check that it matches
5058      the actual number, i.e., that B - A - NITER = 1.  */
5059   tree_to_aff_combination (niter->niter, nit_type, &nit);
5060   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5061   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5062   aff_combination_scale (&nit, -1);
5063   aff_combination_scale (&tmpa, -1);
5064   aff_combination_add (&tmpb, &tmpa);
5065   aff_combination_add (&tmpb, &nit);
5066   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5067     return false;
5068 
5069   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5070      overflow.  */
5071   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5072 			cand->iv->step,
5073 			fold_convert (TREE_TYPE (cand->iv->step), a));
5074   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5075     return false;
5076 
5077   /* Determine the new comparison operator.  */
5078   comp = step < 0 ? GT_EXPR : LT_EXPR;
5079   if (*comp_p == NE_EXPR)
5080     *comp_p = comp;
5081   else if (*comp_p == EQ_EXPR)
5082     *comp_p = invert_tree_comparison (comp, false);
5083   else
5084     gcc_unreachable ();
5085 
5086   return true;
5087 }
5088 
5089 /* Check whether it is possible to express the condition in USE by comparison
5090    of candidate CAND.  If so, store the value compared with to BOUND, and the
5091    comparison operator to COMP.  */
5092 
5093 static bool
5094 may_eliminate_iv (struct ivopts_data *data,
5095 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5096 		  enum tree_code *comp)
5097 {
5098   basic_block ex_bb;
5099   edge exit;
5100   tree period;
5101   struct loop *loop = data->current_loop;
5102   aff_tree bnd;
5103   struct tree_niter_desc *desc = NULL;
5104 
5105   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5106     return false;
5107 
5108   /* For now this works only for exits that dominate the loop latch.
5109      TODO: extend to other conditions inside loop body.  */
5110   ex_bb = gimple_bb (use->stmt);
5111   if (use->stmt != last_stmt (ex_bb)
5112       || gimple_code (use->stmt) != GIMPLE_COND
5113       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5114     return false;
5115 
5116   exit = EDGE_SUCC (ex_bb, 0);
5117   if (flow_bb_inside_loop_p (loop, exit->dest))
5118     exit = EDGE_SUCC (ex_bb, 1);
5119   if (flow_bb_inside_loop_p (loop, exit->dest))
5120     return false;
5121 
5122   desc = niter_for_exit (data, exit);
5123   if (!desc)
5124     return false;
5125 
5126   /* Determine whether we can use the variable to test the exit condition.
5127      This is the case iff the period of the induction variable is greater
5128      than the number of iterations for which the exit condition is true.  */
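  /* A hedged numeric illustration: for an 8-bit unsigned candidate with
     step 2, iv_period yields 127, so the elimination is only safe when
     this exit is provably taken within the period; past it the candidate
     wraps and would take the value of the bound a second time.  */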
5129   period = iv_period (cand->iv);
5130 
5131   /* If the number of iterations is constant, compare against it directly.  */
5132   if (TREE_CODE (desc->niter) == INTEGER_CST)
5133     {
5134       /* See cand_value_at.  */
5135       if (stmt_after_increment (loop, cand, use->stmt))
5136 	{
5137 	  if (!tree_int_cst_lt (desc->niter, period))
5138 	    return false;
5139 	}
5140       else
5141 	{
5142 	  if (tree_int_cst_lt (period, desc->niter))
5143 	    return false;
5144 	}
5145     }
5146 
5147   /* If not, and if this is the only possible exit of the loop, see whether
5148      we can get a conservative estimate on the number of iterations of the
5149      entire loop and compare against that instead.  */
5150   else
5151     {
5152       widest_int period_value, max_niter;
5153 
5154       max_niter = desc->max;
5155       if (stmt_after_increment (loop, cand, use->stmt))
5156 	max_niter += 1;
5157       period_value = wi::to_widest (period);
5158       if (wi::gtu_p (max_niter, period_value))
5159 	{
5160 	  /* See if we can take advantage of inferred loop bound
5161 	     information.  */
5162 	  if (data->loop_single_exit_p)
5163 	    {
5164 	      if (!max_loop_iterations (loop, &max_niter))
5165 		return false;
5166 	      /* The loop bound is already adjusted by adding 1.  */
5167 	      if (wi::gtu_p (max_niter, period_value))
5168 		return false;
5169 	    }
5170 	  else
5171 	    return false;
5172 	}
5173     }
5174 
5175   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5176 
5177   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5178 			 aff_combination_to_tree (&bnd));
5179   *comp = iv_elimination_compare (data, use);
5180 
5181   /* It is unlikely that computing the number of iterations using division
5182      would be more profitable than keeping the original induction variable.  */
5183   if (expression_expensive_p (*bound))
5184     return false;
5185 
5186   /* Sometimes it is possible to handle the case in which the number of
5187      iterations may be zero unless additional assumptions hold, by using
5188      < instead of != in the exit condition.
5189 
5190      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5191 	   base the exit condition on it.  However, that is often too
5192 	   expensive.  */
5193   if (!integer_zerop (desc->may_be_zero))
5194     return iv_elimination_compare_lt (data, cand, comp, desc);
5195 
5196   return true;
5197 }
5198 
5199 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must be
5200    copied if it is used in the loop body and DATA->body_includes_call is set.  */
5201 
5202 static int
5203 parm_decl_cost (struct ivopts_data *data, tree bound)
5204 {
5205   tree sbound = bound;
5206   STRIP_NOPS (sbound);
5207 
5208   if (TREE_CODE (sbound) == SSA_NAME
5209       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5210       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5211       && data->body_includes_call)
5212     return COSTS_N_INSNS (1);
5213 
5214   return 0;
5215 }
5216 
5217 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5218 
5219 static bool
5220 determine_group_iv_cost_cond (struct ivopts_data *data,
5221 			      struct iv_group *group, struct iv_cand *cand)
5222 {
5223   tree bound = NULL_TREE;
5224   struct iv *cmp_iv;
5225   bitmap inv_exprs = NULL;
5226   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5227   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5228   enum comp_iv_rewrite rewrite_type;
5229   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5230   tree *control_var, *bound_cst;
5231   enum tree_code comp = ERROR_MARK;
5232   struct iv_use *use = group->vuses[0];
5233 
5234   /* Extract condition operands.  */
5235   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5236 					&bound_cst, NULL, &cmp_iv);
5237   gcc_assert (rewrite_type != COMP_IV_NA);
5238 
5239   /* Try iv elimination.  */
5240   if (rewrite_type == COMP_IV_ELIM
5241       && may_eliminate_iv (data, use, cand, &bound, &comp))
5242     {
5243       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5244       if (elim_cost.cost == 0)
5245 	elim_cost.cost = parm_decl_cost (data, bound);
5246       else if (TREE_CODE (bound) == INTEGER_CST)
5247 	elim_cost.cost = 0;
5248       /* If we replace a loop condition 'i < n' with 'p < base + n',
5249 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5250 	 'base' and 'n' will be live during the loop.  More likely,
5251 	 'base + n' will be loop invariant, resulting in only one live value
5252 	 during the loop.  So in that case we clear inv_vars_elim and set
5253 	 inv_expr_elim instead.  */
5254       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5255 	{
5256 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5257 	  bitmap_clear (inv_vars_elim);
5258 	}
5259       /* The bound is a loop invariant, so it will be only computed
5260 	 once.  */
5261       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5262     }
5263 
5264   /* When the condition is a comparison of the candidate IV against
5265      zero, prefer this IV.
5266 
5267      TODO: The constant that we're subtracting from the cost should
5268      be target-dependent.  This information should be added to the
5269      target costs for each backend.  */
5270   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5271       && integer_zerop (*bound_cst)
5272       && (operand_equal_p (*control_var, cand->var_after, 0)
5273 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5274     elim_cost -= 1;
5275 
5276   express_cost = get_computation_cost (data, use, cand, false,
5277 				       &inv_vars_express, NULL,
5278 				       &inv_expr_express);
5279   if (cmp_iv != NULL)
5280     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5281 
5282   /* Count the cost of the original bound as well.  */
5283   bound_cost = force_var_cost (data, *bound_cst, NULL);
5284   if (bound_cost.cost == 0)
5285     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5286   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5287     bound_cost.cost = 0;
5288   express_cost += bound_cost;
5289 
5290   /* Choose the better approach, preferring the eliminated IV. */
5291   if (elim_cost <= express_cost)
5292     {
5293       cost = elim_cost;
5294       inv_vars = inv_vars_elim;
5295       inv_vars_elim = NULL;
5296       inv_expr = inv_expr_elim;
5297     }
5298   else
5299     {
5300       cost = express_cost;
5301       inv_vars = inv_vars_express;
5302       inv_vars_express = NULL;
5303       bound = NULL_TREE;
5304       comp = ERROR_MARK;
5305       inv_expr = inv_expr_express;
5306     }
5307 
5308   if (inv_expr)
5309     {
5310       inv_exprs = BITMAP_ALLOC (NULL);
5311       bitmap_set_bit (inv_exprs, inv_expr->id);
5312     }
5313   set_group_iv_cost (data, group, cand, cost,
5314 		     inv_vars, bound, comp, inv_exprs);
5315 
5316   if (inv_vars_elim)
5317     BITMAP_FREE (inv_vars_elim);
5318   if (inv_vars_express)
5319     BITMAP_FREE (inv_vars_express);
5320 
5321   return !cost.infinite_cost_p ();
5322 }
5323 
5324 /* Determines the cost of computing the uses in GROUP with CAND.  Returns
5325    false if GROUP cannot be represented with CAND.  */
5326 
5327 static bool
5328 determine_group_iv_cost (struct ivopts_data *data,
5329 			 struct iv_group *group, struct iv_cand *cand)
5330 {
5331   switch (group->type)
5332     {
5333     case USE_NONLINEAR_EXPR:
5334       return determine_group_iv_cost_generic (data, group, cand);
5335 
5336     case USE_REF_ADDRESS:
5337     case USE_PTR_ADDRESS:
5338       return determine_group_iv_cost_address (data, group, cand);
5339 
5340     case USE_COMPARE:
5341       return determine_group_iv_cost_cond (data, group, cand);
5342 
5343     default:
5344       gcc_unreachable ();
5345     }
5346 }
5347 
5348 /* Return true if get_computation_cost indicates that autoincrement is
5349    a possibility for the pair of USE and CAND, false otherwise.  */
5350 
5351 static bool
5352 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5353 			   struct iv_cand *cand)
5354 {
5355   if (!address_p (use->type))
5356     return false;
5357 
5358   bool can_autoinc = false;
5359   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5360   return can_autoinc;
5361 }
5362 
5363 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5364    use that allows autoincrement, and set their AINC_USE if possible.  */
5365 
5366 static void
5367 set_autoinc_for_original_candidates (struct ivopts_data *data)
5368 {
5369   unsigned i, j;
5370 
5371   for (i = 0; i < data->vcands.length (); i++)
5372     {
5373       struct iv_cand *cand = data->vcands[i];
5374       struct iv_use *closest_before = NULL;
5375       struct iv_use *closest_after = NULL;
5376       if (cand->pos != IP_ORIGINAL)
5377 	continue;
5378 
5379       for (j = 0; j < data->vgroups.length (); j++)
5380 	{
5381 	  struct iv_group *group = data->vgroups[j];
5382 	  struct iv_use *use = group->vuses[0];
5383 	  unsigned uid = gimple_uid (use->stmt);
5384 
5385 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5386 	    continue;
5387 
5388 	  if (uid < gimple_uid (cand->incremented_at)
5389 	      && (closest_before == NULL
5390 		  || uid > gimple_uid (closest_before->stmt)))
5391 	    closest_before = use;
5392 
5393 	  if (uid > gimple_uid (cand->incremented_at)
5394 	      && (closest_after == NULL
5395 		  || uid < gimple_uid (closest_after->stmt)))
5396 	    closest_after = use;
5397 	}
5398 
5399       if (closest_before != NULL
5400 	  && autoinc_possible_for_pair (data, closest_before, cand))
5401 	cand->ainc_use = closest_before;
5402       else if (closest_after != NULL
5403 	       && autoinc_possible_for_pair (data, closest_after, cand))
5404 	cand->ainc_use = closest_after;
5405     }
5406 }
5407 
5408 /* Relate compare uses with all candidates.  */
5409 
5410 static void
5411 relate_compare_use_with_all_cands (struct ivopts_data *data)
5412 {
5413   unsigned i, count = data->vcands.length ();
5414   for (i = 0; i < data->vgroups.length (); i++)
5415     {
5416       struct iv_group *group = data->vgroups[i];
5417 
5418       if (group->type == USE_COMPARE)
5419 	bitmap_set_range (group->related_cands, 0, count);
5420     }
5421 }
5422 
5423 /* Finds the candidates for the induction variables.  */
5424 
5425 static void
5426 find_iv_candidates (struct ivopts_data *data)
5427 {
5428   /* Add commonly used ivs.  */
5429   add_standard_iv_candidates (data);
5430 
5431   /* Add old induction variables.  */
5432   add_iv_candidate_for_bivs (data);
5433 
5434   /* Add induction variables derived from uses.  */
5435   add_iv_candidate_for_groups (data);
5436 
5437   set_autoinc_for_original_candidates (data);
5438 
5439   /* Record the important candidates.  */
5440   record_important_candidates (data);
5441 
5442   /* Relate compare iv_use with all candidates.  */
5443   if (!data->consider_all_candidates)
5444     relate_compare_use_with_all_cands (data);
5445 
5446   if (dump_file && (dump_flags & TDF_DETAILS))
5447     {
5448       unsigned i;
5449 
5450       fprintf (dump_file, "\n<Important Candidates>:\t");
5451       for (i = 0; i < data->vcands.length (); i++)
5452 	if (data->vcands[i]->important)
5453 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5454       fprintf (dump_file, "\n");
5455 
5456       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5457       for (i = 0; i < data->vgroups.length (); i++)
5458 	{
5459 	  struct iv_group *group = data->vgroups[i];
5460 
5461 	  if (group->related_cands)
5462 	    {
5463 	      fprintf (dump_file, "  Group %d:\t", group->id);
5464 	      dump_bitmap (dump_file, group->related_cands);
5465 	    }
5466 	}
5467       fprintf (dump_file, "\n");
5468     }
5469 }
5470 
5471 /* Determines the cost of computing each group of uses with each iv candidate.  */
5472 
5473 static void
5474 determine_group_iv_costs (struct ivopts_data *data)
5475 {
5476   unsigned i, j;
5477   struct iv_cand *cand;
5478   struct iv_group *group;
5479   bitmap to_clear = BITMAP_ALLOC (NULL);
5480 
5481   alloc_use_cost_map (data);
5482 
5483   for (i = 0; i < data->vgroups.length (); i++)
5484     {
5485       group = data->vgroups[i];
5486 
5487       if (data->consider_all_candidates)
5488 	{
5489 	  for (j = 0; j < data->vcands.length (); j++)
5490 	    {
5491 	      cand = data->vcands[j];
5492 	      determine_group_iv_cost (data, group, cand);
5493 	    }
5494 	}
5495       else
5496 	{
5497 	  bitmap_iterator bi;
5498 
5499 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5500 	    {
5501 	      cand = data->vcands[j];
5502 	      if (!determine_group_iv_cost (data, group, cand))
5503 		bitmap_set_bit (to_clear, j);
5504 	    }
5505 
5506 	  /* Remove the candidates for which the cost is infinite from
5507 	     the list of related candidates.  */
5508 	  bitmap_and_compl_into (group->related_cands, to_clear);
5509 	  bitmap_clear (to_clear);
5510 	}
5511     }
5512 
5513   BITMAP_FREE (to_clear);
5514 
5515   if (dump_file && (dump_flags & TDF_DETAILS))
5516     {
5517       bitmap_iterator bi;
5518 
5519       /* Dump invariant variables.  */
5520       fprintf (dump_file, "\n<Invariant Vars>:\n");
5521       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5522 	{
5523 	  struct version_info *info = ver_info (data, i);
5524 	  if (info->inv_id)
5525 	    {
5526 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5527 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5528 	      fprintf (dump_file, "%s\n",
5529 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5530 	    }
5531 	}
5532 
5533       /* Dump invariant expressions.  */
5534       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5535       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5536 
5537       for (hash_table<iv_inv_expr_hasher>::iterator it
5538 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5539 	   ++it)
5540 	list.safe_push (*it);
5541 
5542       list.qsort (sort_iv_inv_expr_ent);
5543 
5544       for (i = 0; i < list.length (); ++i)
5545 	{
5546 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5547 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5548 	  fprintf (dump_file, "\n");
5549 	}
5550 
5551       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5552 
5553       for (i = 0; i < data->vgroups.length (); i++)
5554 	{
5555 	  group = data->vgroups[i];
5556 
5557 	  fprintf (dump_file, "Group %d:\n", i);
5558 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5559 	  for (j = 0; j < group->n_map_members; j++)
5560 	    {
5561 	      if (!group->cost_map[j].cand
5562 		  || group->cost_map[j].cost.infinite_cost_p ())
5563 		continue;
5564 
5565 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5566 		       group->cost_map[j].cand->id,
5567 		       group->cost_map[j].cost.cost,
5568 		       group->cost_map[j].cost.complexity);
5569 	      if (!group->cost_map[j].inv_exprs
5570 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5571 		fprintf (dump_file, "NIL;\t");
5572 	      else
5573 		bitmap_print (dump_file,
5574 			      group->cost_map[j].inv_exprs, "", ";\t");
5575 	      if (!group->cost_map[j].inv_vars
5576 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5577 		fprintf (dump_file, "NIL;\n");
5578 	      else
5579 		bitmap_print (dump_file,
5580 			      group->cost_map[j].inv_vars, "", "\n");
5581 	    }
5582 
5583 	  fprintf (dump_file, "\n");
5584 	}
5585       fprintf (dump_file, "\n");
5586     }
5587 }
5588 
5589 /* Determines cost of the candidate CAND.  */
5590 
5591 static void
5592 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5593 {
5594   comp_cost cost_base;
5595   unsigned cost, cost_step;
5596   tree base;
5597 
5598   gcc_assert (cand->iv != NULL);
5599 
5600   /* There are two costs associated with the candidate -- its increment
5601      and its initialization.  The second is almost negligible for any loop
5602      that rolls enough, so we give it only very little weight.  */
5603 
5604   base = cand->iv->base;
5605   cost_base = force_var_cost (data, base, NULL);
5606   /* It will be exceptional that the iv register happens to be initialized with
5607      the proper value at no cost.  In general, there will at least be a regcopy
5608      or a const set.  */
5609   if (cost_base.cost == 0)
5610     cost_base.cost = COSTS_N_INSNS (1);
5611   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5612 
5613   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5614 
5615   /* Prefer the original ivs unless we may gain something by replacing them.
5616      The reason is to make debugging simpler; this is not relevant for
5617      artificial ivs created by other optimization passes.  */
5618   if (cand->pos != IP_ORIGINAL
5619       || !SSA_NAME_VAR (cand->var_before)
5620       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5621     cost++;
5622 
5623   /* Prefer not to insert statements into latch unless there are some
5624      already (so that we do not create unnecessary jumps).  */
5625   if (cand->pos == IP_END
5626       && empty_block_p (ip_end_pos (data->current_loop)))
5627     cost++;
5628 
5629   cand->cost = cost;
5630   cand->cost_step = cost_step;
5631 }
5632 
5633 /* Determines costs of computation of the candidates.  */
5634 
5635 static void
5636 determine_iv_costs (struct ivopts_data *data)
5637 {
5638   unsigned i;
5639 
5640   if (dump_file && (dump_flags & TDF_DETAILS))
5641     {
5642       fprintf (dump_file, "<Candidate Costs>:\n");
5643       fprintf (dump_file, "  cand\tcost\n");
5644     }
5645 
5646   for (i = 0; i < data->vcands.length (); i++)
5647     {
5648       struct iv_cand *cand = data->vcands[i];
5649 
5650       determine_iv_cost (data, cand);
5651 
5652       if (dump_file && (dump_flags & TDF_DETAILS))
5653 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5654     }
5655 
5656   if (dump_file && (dump_flags & TDF_DETAILS))
5657     fprintf (dump_file, "\n");
5658 }
5659 
5660 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5661    induction variables.  Note N_INVS includes both invariant variables and
5662    invariant expressions.  */
5663 
5664 static unsigned
5665 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5666 			      unsigned n_cands)
5667 {
5668   unsigned cost;
5669   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5670   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5671   bool speed = data->speed;
5672 
5673   /* If there is a call in the loop body, the call-clobbered registers
5674      are not available for loop invariants.  */
5675   if (data->body_includes_call)
5676     available_regs = available_regs - target_clobbered_regs;
5677 
5678   /* If we have enough registers, the cost is just the number of new ones.  */
5679   if (regs_needed + target_res_regs < available_regs)
5680     cost = n_new;
5681   /* If close to running out of registers, try to preserve them.  */
5682   else if (regs_needed <= available_regs)
5683     cost = target_reg_cost [speed] * regs_needed;
5684   /* If regs_needed exceeds the available registers but the number of
5685      candidates does not, penalize the excess using target_spill_cost.  */
5686   else if (n_cands <= available_regs)
5687     cost = target_reg_cost [speed] * available_regs
5688 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5689   /* If even the number of candidates exceeds the available registers, we
5690      penalize the extra candidate registers using target_spill_cost * 2,
5691      since spilling an induction variable costs more than an invariant.  */
5692   else
5693     cost = target_reg_cost [speed] * available_regs
5694 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5695 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5696 
5697   /* Finally, add the number of candidates, so that we prefer eliminating
5698      induction variables if possible.  */
5699   return cost + n_cands;
5700 }
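
/* A hedged walk-through of the tiers above, using made-up numbers:
   suppose 12 registers remain available after call-clobbering, 3 are
   reserved (target_res_regs), and n_old = 5, n_invs = 3, n_cands = 2.
   Then n_new = 5 and regs_needed = 10; 10 + 3 is not below 12, but
   10 <= 12, so the second tier applies and the cost is
   target_reg_cost[speed] * 10, plus the final n_cands = 2 that biases
   the search towards eliminating induction variables.  */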
5701 
5702 /* For each size of the induction variable set determine the penalty.  */
5703 
5704 static void
5705 determine_set_costs (struct ivopts_data *data)
5706 {
5707   unsigned j, n;
5708   gphi *phi;
5709   gphi_iterator psi;
5710   tree op;
5711   struct loop *loop = data->current_loop;
5712   bitmap_iterator bi;
5713 
5714   if (dump_file && (dump_flags & TDF_DETAILS))
5715     {
5716       fprintf (dump_file, "<Global Costs>:\n");
5717       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5718       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5719       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5720       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5721     }
5722 
5723   n = 0;
5724   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5725     {
5726       phi = psi.phi ();
5727       op = PHI_RESULT (phi);
5728 
5729       if (virtual_operand_p (op))
5730 	continue;
5731 
5732       if (get_iv (data, op))
5733 	continue;
5734 
5735       if (!POINTER_TYPE_P (TREE_TYPE (op))
5736 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5737 	continue;
5738 
5739       n++;
5740     }
5741 
5742   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5743     {
5744       struct version_info *info = ver_info (data, j);
5745 
5746       if (info->inv_id && info->has_nonlin_use)
5747 	n++;
5748     }
5749 
5750   data->regs_used = n;
5751   if (dump_file && (dump_flags & TDF_DETAILS))
5752     fprintf (dump_file, "  regs_used %d\n", n);
5753 
5754   if (dump_file && (dump_flags & TDF_DETAILS))
5755     {
5756       fprintf (dump_file, "  cost for size:\n");
5757       fprintf (dump_file, "  ivs\tcost\n");
5758       for (j = 0; j <= 2 * target_avail_regs; j++)
5759 	fprintf (dump_file, "  %d\t%d\n", j,
5760 		 ivopts_estimate_reg_pressure (data, 0, j));
5761       fprintf (dump_file, "\n");
5762     }
5763 }
5764 
5765 /* Returns true if A is a cheaper cost pair than B.  */
5766 
5767 static bool
5768 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5769 {
5770   if (!a)
5771     return false;
5772 
5773   if (!b)
5774     return true;
5775 
5776   if (a->cost < b->cost)
5777     return true;
5778 
5779   if (b->cost < a->cost)
5780     return false;
5781 
5782   /* In case the costs are the same, prefer the cheaper candidate.  */
5783   if (a->cand->cost < b->cand->cost)
5784     return true;
5785 
5786   return false;
5787 }
5788 
5789 /* Compare whether A is a more expensive cost pair than B.  Return 1, 0 or
5790    -1 if A is more expensive than, equal to, or cheaper than B, respectively.  */
5791 
5792 static int
5793 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5794 {
5795   if (cheaper_cost_pair (a, b))
5796     return -1;
5797   if (cheaper_cost_pair (b, a))
5798     return 1;
5799 
5800   return 0;
5801 }
5802 
5803 /* Returns the cost pair by which GROUP is expressed in IVS.  */
5804 
5805 static struct cost_pair *
5806 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5807 {
5808   return ivs->cand_for_group[group->id];
5809 }
5810 
5811 /* Computes the cost field of IVS structure.  */
5812 
5813 static void
5814 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5815 {
5816   comp_cost cost = ivs->cand_use_cost;
5817 
5818   cost += ivs->cand_cost;
5819   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5820   ivs->cost = cost;
5821 }
5822 
5823 /* Remove a use of the invariants in set INVS by decreasing their counters
5824    in N_INV_USES, updating IVS->n_invs accordingly.  */
5825 
5826 static void
5827 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5828 {
5829   bitmap_iterator bi;
5830   unsigned iid;
5831 
5832   if (!invs)
5833     return;
5834 
5835   gcc_assert (n_inv_uses != NULL);
5836   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5837     {
5838       n_inv_uses[iid]--;
5839       if (n_inv_uses[iid] == 0)
5840 	ivs->n_invs--;
5841     }
5842 }
5843 
5844 /* Set GROUP not to be expressed by any candidate in IVS.  */
5845 
5846 static void
5847 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5848 		 struct iv_group *group)
5849 {
5850   unsigned gid = group->id, cid;
5851   struct cost_pair *cp;
5852 
5853   cp = ivs->cand_for_group[gid];
5854   if (!cp)
5855     return;
5856   cid = cp->cand->id;
5857 
5858   ivs->bad_groups++;
5859   ivs->cand_for_group[gid] = NULL;
5860   ivs->n_cand_uses[cid]--;
5861 
5862   if (ivs->n_cand_uses[cid] == 0)
5863     {
5864       bitmap_clear_bit (ivs->cands, cid);
5865       ivs->n_cands--;
5866       ivs->cand_cost -= cp->cand->cost;
5867       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5868       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5869     }
5870 
5871   ivs->cand_use_cost -= cp->cost;
5872   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5873   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5874   iv_ca_recount_cost (data, ivs);
5875 }
5876 
5877 /* Add a use of the invariants in set INVS by increasing their counters in
5878    N_INV_USES, updating IVS->n_invs accordingly.  */
5879 
5880 static void
5881 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5882 {
5883   bitmap_iterator bi;
5884   unsigned iid;
5885 
5886   if (!invs)
5887     return;
5888 
5889   gcc_assert (n_inv_uses != NULL);
5890   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5891     {
5892       n_inv_uses[iid]++;
5893       if (n_inv_uses[iid] == 1)
5894 	ivs->n_invs++;
5895     }
5896 }
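
/* Together with iv_ca_set_remove_invs, this keeps the bookkeeping
   invariant (a reading note, not a claim from the original sources)
   that IVS->n_invs equals the number of indices I with N_INV_USES[I] > 0,
   i.e. the number of distinct invariants the current assignment
   references.  */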
5897 
5898 /* Set cost pair for GROUP in set IVS to CP.  */
5899 
5900 static void
5901 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5902 	      struct iv_group *group, struct cost_pair *cp)
5903 {
5904   unsigned gid = group->id, cid;
5905 
5906   if (ivs->cand_for_group[gid] == cp)
5907     return;
5908 
5909   if (ivs->cand_for_group[gid])
5910     iv_ca_set_no_cp (data, ivs, group);
5911 
5912   if (cp)
5913     {
5914       cid = cp->cand->id;
5915 
5916       ivs->bad_groups--;
5917       ivs->cand_for_group[gid] = cp;
5918       ivs->n_cand_uses[cid]++;
5919       if (ivs->n_cand_uses[cid] == 1)
5920 	{
5921 	  bitmap_set_bit (ivs->cands, cid);
5922 	  ivs->n_cands++;
5923 	  ivs->cand_cost += cp->cand->cost;
5924 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5925 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5926 	}
5927 
5928       ivs->cand_use_cost += cp->cost;
5929       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5930       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5931       iv_ca_recount_cost (data, ivs);
5932     }
5933 }
5934 
5935 /* Extend set IVS by expressing GROUP by some of the candidates in it
5936    if possible.  Consider all important candidates if the candidates in
5937    set IVS don't give any result.  */
5938 
5939 static void
5940 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5941 	       struct iv_group *group)
5942 {
5943   struct cost_pair *best_cp = NULL, *cp;
5944   bitmap_iterator bi;
5945   unsigned i;
5946   struct iv_cand *cand;
5947 
5948   gcc_assert (ivs->upto >= group->id);
5949   ivs->upto++;
5950   ivs->bad_groups++;
5951 
5952   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5953     {
5954       cand = data->vcands[i];
5955       cp = get_group_iv_cost (data, group, cand);
5956       if (cheaper_cost_pair (cp, best_cp))
5957 	best_cp = cp;
5958     }
5959 
5960   if (best_cp == NULL)
5961     {
5962       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5963 	{
5964 	  cand = data->vcands[i];
5965 	  cp = get_group_iv_cost (data, group, cand);
5966 	  if (cheaper_cost_pair (cp, best_cp))
5967 	    best_cp = cp;
5968 	}
5969     }
5970 
5971   iv_ca_set_cp (data, ivs, group, best_cp);
5972 }
5973 
5974 /* Get cost for assignment IVS.  */
5975 
5976 static comp_cost
5977 iv_ca_cost (struct iv_ca *ivs)
5978 {
5979   /* This was a conditional expression but it triggered a bug in
5980      Sun C 5.5.  */
5981   if (ivs->bad_groups)
5982     return infinite_cost;
5983   else
5984     return ivs->cost;
5985 }
5986 
5987 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5988    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5989    respectively.  */
5990 
5991 static int
5992 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5993 		    struct iv_group *group, struct cost_pair *old_cp,
5994 		    struct cost_pair *new_cp)
5995 {
5996   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5997   unsigned old_n_invs = ivs->n_invs;
5998   iv_ca_set_cp (data, ivs, group, new_cp);
5999   unsigned new_n_invs = ivs->n_invs;
6000   iv_ca_set_cp (data, ivs, group, old_cp);
6001 
6002   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6003 }
6004 
6005 /* Creates a change that expresses GROUP by NEW_CP instead of OLD_CP, and
6006    chains it before NEXT.  */
6007 
6008 static struct iv_ca_delta *
6009 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6010 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
6011 {
6012   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6013 
6014   change->group = group;
6015   change->old_cp = old_cp;
6016   change->new_cp = new_cp;
6017   change->next = next;
6018 
6019   return change;
6020 }
6021 
6022 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6023    are rewritten.  */
6024 
6025 static struct iv_ca_delta *
6026 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6027 {
6028   struct iv_ca_delta *last;
6029 
6030   if (!l2)
6031     return l1;
6032 
6033   if (!l1)
6034     return l2;
6035 
6036   for (last = l1; last->next; last = last->next)
6037     continue;
6038   last->next = l2;
6039 
6040   return l1;
6041 }
6042 
6043 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6044 
6045 static struct iv_ca_delta *
6046 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6047 {
6048   struct iv_ca_delta *act, *next, *prev = NULL;
6049 
6050   for (act = delta; act; act = next)
6051     {
6052       next = act->next;
6053       act->next = prev;
6054       prev = act;
6055 
6056       std::swap (act->old_cp, act->new_cp);
6057     }
6058 
6059   return prev;
6060 }
6061 
6062 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6063    reverted instead.  */
6064 
6065 static void
6066 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6067 		    struct iv_ca_delta *delta, bool forward)
6068 {
6069   struct cost_pair *from, *to;
6070   struct iv_ca_delta *act;
6071 
6072   if (!forward)
6073     delta = iv_ca_delta_reverse (delta);
6074 
6075   for (act = delta; act; act = act->next)
6076     {
6077       from = act->old_cp;
6078       to = act->new_cp;
6079       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6080       iv_ca_set_cp (data, ivs, act->group, to);
6081     }
6082 
6083   if (!forward)
6084     iv_ca_delta_reverse (delta);
6085 }
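
/* A sketch of the trial pattern the callers below build on top of this
   (see iv_ca_extend and iv_ca_narrow):

     iv_ca_delta_commit (data, ivs, delta, true);    - apply tentatively
     cost = iv_ca_cost (ivs);                        - measure the result
     iv_ca_delta_commit (data, ivs, delta, false);   - revert

   so a change can be costed without copying the whole assignment.  */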
6086 
6087 /* Returns true if CAND is used in IVS.  */
6088 
6089 static bool
6090 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6091 {
6092   return ivs->n_cand_uses[cand->id] > 0;
6093 }
6094 
6095 /* Returns number of induction variable candidates in the set IVS.  */
6096 
6097 static unsigned
6098 iv_ca_n_cands (struct iv_ca *ivs)
6099 {
6100   return ivs->n_cands;
6101 }
6102 
6103 /* Free the list of changes DELTA.  */
6104 
6105 static void
6106 iv_ca_delta_free (struct iv_ca_delta **delta)
6107 {
6108   struct iv_ca_delta *act, *next;
6109 
6110   for (act = *delta; act; act = next)
6111     {
6112       next = act->next;
6113       free (act);
6114     }
6115 
6116   *delta = NULL;
6117 }
6118 
6119 /* Allocates a new iv candidate assignment.  */
6120 
6121 static struct iv_ca *
6122 iv_ca_new (struct ivopts_data *data)
6123 {
6124   struct iv_ca *nw = XNEW (struct iv_ca);
6125 
6126   nw->upto = 0;
6127   nw->bad_groups = 0;
6128   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6129 				 data->vgroups.length ());
6130   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6131   nw->cands = BITMAP_ALLOC (NULL);
6132   nw->n_cands = 0;
6133   nw->n_invs = 0;
6134   nw->cand_use_cost = no_cost;
6135   nw->cand_cost = 0;
6136   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6137   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6138   nw->cost = no_cost;
6139 
6140   return nw;
6141 }
6142 
6143 /* Free memory occupied by the set IVS.  */
6144 
6145 static void
6146 iv_ca_free (struct iv_ca **ivs)
6147 {
6148   free ((*ivs)->cand_for_group);
6149   free ((*ivs)->n_cand_uses);
6150   BITMAP_FREE ((*ivs)->cands);
6151   free ((*ivs)->n_inv_var_uses);
6152   free ((*ivs)->n_inv_expr_uses);
6153   free (*ivs);
6154   *ivs = NULL;
6155 }
6156 
6157 /* Dumps IVS to FILE.  */
6158 
6159 static void
6160 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6161 {
6162   unsigned i;
6163   comp_cost cost = iv_ca_cost (ivs);
6164 
6165   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6166 	   cost.complexity);
6167   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6168 	   ivs->cand_cost, ivs->cand_use_cost.cost,
6169 	   ivs->cand_use_cost.complexity);
6170   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6171 
6172   for (i = 0; i < ivs->upto; i++)
6173     {
6174       struct iv_group *group = data->vgroups[i];
6175       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6176       if (cp)
6177         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6178 		 group->id, cp->cand->id, cp->cost.cost,
6179 		 cp->cost.complexity);
6180       else
6181 	fprintf (file, "   group:%d --> ??\n", group->id);
6182     }
6183 
6184   const char *pref = "";
6185   fprintf (file, "  invariant variables: ");
6186   for (i = 1; i <= data->max_inv_var_id; i++)
6187     if (ivs->n_inv_var_uses[i])
6188       {
6189 	fprintf (file, "%s%d", pref, i);
6190 	pref = ", ";
6191       }
6192 
6193   pref = "";
6194   fprintf (file, "\n  invariant expressions: ");
6195   for (i = 1; i <= data->max_inv_expr_id; i++)
6196     if (ivs->n_inv_expr_uses[i])
6197       {
6198 	fprintf (file, "%s%d", pref, i);
6199 	pref = ", ";
6200       }
6201 
6202   fprintf (file, "\n\n");
6203 }
6204 
6205 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
6206    of the new set, and store the differences in DELTA.  The number of
6207    induction variables in the new set is stored in N_IVS.  If MIN_NCAND is
6208    true, the function tries to find a solution with minimal iv candidates.  */
6209 
6210 static comp_cost
6211 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6212 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6213 	      unsigned *n_ivs, bool min_ncand)
6214 {
6215   unsigned i;
6216   comp_cost cost;
6217   struct iv_group *group;
6218   struct cost_pair *old_cp, *new_cp;
6219 
6220   *delta = NULL;
6221   for (i = 0; i < ivs->upto; i++)
6222     {
6223       group = data->vgroups[i];
6224       old_cp = iv_ca_cand_for_group (ivs, group);
6225 
6226       if (old_cp
6227 	  && old_cp->cand == cand)
6228 	continue;
6229 
6230       new_cp = get_group_iv_cost (data, group, cand);
6231       if (!new_cp)
6232 	continue;
6233 
6234       if (!min_ncand)
6235 	{
6236 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6237 	  /* Skip if new_cp depends on more invariants.  */
6238 	  if (cmp_invs > 0)
6239 	    continue;
6240 
6241 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6242 	  /* Skip if new_cp is not cheaper.  */
6243 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6244 	    continue;
6245 	}
6246 
6247       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6248     }
6249 
6250   iv_ca_delta_commit (data, ivs, *delta, true);
6251   cost = iv_ca_cost (ivs);
6252   if (n_ivs)
6253     *n_ivs = iv_ca_n_cands (ivs);
6254   iv_ca_delta_commit (data, ivs, *delta, false);
6255 
6256   return cost;
6257 }
6258 
6259 /* Try narrowing set IVS by removing CAND.  Return the cost of
6260    the new set and store the differences in DELTA.  START is
6261    the candidate with which we start narrowing.  */
6262 
6263 static comp_cost
6264 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6265 	      struct iv_cand *cand, struct iv_cand *start,
6266 	      struct iv_ca_delta **delta)
6267 {
6268   unsigned i, ci;
6269   struct iv_group *group;
6270   struct cost_pair *old_cp, *new_cp, *cp;
6271   bitmap_iterator bi;
6272   struct iv_cand *cnd;
6273   comp_cost cost, best_cost, acost;
6274 
6275   *delta = NULL;
6276   for (i = 0; i < data->vgroups.length (); i++)
6277     {
6278       group = data->vgroups[i];
6279 
6280       old_cp = iv_ca_cand_for_group (ivs, group);
6281       if (old_cp->cand != cand)
6282 	continue;
6283 
6284       best_cost = iv_ca_cost (ivs);
6285       /* Start narrowing with START.  */
6286       new_cp = get_group_iv_cost (data, group, start);
6287 
6288       if (data->consider_all_candidates)
6289 	{
6290 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6291 	    {
6292 	      if (ci == cand->id || (start && ci == start->id))
6293 		continue;
6294 
6295 	      cnd = data->vcands[ci];
6296 
6297 	      cp = get_group_iv_cost (data, group, cnd);
6298 	      if (!cp)
6299 		continue;
6300 
6301 	      iv_ca_set_cp (data, ivs, group, cp);
6302 	      acost = iv_ca_cost (ivs);
6303 
6304 	      if (acost < best_cost)
6305 		{
6306 		  best_cost = acost;
6307 		  new_cp = cp;
6308 		}
6309 	    }
6310 	}
6311       else
6312 	{
6313 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6314 	    {
6315 	      if (ci == cand->id || (start && ci == start->id))
6316 		continue;
6317 
6318 	      cnd = data->vcands[ci];
6319 
6320 	      cp = get_group_iv_cost (data, group, cnd);
6321 	      if (!cp)
6322 		continue;
6323 
6324 	      iv_ca_set_cp (data, ivs, group, cp);
6325 	      acost = iv_ca_cost (ivs);
6326 
6327 	      if (acost < best_cost)
6328 		{
6329 		  best_cost = acost;
6330 		  new_cp = cp;
6331 		}
6332 	    }
6333 	}
6334       /* Restore the old cost pair for this group.  */
6335       iv_ca_set_cp (data, ivs, group, old_cp);
6336 
6337       if (!new_cp)
6338 	{
6339 	  iv_ca_delta_free (delta);
6340 	  return infinite_cost;
6341 	}
6342 
6343       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6344     }
6345 
6346   iv_ca_delta_commit (data, ivs, *delta, true);
6347   cost = iv_ca_cost (ivs);
6348   iv_ca_delta_commit (data, ivs, *delta, false);
6349 
6350   return cost;
6351 }
6352 
6353 /* Try optimizing the set of candidates IVS by removing candidates other
6354    than EXCEPT_CAND from it.  Return the cost of the new set, and store
6355    differences in DELTA.  */
6356 
6357 static comp_cost
6358 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6359 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6360 {
6361   bitmap_iterator bi;
6362   struct iv_ca_delta *act_delta, *best_delta;
6363   unsigned i;
6364   comp_cost best_cost, acost;
6365   struct iv_cand *cand;
6366 
6367   best_delta = NULL;
6368   best_cost = iv_ca_cost (ivs);
6369 
6370   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6371     {
6372       cand = data->vcands[i];
6373 
6374       if (cand == except_cand)
6375 	continue;
6376 
6377       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6378 
6379       if (acost < best_cost)
6380 	{
6381 	  best_cost = acost;
6382 	  iv_ca_delta_free (&best_delta);
6383 	  best_delta = act_delta;
6384 	}
6385       else
6386 	iv_ca_delta_free (&act_delta);
6387     }
6388 
6389   if (!best_delta)
6390     {
6391       *delta = NULL;
6392       return best_cost;
6393     }
6394 
6395   /* Recurse to possibly remove other unnecessary ivs.  */
6396   iv_ca_delta_commit (data, ivs, best_delta, true);
6397   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6398   iv_ca_delta_commit (data, ivs, best_delta, false);
6399   *delta = iv_ca_delta_join (best_delta, *delta);
6400   return best_cost;
6401 }
6402 
6403 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6404    cheaper local cost for GROUP than BEST_CP.  If so, return a pointer
6405    to its cost_pair; otherwise just return BEST_CP.  */
6406 
6407 static struct cost_pair*
6408 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6409 			unsigned int cand_idx, struct iv_cand *old_cand,
6410 			struct cost_pair *best_cp)
6411 {
6412   struct iv_cand *cand;
6413   struct cost_pair *cp;
6414 
6415   gcc_assert (old_cand != NULL && best_cp != NULL);
6416   if (cand_idx == old_cand->id)
6417     return best_cp;
6418 
6419   cand = data->vcands[cand_idx];
6420   cp = get_group_iv_cost (data, group, cand);
6421   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6422     return cp;
6423 
6424   return best_cp;
6425 }
6426 
6427 /* Try breaking the locally optimal fixed-point for IVS by replacing
6428    candidates that are used by more than one iv use.  For each such
6429    candidate, this function tries to represent the iv uses under that
6430    candidate using other candidates with lower local cost, then tries to
6431    prune the new set.  If the new set has a lower cost, it returns the new
6432    cost after recording the candidate replacement in the list DELTA.  */
6433 
6434 static comp_cost
6435 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6436 	       struct iv_ca_delta **delta)
6437 {
6438   bitmap_iterator bi, bj;
6439   unsigned int i, j, k;
6440   struct iv_cand *cand;
6441   comp_cost orig_cost, acost;
6442   struct iv_ca_delta *act_delta, *tmp_delta;
6443   struct cost_pair *old_cp, *best_cp = NULL;
6444 
6445   *delta = NULL;
6446   orig_cost = iv_ca_cost (ivs);
6447 
6448   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6449     {
6450       if (ivs->n_cand_uses[i] == 1
6451 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6452 	continue;
6453 
6454       cand = data->vcands[i];
6455 
6456       act_delta = NULL;
6457       /* Represent uses under the current candidate using other ones with
6458 	  lower local cost.  */
6459       for (j = 0; j < ivs->upto; j++)
6460 	{
6461 	  struct iv_group *group = data->vgroups[j];
6462 	  old_cp = iv_ca_cand_for_group (ivs, group);
6463 
6464 	  if (old_cp->cand != cand)
6465 	    continue;
6466 
6467 	  best_cp = old_cp;
6468 	  if (data->consider_all_candidates)
6469 	    for (k = 0; k < data->vcands.length (); k++)
6470 	      best_cp = cheaper_cost_with_cand (data, group, k,
6471 						old_cp->cand, best_cp);
6472 	  else
6473 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6474 	      best_cp = cheaper_cost_with_cand (data, group, k,
6475 						old_cp->cand, best_cp);
6476 
6477 	  if (best_cp == old_cp)
6478 	    continue;
6479 
6480 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6481 	}
6482       /* No need for further pruning.  */
6483       if (!act_delta)
6484 	continue;
6485 
6486       /* Prune the new candidate set.  */
6487       iv_ca_delta_commit (data, ivs, act_delta, true);
6488       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6489       iv_ca_delta_commit (data, ivs, act_delta, false);
6490       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6491 
6492       if (acost < orig_cost)
6493 	{
6494 	  *delta = act_delta;
6495 	  return acost;
6496 	}
6497       else
6498 	iv_ca_delta_free (&act_delta);
6499     }
6500 
6501   return orig_cost;
6502 }
6503 
6504 /* Tries to extend the set IVS in the best possible way in order to
6505    express the GROUP.  If ORIGINALP is true, prefer candidates from
6506    the original set of IVs, otherwise favor important candidates not
6507    based on any memory object.  */
6508 
6509 static bool
6510 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6511 		  struct iv_group *group, bool originalp)
6512 {
6513   comp_cost best_cost, act_cost;
6514   unsigned i;
6515   bitmap_iterator bi;
6516   struct iv_cand *cand;
6517   struct iv_ca_delta *best_delta = NULL, *act_delta;
6518   struct cost_pair *cp;
6519 
6520   iv_ca_add_group (data, ivs, group);
6521   best_cost = iv_ca_cost (ivs);
6522   cp = iv_ca_cand_for_group (ivs, group);
6523   if (cp)
6524     {
6525       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6526       iv_ca_set_no_cp (data, ivs, group);
6527     }
6528 
6529   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6530      first try important candidates not based on any memory object.  Only if
6531      this fails, try the specific ones.  Rationale -- in loops with many
6532      variables the best choice often is to use just one generic biv.  If we
6533      added here many ivs specific to the uses, the optimization algorithm later
6534      would be likely to get stuck in a local minimum, thus causing us to create
6535      too many ivs.  The approach from few ivs to more seems more likely to be
6536      successful -- starting from few ivs, replacing an expensive use by a
6537      specific iv should always be a win.  */
6538   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6539     {
6540       cand = data->vcands[i];
6541 
6542       if (originalp && cand->pos != IP_ORIGINAL)
6543 	continue;
6544 
6545       if (!originalp && cand->iv->base_object != NULL_TREE)
6546 	continue;
6547 
6548       if (iv_ca_cand_used_p (ivs, cand))
6549 	continue;
6550 
6551       cp = get_group_iv_cost (data, group, cand);
6552       if (!cp)
6553 	continue;
6554 
6555       iv_ca_set_cp (data, ivs, group, cp);
6556       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6557 			       true);
6558       iv_ca_set_no_cp (data, ivs, group);
6559       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6560 
6561       if (act_cost < best_cost)
6562 	{
6563 	  best_cost = act_cost;
6564 
6565 	  iv_ca_delta_free (&best_delta);
6566 	  best_delta = act_delta;
6567 	}
6568       else
6569 	iv_ca_delta_free (&act_delta);
6570     }
6571 
6572   if (best_cost.infinite_cost_p ())
6573     {
6574       for (i = 0; i < group->n_map_members; i++)
6575 	{
6576 	  cp = group->cost_map + i;
6577 	  cand = cp->cand;
6578 	  if (!cand)
6579 	    continue;
6580 
6581 	  /* Already tried this.  */
6582 	  if (cand->important)
6583 	    {
6584 	      if (originalp && cand->pos == IP_ORIGINAL)
6585 		continue;
6586 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6587 		continue;
6588 	    }
6589 
6590 	  if (iv_ca_cand_used_p (ivs, cand))
6591 	    continue;
6592 
6593 	  act_delta = NULL;
6594 	  iv_ca_set_cp (data, ivs, group, cp);
6595 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6596 	  iv_ca_set_no_cp (data, ivs, group);
6597 	  act_delta = iv_ca_delta_add (group,
6598 				       iv_ca_cand_for_group (ivs, group),
6599 				       cp, act_delta);
6600 
6601 	  if (act_cost < best_cost)
6602 	    {
6603 	      best_cost = act_cost;
6604 
6605 	      if (best_delta)
6606 		iv_ca_delta_free (&best_delta);
6607 	      best_delta = act_delta;
6608 	    }
6609 	  else
6610 	    iv_ca_delta_free (&act_delta);
6611 	}
6612     }
6613 
6614   iv_ca_delta_commit (data, ivs, best_delta, true);
6615   iv_ca_delta_free (&best_delta);
6616 
6617   return !best_cost.infinite_cost_p ();
6618 }

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  unsigned i;
  struct iv_ca *ivs = iv_ca_new (data);

  for (i = 0; i < data->vgroups.length (); i++)
    if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break out of a locally optimal fixed point by replacing candidates
   in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate-selection algorithm tends to choose few
	     IVs, so that it can handle loops that have many variables but
	     for which the best choice is to use just one general biv.  One
	     weakness is that it can't handle the opposite case, in which
	     different candidates should be chosen for different uses.  To
	     solve this problem, we replace candidates in the manner
	     described in the comments of iv_ca_replace, thus giving the
	     general algorithm a chance to break out of a locally optimal
	     fixed point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic: we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */
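/* Schematically, the search performed here looks as follows (an informal
   sketch, not part of the original comments):

     set = get_initial_solution (data, originalp);
     while (try_improve_iv_set (data, set, &try_replace_p))
       ;  -- commit the single best extend/prune/replace delta

   Each committed delta improves the cost of SET, so the iteration
   terminates.  */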

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  comp_cost cost, origcost;
  struct iv_ca *set, *origset;

  /* Determine the cost using a strategy that starts with the original IVs,
     then try again using a strategy that prefers candidates not based on
     any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (origcost <= cost)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      group->selected = iv_ca_cand_for_group (set, group)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  struct iv_use *use;
  struct iv_group *group;
  bool after = false;

  gcc_assert (cand->iv != NULL);

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;
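
      /* For an original biv of the form (illustrative SSA names)

	     i_1 = PHI <0 (preheader), i_2 (latch)>
	     ...
	     i_2 = i_1 + 1;

	 var_before is i_1 and var_after is i_2.  */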

      /* Rewrite the increment so that it uses var_before directly.  */
      use = find_interesting_uses_op (data, cand->var_after);
      group = data->vgroups[use->group_id];
      group->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
	       avg_loop_niter (data->current_loop));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = data->vcands[i];
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (the definition of an iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to introduce
     a new computation (that might also need casting the variable to
     unsigned and back).  */
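  /* E.g. if USE->stmt is the increment  i_2 = i_1 + 4  of CAND itself,
     it can usually be left in place; the checks below make sure the
     other operand does not depend on computations in the loop
     (illustrative).  */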
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	}

      if (op != NULL_TREE)
	{
	  if (expr_invariant_in_loop_p (data->current_loop, op))
	    return;
	  if (TREE_CODE (op) == SSA_NAME)
	    {
	      struct iv *iv = get_iv (data, op);
	      if (iv != NULL && integer_zerop (iv->step))
		return;
	    }
	}
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data->current_loop, use->stmt,
			      use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer a CSE opportunity over hoisting a loop invariant: add the
     constant offset last, so that iv_uses with different offsets can
     be CSEd.  */
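  /* E.g. uses based on  &a[i]  and  &a[i] + 4  can then share the
     computation of  &a + i * 4, with the constant offsets 0 and 4
     applied afterwards (an illustrative sketch).  */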
  poly_widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      comp = fold_build_pointer_plus (comp_op1,
				      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
				      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  comp = fold_convert (type, comp);
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole instead
   transforms the code into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering\n");
      print_gimple_stmt (dump_file, iv_update, 0);
      print_gimple_stmt (dump_file, use->stmt, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Return the alias pointer type that should be used for a MEM_REF
   associated with USE, which has type USE_PTR_ADDRESS.  */

static tree
get_alias_ptr_type_for_ptr_address (iv_use *use)
{
  gcall *call = as_a <gcall *> (use->stmt);
  switch (gimple_call_internal_fn (call))
    {
    case IFN_MASK_LOAD:
    case IFN_MASK_STORE:
      /* The second argument contains the correct alias type.  */
      gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
      return TREE_TYPE (gimple_call_arg (call, 1));

    default:
      gcc_unreachable ();
    }
}


/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
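  /* E.g. an access  a[i]  may end up as a TARGET_MEM_REF dumped as
     MEM[symbol: a, index: ivtmp.5, step: 4]  -- an illustrative shape;
     the exact form depends on the target's addressing modes.  */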
  tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
  tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree type = use->mem_type;
  tree alias_ptr_type;
  if (use->type == USE_PTR_ADDRESS)
    alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
  else
    {
      gcc_assert (type == TREE_TYPE (*use->op_p));
      unsigned int align = get_object_alignment (*use->op_p);
      if (align != TYPE_ALIGN (type))
	type = build_aligned_type (type, align);
      alias_ptr_type = reference_alias_ptr_type (*use->op_p);
    }
  tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
			     iv, base_hint, data->speed);

  if (use->type == USE_PTR_ADDRESS)
    {
      ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
      ref = fold_convert (get_use_type (use), ref);
      ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
				      true, GSI_SAME_STMT);
    }
  else
    copy_ref_info (ref, *use->op_p);

  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the arguments is an iv)
   using candidate CAND.  */
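/* For instance, an exit test  i_1 < n_2  may be replaced by
   p_3 != bound_4, where p_3 is the chosen candidate and bound_4 its
   precomputed value at loop exit (illustrative SSA names).  */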

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct iv_group *group = data->vgroups[use->group_id];
  struct cost_pair *cp = get_group_iv_cost (data, group, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation_at (data->current_loop, use->stmt, use, cand);
  gcc_assert (comp != NULL_TREE);
  gcc_assert (use->op_p != NULL);
  *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
					 SSA_NAME_VAR (*use->op_p),
					 true, GSI_SAME_STMT);
}

/* Rewrite the groups using the selected induction variables.  */

static void
rewrite_groups (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_cand *cand = group->selected;

      gcc_assert (cand);

      if (group->type == USE_NONLINEAR_EXPR)
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else if (address_p (group->type))
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_address (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else
	{
	  gcc_assert (group->type == USE_COMPARE);

	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_compare (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
{
  unsigned j;
  bitmap_iterator bi;

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->nonlin_use
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
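		  /* E.g. a single bind  # DEBUG i => i_3  yields
		     count == 1, whereas  # DEBUG x => i_3 + 1  or
		     several binds force a debug temp (illustrative).  */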
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      SSA_NAME_DEF_STMT (def),
					      &dummy_use, best_cand);
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
		  else
		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
	free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
	{
	  if (group->cost_map[j].inv_vars)
	    BITMAP_FREE (group->cost_map[j].inv_vars);
	  if (group->cost_map[j].inv_exprs)
	    BITMAP_FREE (group->cost_map[j].inv_exprs);
	}

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
	BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
	BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  if (data->base_object_map)
    delete data->base_object_map;
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !gimple_call_internal_p (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
			   bitmap toremove)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop).get_location_t ();
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data, toremove);

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  auto_bitmap toremove;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop, toremove);
    }

  /* Remove eliminated IV defs.  */
  release_defs_bitset (toremove);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset_htab ();
  /* Likewise niter and control-IV information.  */
  free_numbers_of_iterations_estimates (cfun);

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"