1 /* Induction variable optimizations.
2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does it in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note the interesting uses are categorized and handled in group.
34       Generally, address type uses are grouped together if their iv bases
35       are different in constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variables
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible, it might give a better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.
68 
69    For the targets supporting low-overhead loops, IVOPTs has to take care of
70    the loops which will probably be transformed in RTL doloop optimization,
71    to try to make selected IV candidate set optimal.  The process of doloop
72    support includes:
73 
74    1) Analyze the current loop will be transformed to doloop or not, find and
75       mark its compare type IV use as doloop use (iv_group field doloop_p), and
76       set flag doloop_use_p of ivopts_data to notify subsequent processings on
77       doloop.  See analyze_and_mark_doloop_use and its callees for the details.
78       The target hook predict_doloop_p can be used for target specific checks.
79 
80    2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81       set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82       like biv.  For cost determination between doloop IV cand and IV use, the
83       target hooks doloop_cost_for_generic and doloop_cost_for_address are
84       provided to add on extra costs for generic type and address type IV use.
85       Zero cost is assigned to the pair between doloop IV cand and doloop IV
86       use, and bound zero is set for IV elimination.
87 
88    3) With the cost setting in step 2), the current cost model based IV
89       selection algorithm will process as usual, pick up doloop dedicated IV if
90       profitable.  */
91 
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 
135 /* For lang_hooks.types.type_for_mode.  */
136 #include "langhooks.h"
137 
138 /* FIXME: Expressions are expanded to RTL in this pass to determine the
139    cost of different addressing modes.  This should be moved to a TBD
140    interface between the GIMPLE and RTL worlds.  */
141 
142 /* The infinite cost.  */
143 #define INFTY 1000000000
144 
145 /* Returns the expected number of loop iterations for LOOP.
146    The average trip count is computed from profile data if it
147    exists. */
148 
149 static inline HOST_WIDE_INT
avg_loop_niter(class loop * loop)150 avg_loop_niter (class loop *loop)
151 {
152   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
153   if (niter == -1)
154     {
155       niter = likely_max_stmt_executions_int (loop);
156 
157       if (niter == -1 || niter > param_avg_loop_niter)
158 	return param_avg_loop_niter;
159     }
160 
161   return niter;
162 }
163 
164 struct iv_use;
165 
166 /* Representation of the induction variable.  */
167 struct iv
168 {
169   tree base;		/* Initial value of the iv.  */
170   tree base_object;	/* A memory object to that the induction variable points.  */
171   tree step;		/* Step of the iv (constant only).  */
172   tree ssa_name;	/* The ssa name with the value.  */
173   struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
174   bool biv_p;		/* Is it a biv?  */
175   bool no_overflow;	/* True if the iv doesn't overflow.  */
176   bool have_address_use;/* For biv, indicate if it's used in any address
177 			   type use.  */
178 };
179 
180 /* Per-ssa version information (induction variable descriptions, etc.).  */
181 struct version_info
182 {
183   tree name;		/* The ssa name.  */
184   struct iv *iv;	/* Induction variable description.  */
185   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
186 			   an expression that is not an induction variable.  */
187   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
188   unsigned inv_id;	/* Id of an invariant.  */
189 };
190 
191 /* Types of uses.  */
192 enum use_type
193 {
194   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
195   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
196 			   reference.  */
197   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
198 			   cases where the expansion of the function
199 			   will turn the argument into a normal address.  */
200   USE_COMPARE		/* Use is a compare.  */
201 };
202 
203 /* Cost of a computation.  */
204 class comp_cost
205 {
206 public:
comp_cost()207   comp_cost (): cost (0), complexity (0), scratch (0)
208   {}
209 
210   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
cost(cost)211     : cost (cost), complexity (complexity), scratch (scratch)
212   {}
213 
214   /* Returns true if COST is infinite.  */
215   bool infinite_cost_p ();
216 
217   /* Adds costs COST1 and COST2.  */
218   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
219 
220   /* Adds COST to the comp_cost.  */
221   comp_cost operator+= (comp_cost cost);
222 
223   /* Adds constant C to this comp_cost.  */
224   comp_cost operator+= (HOST_WIDE_INT c);
225 
226   /* Subtracts constant C to this comp_cost.  */
227   comp_cost operator-= (HOST_WIDE_INT c);
228 
229   /* Divide the comp_cost by constant C.  */
230   comp_cost operator/= (HOST_WIDE_INT c);
231 
232   /* Multiply the comp_cost by constant C.  */
233   comp_cost operator*= (HOST_WIDE_INT c);
234 
235   /* Subtracts costs COST1 and COST2.  */
236   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
237 
238   /* Subtracts COST from this comp_cost.  */
239   comp_cost operator-= (comp_cost cost);
240 
241   /* Returns true if COST1 is smaller than COST2.  */
242   friend bool operator< (comp_cost cost1, comp_cost cost2);
243 
244   /* Returns true if COST1 and COST2 are equal.  */
245   friend bool operator== (comp_cost cost1, comp_cost cost2);
246 
247   /* Returns true if COST1 is smaller or equal than COST2.  */
248   friend bool operator<= (comp_cost cost1, comp_cost cost2);
249 
250   int64_t cost;		/* The runtime cost.  */
251   unsigned complexity;  /* The estimate of the complexity of the code for
252 			   the computation (in no concrete units --
253 			   complexity field should be larger for more
254 			   complex expressions and addressing modes).  */
255   int64_t scratch;	/* Scratch used during cost computation.  */
256 };
257 
258 static const comp_cost no_cost;
259 static const comp_cost infinite_cost (INFTY, 0, INFTY);
260 
261 bool
infinite_cost_p()262 comp_cost::infinite_cost_p ()
263 {
264   return cost == INFTY;
265 }
266 
267 comp_cost
268 operator+ (comp_cost cost1, comp_cost cost2)
269 {
270   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
271     return infinite_cost;
272 
273   gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
274   cost1.cost += cost2.cost;
275   cost1.complexity += cost2.complexity;
276 
277   return cost1;
278 }
279 
280 comp_cost
281 operator- (comp_cost cost1, comp_cost cost2)
282 {
283   if (cost1.infinite_cost_p ())
284     return infinite_cost;
285 
286   gcc_assert (!cost2.infinite_cost_p ());
287   gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
288 
289   cost1.cost -= cost2.cost;
290   cost1.complexity -= cost2.complexity;
291 
292   return cost1;
293 }
294 
295 comp_cost
296 comp_cost::operator+= (comp_cost cost)
297 {
298   *this = *this + cost;
299   return *this;
300 }
301 
302 comp_cost
303 comp_cost::operator+= (HOST_WIDE_INT c)
304 {
305   if (c >= INFTY)
306     this->cost = INFTY;
307 
308   if (infinite_cost_p ())
309     return *this;
310 
311   gcc_assert (this->cost + c < infinite_cost.cost);
312   this->cost += c;
313 
314   return *this;
315 }
316 
317 comp_cost
318 comp_cost::operator-= (HOST_WIDE_INT c)
319 {
320   if (infinite_cost_p ())
321     return *this;
322 
323   gcc_assert (this->cost - c < infinite_cost.cost);
324   this->cost -= c;
325 
326   return *this;
327 }
328 
329 comp_cost
330 comp_cost::operator/= (HOST_WIDE_INT c)
331 {
332   gcc_assert (c != 0);
333   if (infinite_cost_p ())
334     return *this;
335 
336   this->cost /= c;
337 
338   return *this;
339 }
340 
341 comp_cost
342 comp_cost::operator*= (HOST_WIDE_INT c)
343 {
344   if (infinite_cost_p ())
345     return *this;
346 
347   gcc_assert (this->cost * c < infinite_cost.cost);
348   this->cost *= c;
349 
350   return *this;
351 }
352 
353 comp_cost
354 comp_cost::operator-= (comp_cost cost)
355 {
356   *this = *this - cost;
357   return *this;
358 }
359 
360 bool
361 operator< (comp_cost cost1, comp_cost cost2)
362 {
363   if (cost1.cost == cost2.cost)
364     return cost1.complexity < cost2.complexity;
365 
366   return cost1.cost < cost2.cost;
367 }
368 
369 bool
370 operator== (comp_cost cost1, comp_cost cost2)
371 {
372   return cost1.cost == cost2.cost
373     && cost1.complexity == cost2.complexity;
374 }
375 
376 bool
377 operator<= (comp_cost cost1, comp_cost cost2)
378 {
379   return cost1 < cost2 || cost1 == cost2;
380 }
381 
382 struct iv_inv_expr_ent;
383 
384 /* The candidate - cost pair.  */
385 class cost_pair
386 {
387 public:
388   struct iv_cand *cand;	/* The candidate.  */
389   comp_cost cost;	/* The cost.  */
390   enum tree_code comp;	/* For iv elimination, the comparison.  */
391   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
392 			   preserved when representing iv_use with iv_cand.  */
393   bitmap inv_exprs;	/* The list of newly created invariant expressions
394 			   when representing iv_use with iv_cand.  */
395   tree value;		/* For final value elimination, the expression for
396 			   the final value of the iv.  For iv elimination,
397 			   the new bound to compare with.  */
398 };
399 
400 /* Use.  */
401 struct iv_use
402 {
403   unsigned id;		/* The id of the use.  */
404   unsigned group_id;	/* The group id the use belongs to.  */
405   enum use_type type;	/* Type of the use.  */
406   tree mem_type;	/* The memory type to use when testing whether an
407 			   address is legitimate, and what the address's
408 			   cost is.  */
409   struct iv *iv;	/* The induction variable it is based on.  */
410   gimple *stmt;		/* Statement in that it occurs.  */
411   tree *op_p;		/* The place where it occurs.  */
412 
413   tree addr_base;	/* Base address with const offset stripped.  */
414   poly_uint64_pod addr_offset;
415 			/* Const offset stripped from base address.  */
416 };
417 
418 /* Group of uses.  */
419 struct iv_group
420 {
421   /* The id of the group.  */
422   unsigned id;
423   /* Uses of the group are of the same type.  */
424   enum use_type type;
425   /* The set of "related" IV candidates, plus the important ones.  */
426   bitmap related_cands;
427   /* Number of IV candidates in the cost_map.  */
428   unsigned n_map_members;
429   /* The costs wrto the iv candidates.  */
430   class cost_pair *cost_map;
431   /* The selected candidate for the group.  */
432   struct iv_cand *selected;
433   /* To indicate this is a doloop use group.  */
434   bool doloop_p;
435   /* Uses in the group.  */
436   vec<struct iv_use *> vuses;
437 };
438 
439 /* The position where the iv is computed.  */
440 enum iv_position
441 {
442   IP_NORMAL,		/* At the end, just before the exit condition.  */
443   IP_END,		/* At the end of the latch block.  */
444   IP_BEFORE_USE,	/* Immediately before a specific use.  */
445   IP_AFTER_USE,		/* Immediately after a specific use.  */
446   IP_ORIGINAL		/* The original biv.  */
447 };
448 
449 /* The induction variable candidate.  */
450 struct iv_cand
451 {
452   unsigned id;		/* The number of the candidate.  */
453   bool important;	/* Whether this is an "important" candidate, i.e. such
454 			   that it should be considered by all uses.  */
455   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
456   gimple *incremented_at;/* For original biv, the statement where it is
457 			   incremented.  */
458   tree var_before;	/* The variable used for it before increment.  */
459   tree var_after;	/* The variable used for it after increment.  */
460   struct iv *iv;	/* The value of the candidate.  NULL for
461 			   "pseudocandidate" used to indicate the possibility
462 			   to replace the final value of an iv by direct
463 			   computation of the value.  */
464   unsigned cost;	/* Cost of the candidate.  */
465   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
466   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
467 			      where it is incremented.  */
468   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
469 			   iv_cand.  */
470   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
471 			   hanlde it as a new invariant expression which will
472 			   be hoisted out of loop.  */
473   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
474 			   smaller type.  */
475   bool doloop_p;	/* Whether this is a doloop candidate.  */
476 };
477 
478 /* Hashtable entry for common candidate derived from iv uses.  */
479 class iv_common_cand
480 {
481 public:
482   tree base;
483   tree step;
484   /* IV uses from which this common candidate is derived.  */
485   auto_vec<struct iv_use *> uses;
486   hashval_t hash;
487 };
488 
489 /* Hashtable helpers.  */
490 
491 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
492 {
493   static inline hashval_t hash (const iv_common_cand *);
494   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
495 };
496 
497 /* Hash function for possible common candidates.  */
498 
499 inline hashval_t
hash(const iv_common_cand * ccand)500 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
501 {
502   return ccand->hash;
503 }
504 
505 /* Hash table equality function for common candidates.  */
506 
507 inline bool
equal(const iv_common_cand * ccand1,const iv_common_cand * ccand2)508 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
509 			      const iv_common_cand *ccand2)
510 {
511   return (ccand1->hash == ccand2->hash
512 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
513 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
514 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
515 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
516 }
517 
518 /* Loop invariant expression hashtable entry.  */
519 
520 struct iv_inv_expr_ent
521 {
522   /* Tree expression of the entry.  */
523   tree expr;
524   /* Unique indentifier.  */
525   int id;
526   /* Hash value.  */
527   hashval_t hash;
528 };
529 
530 /* Sort iv_inv_expr_ent pair A and B by id field.  */
531 
532 static int
sort_iv_inv_expr_ent(const void * a,const void * b)533 sort_iv_inv_expr_ent (const void *a, const void *b)
534 {
535   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
536   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
537 
538   unsigned id1 = (*e1)->id;
539   unsigned id2 = (*e2)->id;
540 
541   if (id1 < id2)
542     return -1;
543   else if (id1 > id2)
544     return 1;
545   else
546     return 0;
547 }
548 
549 /* Hashtable helpers.  */
550 
551 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
552 {
553   static inline hashval_t hash (const iv_inv_expr_ent *);
554   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
555 };
556 
557 /* Return true if uses of type TYPE represent some form of address.  */
558 
559 inline bool
address_p(use_type type)560 address_p (use_type type)
561 {
562   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
563 }
564 
565 /* Hash function for loop invariant expressions.  */
566 
567 inline hashval_t
hash(const iv_inv_expr_ent * expr)568 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
569 {
570   return expr->hash;
571 }
572 
573 /* Hash table equality function for expressions.  */
574 
575 inline bool
equal(const iv_inv_expr_ent * expr1,const iv_inv_expr_ent * expr2)576 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
577 			   const iv_inv_expr_ent *expr2)
578 {
579   return expr1->hash == expr2->hash
580 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
581 }
582 
583 struct ivopts_data
584 {
585   /* The currently optimized loop.  */
586   class loop *current_loop;
587   location_t loop_loc;
588 
589   /* Numbers of iterations for all exits of the current loop.  */
590   hash_map<edge, tree_niter_desc *> *niters;
591 
592   /* Number of registers used in it.  */
593   unsigned regs_used;
594 
595   /* The size of version_info array allocated.  */
596   unsigned version_info_size;
597 
598   /* The array of information for the ssa names.  */
599   struct version_info *version_info;
600 
601   /* The hashtable of loop invariant expressions created
602      by ivopt.  */
603   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
604 
605   /* The bitmap of indices in version_info whose value was changed.  */
606   bitmap relevant;
607 
608   /* The uses of induction variables.  */
609   vec<iv_group *> vgroups;
610 
611   /* The candidates.  */
612   vec<iv_cand *> vcands;
613 
614   /* A bitmap of important candidates.  */
615   bitmap important_candidates;
616 
617   /* Cache used by tree_to_aff_combination_expand.  */
618   hash_map<tree, name_expansion *> *name_expansion_cache;
619 
620   /* The hashtable of common candidates derived from iv uses.  */
621   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
622 
623   /* The common candidates.  */
624   vec<iv_common_cand *> iv_common_cands;
625 
626   /* Hash map recording base object information of tree exp.  */
627   hash_map<tree, tree> *base_object_map;
628 
629   /* The maximum invariant variable id.  */
630   unsigned max_inv_var_id;
631 
632   /* The maximum invariant expression id.  */
633   unsigned max_inv_expr_id;
634 
635   /* Number of no_overflow BIVs which are not used in memory address.  */
636   unsigned bivs_not_used_in_addr;
637 
638   /* Obstack for iv structure.  */
639   struct obstack iv_obstack;
640 
641   /* Whether to consider just related and important candidates when replacing a
642      use.  */
643   bool consider_all_candidates;
644 
645   /* Are we optimizing for speed?  */
646   bool speed;
647 
648   /* Whether the loop body includes any function calls.  */
649   bool body_includes_call;
650 
651   /* Whether the loop body can only be exited via single exit.  */
652   bool loop_single_exit_p;
653 
654   /* Whether the loop has doloop comparison use.  */
655   bool doloop_use_p;
656 };
657 
658 /* An assignment of iv candidates to uses.  */
659 
660 class iv_ca
661 {
662 public:
663   /* The number of uses covered by the assignment.  */
664   unsigned upto;
665 
666   /* Number of uses that cannot be expressed by the candidates in the set.  */
667   unsigned bad_groups;
668 
669   /* Candidate assigned to a use, together with the related costs.  */
670   class cost_pair **cand_for_group;
671 
672   /* Number of times each candidate is used.  */
673   unsigned *n_cand_uses;
674 
675   /* The candidates used.  */
676   bitmap cands;
677 
678   /* The number of candidates in the set.  */
679   unsigned n_cands;
680 
681   /* The number of invariants needed, including both invariant variants and
682      invariant expressions.  */
683   unsigned n_invs;
684 
685   /* Total cost of expressing uses.  */
686   comp_cost cand_use_cost;
687 
688   /* Total cost of candidates.  */
689   int64_t cand_cost;
690 
691   /* Number of times each invariant variable is used.  */
692   unsigned *n_inv_var_uses;
693 
694   /* Number of times each invariant expression is used.  */
695   unsigned *n_inv_expr_uses;
696 
697   /* Total cost of the assignment.  */
698   comp_cost cost;
699 };
700 
701 /* Difference of two iv candidate assignments.  */
702 
703 struct iv_ca_delta
704 {
705   /* Changed group.  */
706   struct iv_group *group;
707 
708   /* An old assignment (for rollback purposes).  */
709   class cost_pair *old_cp;
710 
711   /* A new assignment.  */
712   class cost_pair *new_cp;
713 
714   /* Next change in the list.  */
715   struct iv_ca_delta *next;
716 };
717 
718 /* Bound on number of candidates below that all candidates are considered.  */
719 
720 #define CONSIDER_ALL_CANDIDATES_BOUND \
721   ((unsigned) param_iv_consider_all_candidates_bound)
722 
723 /* If there are more iv occurrences, we just give up (it is quite unlikely that
724    optimizing such a loop would help, and it would take ages).  */
725 
726 #define MAX_CONSIDERED_GROUPS \
727   ((unsigned) param_iv_max_considered_uses)
728 
729 /* If there are at most this number of ivs in the set, try removing unnecessary
730    ivs from the set always.  */
731 
732 #define ALWAYS_PRUNE_CAND_SET_BOUND \
733   ((unsigned) param_iv_always_prune_cand_set_bound)
734 
735 /* The list of trees for that the decl_rtl field must be reset is stored
736    here.  */
737 
738 static vec<tree> decl_rtl_to_reset;
739 
740 static comp_cost force_expr_to_var_cost (tree, bool);
741 
742 /* The single loop exit if it dominates the latch, NULL otherwise.  */
743 
744 edge
single_dom_exit(class loop * loop)745 single_dom_exit (class loop *loop)
746 {
747   edge exit = single_exit (loop);
748 
749   if (!exit)
750     return NULL;
751 
752   if (!just_once_each_iteration_p (loop, exit->src))
753     return NULL;
754 
755   return exit;
756 }
757 
758 /* Dumps information about the induction variable IV to FILE.  Don't dump
759    variable's name if DUMP_NAME is FALSE.  The information is dumped with
760    preceding spaces indicated by INDENT_LEVEL.  */
761 
762 void
dump_iv(FILE * file,struct iv * iv,bool dump_name,unsigned indent_level)763 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
764 {
765   const char *p;
766   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
767 
768   if (indent_level > 4)
769     indent_level = 4;
770   p = spaces + 8 - (indent_level << 1);
771 
772   fprintf (file, "%sIV struct:\n", p);
773   if (iv->ssa_name && dump_name)
774     {
775       fprintf (file, "%s  SSA_NAME:\t", p);
776       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
777       fprintf (file, "\n");
778     }
779 
780   fprintf (file, "%s  Type:\t", p);
781   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
782   fprintf (file, "\n");
783 
784   fprintf (file, "%s  Base:\t", p);
785   print_generic_expr (file, iv->base, TDF_SLIM);
786   fprintf (file, "\n");
787 
788   fprintf (file, "%s  Step:\t", p);
789   print_generic_expr (file, iv->step, TDF_SLIM);
790   fprintf (file, "\n");
791 
792   if (iv->base_object)
793     {
794       fprintf (file, "%s  Object:\t", p);
795       print_generic_expr (file, iv->base_object, TDF_SLIM);
796       fprintf (file, "\n");
797     }
798 
799   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
800 
801   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
802 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
803 }
804 
805 /* Dumps information about the USE to FILE.  */
806 
807 void
dump_use(FILE * file,struct iv_use * use)808 dump_use (FILE *file, struct iv_use *use)
809 {
810   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
811   fprintf (file, "    At stmt:\t");
812   print_gimple_stmt (file, use->stmt, 0);
813   fprintf (file, "    At pos:\t");
814   if (use->op_p)
815     print_generic_expr (file, *use->op_p, TDF_SLIM);
816   fprintf (file, "\n");
817   dump_iv (file, use->iv, false, 2);
818 }
819 
820 /* Dumps information about the uses to FILE.  */
821 
822 void
dump_groups(FILE * file,struct ivopts_data * data)823 dump_groups (FILE *file, struct ivopts_data *data)
824 {
825   unsigned i, j;
826   struct iv_group *group;
827 
828   for (i = 0; i < data->vgroups.length (); i++)
829     {
830       group = data->vgroups[i];
831       fprintf (file, "Group %d:\n", group->id);
832       if (group->type == USE_NONLINEAR_EXPR)
833 	fprintf (file, "  Type:\tGENERIC\n");
834       else if (group->type == USE_REF_ADDRESS)
835 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
836       else if (group->type == USE_PTR_ADDRESS)
837 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
838       else
839 	{
840 	  gcc_assert (group->type == USE_COMPARE);
841 	  fprintf (file, "  Type:\tCOMPARE\n");
842 	}
843       for (j = 0; j < group->vuses.length (); j++)
844 	dump_use (file, group->vuses[j]);
845     }
846 }
847 
848 /* Dumps information about induction variable candidate CAND to FILE.  */
849 
850 void
dump_cand(FILE * file,struct iv_cand * cand)851 dump_cand (FILE *file, struct iv_cand *cand)
852 {
853   struct iv *iv = cand->iv;
854 
855   fprintf (file, "Candidate %d:\n", cand->id);
856   if (cand->inv_vars)
857     {
858       fprintf (file, "  Depend on inv.vars: ");
859       dump_bitmap (file, cand->inv_vars);
860     }
861   if (cand->inv_exprs)
862     {
863       fprintf (file, "  Depend on inv.exprs: ");
864       dump_bitmap (file, cand->inv_exprs);
865     }
866 
867   if (cand->var_before)
868     {
869       fprintf (file, "  Var befor: ");
870       print_generic_expr (file, cand->var_before, TDF_SLIM);
871       fprintf (file, "\n");
872     }
873   if (cand->var_after)
874     {
875       fprintf (file, "  Var after: ");
876       print_generic_expr (file, cand->var_after, TDF_SLIM);
877       fprintf (file, "\n");
878     }
879 
880   switch (cand->pos)
881     {
882     case IP_NORMAL:
883       fprintf (file, "  Incr POS: before exit test\n");
884       break;
885 
886     case IP_BEFORE_USE:
887       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
888       break;
889 
890     case IP_AFTER_USE:
891       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
892       break;
893 
894     case IP_END:
895       fprintf (file, "  Incr POS: at end\n");
896       break;
897 
898     case IP_ORIGINAL:
899       fprintf (file, "  Incr POS: orig biv\n");
900       break;
901     }
902 
903   dump_iv (file, iv, false, 1);
904 }
905 
906 /* Returns the info for ssa version VER.  */
907 
908 static inline struct version_info *
ver_info(struct ivopts_data * data,unsigned ver)909 ver_info (struct ivopts_data *data, unsigned ver)
910 {
911   return data->version_info + ver;
912 }
913 
914 /* Returns the info for ssa name NAME.  */
915 
916 static inline struct version_info *
name_info(struct ivopts_data * data,tree name)917 name_info (struct ivopts_data *data, tree name)
918 {
919   return ver_info (data, SSA_NAME_VERSION (name));
920 }
921 
922 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
923    emitted in LOOP.  */
924 
925 static bool
stmt_after_ip_normal_pos(class loop * loop,gimple * stmt)926 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
927 {
928   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
929 
930   gcc_assert (bb);
931 
932   if (sbb == loop->latch)
933     return true;
934 
935   if (sbb != bb)
936     return false;
937 
938   return stmt == last_stmt (bb);
939 }
940 
941 /* Returns true if STMT if after the place where the original induction
942    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
943    if the positions are identical.  */
944 
945 static bool
stmt_after_inc_pos(struct iv_cand * cand,gimple * stmt,bool true_if_equal)946 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
947 {
948   basic_block cand_bb = gimple_bb (cand->incremented_at);
949   basic_block stmt_bb = gimple_bb (stmt);
950 
951   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
952     return false;
953 
954   if (stmt_bb != cand_bb)
955     return true;
956 
957   if (true_if_equal
958       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
959     return true;
960   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
961 }
962 
963 /* Returns true if STMT if after the place where the induction variable
964    CAND is incremented in LOOP.  */
965 
966 static bool
stmt_after_increment(class loop * loop,struct iv_cand * cand,gimple * stmt)967 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
968 {
969   switch (cand->pos)
970     {
971     case IP_END:
972       return false;
973 
974     case IP_NORMAL:
975       return stmt_after_ip_normal_pos (loop, stmt);
976 
977     case IP_ORIGINAL:
978     case IP_AFTER_USE:
979       return stmt_after_inc_pos (cand, stmt, false);
980 
981     case IP_BEFORE_USE:
982       return stmt_after_inc_pos (cand, stmt, true);
983 
984     default:
985       gcc_unreachable ();
986     }
987 }
988 
989 /* walk_tree callback for contains_abnormal_ssa_name_p.  */
990 
991 static tree
contains_abnormal_ssa_name_p_1(tree * tp,int * walk_subtrees,void *)992 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
993 {
994   if (TREE_CODE (*tp) == SSA_NAME
995       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
996     return *tp;
997 
998   if (!EXPR_P (*tp))
999     *walk_subtrees = 0;
1000 
1001   return NULL_TREE;
1002 }
1003 
1004 /* Returns true if EXPR contains a ssa name that occurs in an
1005    abnormal phi node.  */
1006 
1007 bool
contains_abnormal_ssa_name_p(tree expr)1008 contains_abnormal_ssa_name_p (tree expr)
1009 {
1010   return walk_tree_without_duplicates
1011 	   (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1012 }
1013 
1014 /*  Returns the structure describing number of iterations determined from
1015     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1016 
1017 static class tree_niter_desc *
niter_for_exit(struct ivopts_data * data,edge exit)1018 niter_for_exit (struct ivopts_data *data, edge exit)
1019 {
1020   class tree_niter_desc *desc;
1021   tree_niter_desc **slot;
1022 
1023   if (!data->niters)
1024     {
1025       data->niters = new hash_map<edge, tree_niter_desc *>;
1026       slot = NULL;
1027     }
1028   else
1029     slot = data->niters->get (exit);
1030 
1031   if (!slot)
1032     {
1033       /* Try to determine number of iterations.  We cannot safely work with ssa
1034 	 names that appear in phi nodes on abnormal edges, so that we do not
1035 	 create overlapping life ranges for them (PR 27283).  */
1036       desc = XNEW (class tree_niter_desc);
1037       if (!number_of_iterations_exit (data->current_loop,
1038 				      exit, desc, true)
1039      	  || contains_abnormal_ssa_name_p (desc->niter))
1040 	{
1041 	  XDELETE (desc);
1042 	  desc = NULL;
1043 	}
1044       data->niters->put (exit, desc);
1045     }
1046   else
1047     desc = *slot;
1048 
1049   return desc;
1050 }
1051 
1052 /* Returns the structure describing number of iterations determined from
1053    single dominating exit of DATA->current_loop, or NULL if something
1054    goes wrong.  */
1055 
1056 static class tree_niter_desc *
niter_for_single_dom_exit(struct ivopts_data * data)1057 niter_for_single_dom_exit (struct ivopts_data *data)
1058 {
1059   edge exit = single_dom_exit (data->current_loop);
1060 
1061   if (!exit)
1062     return NULL;
1063 
1064   return niter_for_exit (data, exit);
1065 }
1066 
1067 /* Initializes data structures used by the iv optimization pass, stored
1068    in DATA.  */
1069 
1070 static void
tree_ssa_iv_optimize_init(struct ivopts_data * data)1071 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1072 {
1073   data->version_info_size = 2 * num_ssa_names;
1074   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1075   data->relevant = BITMAP_ALLOC (NULL);
1076   data->important_candidates = BITMAP_ALLOC (NULL);
1077   data->max_inv_var_id = 0;
1078   data->max_inv_expr_id = 0;
1079   data->niters = NULL;
1080   data->vgroups.create (20);
1081   data->vcands.create (20);
1082   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1083   data->name_expansion_cache = NULL;
1084   data->base_object_map = NULL;
1085   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1086   data->iv_common_cands.create (20);
1087   decl_rtl_to_reset.create (20);
1088   gcc_obstack_init (&data->iv_obstack);
1089 }
1090 
1091 /* walk_tree callback for determine_base_object.  */
1092 
1093 static tree
determine_base_object_1(tree * tp,int * walk_subtrees,void * wdata)1094 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1095 {
1096   tree_code code = TREE_CODE (*tp);
1097   tree obj = NULL_TREE;
1098   if (code == ADDR_EXPR)
1099     {
1100       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1101       if (!base)
1102 	obj = *tp;
1103       else if (TREE_CODE (base) != MEM_REF)
1104 	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1105     }
1106   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1107 	obj = fold_convert (ptr_type_node, *tp);
1108 
1109   if (!obj)
1110     {
1111       if (!EXPR_P (*tp))
1112 	*walk_subtrees = 0;
1113 
1114       return NULL_TREE;
1115     }
1116   /* Record special node for multiple base objects and stop.  */
1117   if (*static_cast<tree *> (wdata))
1118     {
1119       *static_cast<tree *> (wdata) = integer_zero_node;
1120       return integer_zero_node;
1121     }
1122   /* Record the base object and continue looking.  */
1123   *static_cast<tree *> (wdata) = obj;
1124   return NULL_TREE;
1125 }
1126 
1127 /* Returns a memory object to that EXPR points with caching.  Return NULL if we
1128    are able to determine that it does not point to any such object; specially
1129    return integer_zero_node if EXPR contains multiple base objects.  */
1130 
1131 static tree
determine_base_object(struct ivopts_data * data,tree expr)1132 determine_base_object (struct ivopts_data *data, tree expr)
1133 {
1134   tree *slot, obj = NULL_TREE;
1135   if (data->base_object_map)
1136     {
1137       if ((slot = data->base_object_map->get(expr)) != NULL)
1138 	return *slot;
1139     }
1140   else
1141     data->base_object_map = new hash_map<tree, tree>;
1142 
1143   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1144   data->base_object_map->put (expr, obj);
1145   return obj;
1146 }
1147 
1148 /* Return true if address expression with non-DECL_P operand appears
1149    in EXPR.  */
1150 
1151 static bool
contain_complex_addr_expr(tree expr)1152 contain_complex_addr_expr (tree expr)
1153 {
1154   bool res = false;
1155 
1156   STRIP_NOPS (expr);
1157   switch (TREE_CODE (expr))
1158     {
1159     case POINTER_PLUS_EXPR:
1160     case PLUS_EXPR:
1161     case MINUS_EXPR:
1162       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1163       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1164       break;
1165 
1166     case ADDR_EXPR:
1167       return (!DECL_P (TREE_OPERAND (expr, 0)));
1168 
1169     default:
1170       return false;
1171     }
1172 
1173   return res;
1174 }
1175 
1176 /* Allocates an induction variable with given initial value BASE and step STEP
1177    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1178 
1179 static struct iv *
1180 alloc_iv (struct ivopts_data *data, tree base, tree step,
1181 	  bool no_overflow = false)
1182 {
1183   tree expr = base;
1184   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1185 					      sizeof (struct iv));
1186   gcc_assert (step != NULL_TREE);
1187 
1188   /* Lower address expression in base except ones with DECL_P as operand.
1189      By doing this:
1190        1) More accurate cost can be computed for address expressions;
1191        2) Duplicate candidates won't be created for bases in different
1192 	  forms, like &a[0] and &a.  */
1193   STRIP_NOPS (expr);
1194   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1195       || contain_complex_addr_expr (expr))
1196     {
1197       aff_tree comb;
1198       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1199       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1200     }
1201 
1202   iv->base = base;
1203   iv->base_object = determine_base_object (data, base);
1204   iv->step = step;
1205   iv->biv_p = false;
1206   iv->nonlin_use = NULL;
1207   iv->ssa_name = NULL_TREE;
1208   if (!no_overflow
1209        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1210 			      base, step))
1211     no_overflow = true;
1212   iv->no_overflow = no_overflow;
1213   iv->have_address_use = false;
1214 
1215   return iv;
1216 }
1217 
1218 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1219    doesn't overflow.  */
1220 
1221 static void
set_iv(struct ivopts_data * data,tree iv,tree base,tree step,bool no_overflow)1222 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1223 	bool no_overflow)
1224 {
1225   struct version_info *info = name_info (data, iv);
1226 
1227   gcc_assert (!info->iv);
1228 
1229   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1230   info->iv = alloc_iv (data, base, step, no_overflow);
1231   info->iv->ssa_name = iv;
1232 }
1233 
1234 /* Finds induction variable declaration for VAR.  */
1235 
1236 static struct iv *
get_iv(struct ivopts_data * data,tree var)1237 get_iv (struct ivopts_data *data, tree var)
1238 {
1239   basic_block bb;
1240   tree type = TREE_TYPE (var);
1241 
1242   if (!POINTER_TYPE_P (type)
1243       && !INTEGRAL_TYPE_P (type))
1244     return NULL;
1245 
1246   if (!name_info (data, var)->iv)
1247     {
1248       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1249 
1250       if (!bb
1251 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1252 	{
1253 	  if (POINTER_TYPE_P (type))
1254 	    type = sizetype;
1255 	  set_iv (data, var, var, build_int_cst (type, 0), true);
1256 	}
1257     }
1258 
1259   return name_info (data, var)->iv;
1260 }
1261 
1262 /* Return the first non-invariant ssa var found in EXPR.  */
1263 
1264 static tree
extract_single_var_from_expr(tree expr)1265 extract_single_var_from_expr (tree expr)
1266 {
1267   int i, n;
1268   tree tmp;
1269   enum tree_code code;
1270 
1271   if (!expr || is_gimple_min_invariant (expr))
1272     return NULL;
1273 
1274   code = TREE_CODE (expr);
1275   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1276     {
1277       n = TREE_OPERAND_LENGTH (expr);
1278       for (i = 0; i < n; i++)
1279 	{
1280 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1281 
1282 	  if (tmp)
1283 	    return tmp;
1284 	}
1285     }
1286   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1287 }
1288 
1289 /* Finds basic ivs.  */
1290 
1291 static bool
find_bivs(struct ivopts_data * data)1292 find_bivs (struct ivopts_data *data)
1293 {
1294   gphi *phi;
1295   affine_iv iv;
1296   tree step, type, base, stop;
1297   bool found = false;
1298   class loop *loop = data->current_loop;
1299   gphi_iterator psi;
1300 
1301   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1302     {
1303       phi = psi.phi ();
1304 
1305       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1306 	continue;
1307 
1308       if (virtual_operand_p (PHI_RESULT (phi)))
1309 	continue;
1310 
1311       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1312 	continue;
1313 
1314       if (integer_zerop (iv.step))
1315 	continue;
1316 
1317       step = iv.step;
1318       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1319       /* Stop expanding iv base at the first ssa var referred by iv step.
1320 	 Ideally we should stop at any ssa var, because that's expensive
1321 	 and unusual to happen, we just do it on the first one.
1322 
1323 	 See PR64705 for the rationale.  */
1324       stop = extract_single_var_from_expr (step);
1325       base = expand_simple_operations (base, stop);
1326       if (contains_abnormal_ssa_name_p (base)
1327 	  || contains_abnormal_ssa_name_p (step))
1328 	continue;
1329 
1330       type = TREE_TYPE (PHI_RESULT (phi));
1331       base = fold_convert (type, base);
1332       if (step)
1333 	{
1334 	  if (POINTER_TYPE_P (type))
1335 	    step = convert_to_ptrofftype (step);
1336 	  else
1337 	    step = fold_convert (type, step);
1338 	}
1339 
1340       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1341       found = true;
1342     }
1343 
1344   return found;
1345 }
1346 
1347 /* Marks basic ivs.  */
1348 
1349 static void
mark_bivs(struct ivopts_data * data)1350 mark_bivs (struct ivopts_data *data)
1351 {
1352   gphi *phi;
1353   gimple *def;
1354   tree var;
1355   struct iv *iv, *incr_iv;
1356   class loop *loop = data->current_loop;
1357   basic_block incr_bb;
1358   gphi_iterator psi;
1359 
1360   data->bivs_not_used_in_addr = 0;
1361   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1362     {
1363       phi = psi.phi ();
1364 
1365       iv = get_iv (data, PHI_RESULT (phi));
1366       if (!iv)
1367 	continue;
1368 
1369       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1370       def = SSA_NAME_DEF_STMT (var);
1371       /* Don't mark iv peeled from other one as biv.  */
1372       if (def
1373 	  && gimple_code (def) == GIMPLE_PHI
1374 	  && gimple_bb (def) == loop->header)
1375 	continue;
1376 
1377       incr_iv = get_iv (data, var);
1378       if (!incr_iv)
1379 	continue;
1380 
1381       /* If the increment is in the subloop, ignore it.  */
1382       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1383       if (incr_bb->loop_father != data->current_loop
1384 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1385 	continue;
1386 
1387       iv->biv_p = true;
1388       incr_iv->biv_p = true;
1389       if (iv->no_overflow)
1390 	data->bivs_not_used_in_addr++;
1391       if (incr_iv->no_overflow)
1392 	data->bivs_not_used_in_addr++;
1393     }
1394 }
1395 
1396 /* Checks whether STMT defines a linear induction variable and stores its
1397    parameters to IV.  */
1398 
1399 static bool
find_givs_in_stmt_scev(struct ivopts_data * data,gimple * stmt,affine_iv * iv)1400 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1401 {
1402   tree lhs, stop;
1403   class loop *loop = data->current_loop;
1404 
1405   iv->base = NULL_TREE;
1406   iv->step = NULL_TREE;
1407 
1408   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1409     return false;
1410 
1411   lhs = gimple_assign_lhs (stmt);
1412   if (TREE_CODE (lhs) != SSA_NAME)
1413     return false;
1414 
1415   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1416     return false;
1417 
1418   /* Stop expanding iv base at the first ssa var referred by iv step.
1419      Ideally we should stop at any ssa var, because that's expensive
1420      and unusual to happen, we just do it on the first one.
1421 
1422      See PR64705 for the rationale.  */
1423   stop = extract_single_var_from_expr (iv->step);
1424   iv->base = expand_simple_operations (iv->base, stop);
1425   if (contains_abnormal_ssa_name_p (iv->base)
1426       || contains_abnormal_ssa_name_p (iv->step))
1427     return false;
1428 
1429   /* If STMT could throw, then do not consider STMT as defining a GIV.
1430      While this will suppress optimizations, we cannot safely delete this
1431      GIV and associated statements, even if it appears it is not used.  */
1432   if (stmt_could_throw_p (cfun, stmt))
1433     return false;
1434 
1435   return true;
1436 }
1437 
1438 /* Finds general ivs in statement STMT.  */
1439 
1440 static void
find_givs_in_stmt(struct ivopts_data * data,gimple * stmt)1441 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1442 {
1443   affine_iv iv;
1444 
1445   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1446     return;
1447 
1448   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1449 }
1450 
1451 /* Finds general ivs in basic block BB.  */
1452 
1453 static void
find_givs_in_bb(struct ivopts_data * data,basic_block bb)1454 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1455 {
1456   gimple_stmt_iterator bsi;
1457 
1458   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1459     find_givs_in_stmt (data, gsi_stmt (bsi));
1460 }
1461 
1462 /* Finds general ivs.  */
1463 
1464 static void
find_givs(struct ivopts_data * data)1465 find_givs (struct ivopts_data *data)
1466 {
1467   class loop *loop = data->current_loop;
1468   basic_block *body = get_loop_body_in_dom_order (loop);
1469   unsigned i;
1470 
1471   for (i = 0; i < loop->num_nodes; i++)
1472     find_givs_in_bb (data, body[i]);
1473   free (body);
1474 }
1475 
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477    variable and if so, its initial value and step.  */
1478 
1479 static bool
find_induction_variables(struct ivopts_data * data)1480 find_induction_variables (struct ivopts_data *data)
1481 {
1482   unsigned i;
1483   bitmap_iterator bi;
1484 
1485   if (!find_bivs (data))
1486     return false;
1487 
1488   find_givs (data);
1489   mark_bivs (data);
1490 
1491   if (dump_file && (dump_flags & TDF_DETAILS))
1492     {
1493       class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1494 
1495       if (niter)
1496 	{
1497 	  fprintf (dump_file, "  number of iterations ");
1498 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 	  if (!integer_zerop (niter->may_be_zero))
1500 	    {
1501 	      fprintf (dump_file, "; zero if ");
1502 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1503 	    }
1504 	  fprintf (dump_file, "\n");
1505 	};
1506 
1507       fprintf (dump_file, "\n<Induction Vars>:\n");
1508       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1509 	{
1510 	  struct version_info *info = ver_info (data, i);
1511 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1513 	}
1514     }
1515 
1516   return true;
1517 }
1518 
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521    is the const offset stripped from IV base and MEM_TYPE is the type
1522    of the memory being addressed.  For uses of other types, ADDR_BASE
1523    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1524 
1525 static struct iv_use *
record_use(struct iv_group * group,tree * use_p,struct iv * iv,gimple * stmt,enum use_type type,tree mem_type,tree addr_base,poly_uint64 addr_offset)1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 	    gimple *stmt, enum use_type type, tree mem_type,
1528 	    tree addr_base, poly_uint64 addr_offset)
1529 {
1530   struct iv_use *use = XCNEW (struct iv_use);
1531 
1532   use->id = group->vuses.length ();
1533   use->group_id = group->id;
1534   use->type = type;
1535   use->mem_type = mem_type;
1536   use->iv = iv;
1537   use->stmt = stmt;
1538   use->op_p = use_p;
1539   use->addr_base = addr_base;
1540   use->addr_offset = addr_offset;
1541 
1542   group->vuses.safe_push (use);
1543   return use;
1544 }
1545 
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547    NONLINEAR_USE is true if the invariant is used in a way we do not
1548    handle specially.  */
1549 
1550 static void
record_invariant(struct ivopts_data * data,tree op,bool nonlinear_use)1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1552 {
1553   basic_block bb;
1554   struct version_info *info;
1555 
1556   if (TREE_CODE (op) != SSA_NAME
1557       || virtual_operand_p (op))
1558     return;
1559 
1560   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561   if (bb
1562       && flow_bb_inside_loop_p (data->current_loop, bb))
1563     return;
1564 
1565   info = name_info (data, op);
1566   info->name = op;
1567   info->has_nonlin_use |= nonlinear_use;
1568   if (!info->inv_id)
1569     info->inv_id = ++data->max_inv_var_id;
1570   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1571 }
1572 
1573 /* Record a group of TYPE.  */
1574 
1575 static struct iv_group *
record_group(struct ivopts_data * data,enum use_type type)1576 record_group (struct ivopts_data *data, enum use_type type)
1577 {
1578   struct iv_group *group = XCNEW (struct iv_group);
1579 
1580   group->id = data->vgroups.length ();
1581   group->type = type;
1582   group->related_cands = BITMAP_ALLOC (NULL);
1583   group->vuses.create (1);
1584   group->doloop_p = false;
1585 
1586   data->vgroups.safe_push (group);
1587   return group;
1588 }
1589 
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591    New group will be created if there is no existing group for the use.
1592    MEM_TYPE is the type of memory being addressed, or NULL if this
1593    isn't an address reference.  */
1594 
1595 static struct iv_use *
record_group_use(struct ivopts_data * data,tree * use_p,struct iv * iv,gimple * stmt,enum use_type type,tree mem_type)1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 		  struct iv *iv, gimple *stmt, enum use_type type,
1598 		  tree mem_type)
1599 {
1600   tree addr_base = NULL;
1601   struct iv_group *group = NULL;
1602   poly_uint64 addr_offset = 0;
1603 
1604   /* Record non address type use in a new group.  */
1605   if (address_p (type))
1606     {
1607       unsigned int i;
1608 
1609       addr_base = strip_offset (iv->base, &addr_offset);
1610       for (i = 0; i < data->vgroups.length (); i++)
1611 	{
1612 	  struct iv_use *use;
1613 
1614 	  group = data->vgroups[i];
1615 	  use = group->vuses[0];
1616 	  if (!address_p (use->type))
1617 	    continue;
1618 
1619 	  /* Check if it has the same stripped base and step.  */
1620 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621 	      && operand_equal_p (iv->step, use->iv->step, 0)
1622 	      && operand_equal_p (addr_base, use->addr_base, 0))
1623 	    break;
1624 	}
1625       if (i == data->vgroups.length ())
1626 	group = NULL;
1627     }
1628 
1629   if (!group)
1630     group = record_group (data, type);
1631 
1632   return record_use (group, use_p, iv, stmt, type, mem_type,
1633 		     addr_base, addr_offset);
1634 }
1635 
1636 /* Checks whether the use OP is interesting and if so, records it.  */
1637 
1638 static struct iv_use *
find_interesting_uses_op(struct ivopts_data * data,tree op)1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641   struct iv *iv;
1642   gimple *stmt;
1643   struct iv_use *use;
1644 
1645   if (TREE_CODE (op) != SSA_NAME)
1646     return NULL;
1647 
1648   iv = get_iv (data, op);
1649   if (!iv)
1650     return NULL;
1651 
1652   if (iv->nonlin_use)
1653     {
1654       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655       return iv->nonlin_use;
1656     }
1657 
1658   if (integer_zerop (iv->step))
1659     {
1660       record_invariant (data, op, true);
1661       return NULL;
1662     }
1663 
1664   stmt = SSA_NAME_DEF_STMT (op);
1665   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666 
1667   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668   iv->nonlin_use = use;
1669   return use;
1670 }
1671 
1672 /* Indicate how compare type iv_use can be handled.  */
1673 enum comp_iv_rewrite
1674 {
1675   COMP_IV_NA,
1676   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677   COMP_IV_EXPR,
1678   /* We may rewrite compare type iv_uses on both sides of comparison by
1679      expressing value of each iv_use.  */
1680   COMP_IV_EXPR_2,
1681   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682      or by eliminating it with other iv_cand.  */
1683   COMP_IV_ELIM
1684 };
1685 
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687    of an induction variable and an invariant.  If this is the case,
1688    CONTROL_VAR is set to location of the iv, BOUND to the location of
1689    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690    induction variable descriptions, and true is returned.  If this is not
1691    the case, CONTROL_VAR and BOUND are set to the arguments of the
1692    condition and false is returned.  */
1693 
1694 static enum comp_iv_rewrite
extract_cond_operands(struct ivopts_data * data,gimple * stmt,tree ** control_var,tree ** bound,struct iv ** iv_var,struct iv ** iv_bound)1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696 		       tree **control_var, tree **bound,
1697 		       struct iv **iv_var, struct iv **iv_bound)
1698 {
1699   /* The objects returned when COND has constant operands.  */
1700   static struct iv const_iv;
1701   static tree zero;
1702   tree *op0 = &zero, *op1 = &zero;
1703   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705 
1706   if (gimple_code (stmt) == GIMPLE_COND)
1707     {
1708       gcond *cond_stmt = as_a <gcond *> (stmt);
1709       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711     }
1712   else
1713     {
1714       op0 = gimple_assign_rhs1_ptr (stmt);
1715       op1 = gimple_assign_rhs2_ptr (stmt);
1716     }
1717 
1718   zero = integer_zero_node;
1719   const_iv.step = integer_zero_node;
1720 
1721   if (TREE_CODE (*op0) == SSA_NAME)
1722     iv0 = get_iv (data, *op0);
1723   if (TREE_CODE (*op1) == SSA_NAME)
1724     iv1 = get_iv (data, *op1);
1725 
1726   /* If both sides of the comparison are IVs, we can express them on both ends.  */
1727   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728     {
1729       rewrite_type = COMP_IV_EXPR_2;
1730       goto end;
1731     }
1732 
1733   /* If neither side of the comparison is an IV.  */
1734   if ((!iv0 || integer_zerop (iv0->step))
1735       && (!iv1 || integer_zerop (iv1->step)))
1736     goto end;
1737 
1738   /* Control variable may be on the other side.  */
1739   if (!iv0 || integer_zerop (iv0->step))
1740     {
1741       std::swap (op0, op1);
1742       std::swap (iv0, iv1);
1743     }
1744   /* If one side is an IV and the other side isn't loop invariant.  */
1745   if (!iv1)
1746     rewrite_type = COMP_IV_EXPR;
1747   /* If one side is an IV and the other side is loop invariant.  */
1748   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749     rewrite_type = COMP_IV_ELIM;
1750 
1751 end:
1752   if (control_var)
1753     *control_var = op0;
1754   if (iv_var)
1755     *iv_var = iv0;
1756   if (bound)
1757     *bound = op1;
1758   if (iv_bound)
1759     *iv_bound = iv1;
1760 
1761   return rewrite_type;
1762 }
1763 
1764 /* Checks whether the condition in STMT is interesting and if so,
1765    records it.  */
1766 
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770   tree *var_p, *bound_p;
1771   struct iv *var_iv, *bound_iv;
1772   enum comp_iv_rewrite ret;
1773 
1774   ret = extract_cond_operands (data, stmt,
1775 			       &var_p, &bound_p, &var_iv, &bound_iv);
1776   if (ret == COMP_IV_NA)
1777     {
1778       find_interesting_uses_op (data, *var_p);
1779       find_interesting_uses_op (data, *bound_p);
1780       return;
1781     }
1782 
1783   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784   /* Record compare type iv_use for iv on the other side of comparison.  */
1785   if (ret == COMP_IV_EXPR_2)
1786     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788 
1789 /* Returns the outermost loop in which EXPR is obviously invariant,
1790    relative to the loop LOOP, i.e. such that all its operands are defined
1791    outside of the returned loop.  Returns NULL if EXPR is not
1792    even obviously invariant in LOOP.  */
1793 
1794 class loop *
1795 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1796 {
1797   basic_block def_bb;
1798   unsigned i, len;
1799 
1800   if (is_gimple_min_invariant (expr))
1801     return current_loops->tree_root;
1802 
1803   if (TREE_CODE (expr) == SSA_NAME)
1804     {
1805       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806       if (def_bb)
1807 	{
1808 	  if (flow_bb_inside_loop_p (loop, def_bb))
1809 	    return NULL;
1810 	  return superloop_at_depth (loop,
1811 				     loop_depth (def_bb->loop_father) + 1);
1812 	}
1813 
1814       return current_loops->tree_root;
1815     }
1816 
1817   if (!EXPR_P (expr))
1818     return NULL;
1819 
1820   unsigned maxdepth = 0;
1821   len = TREE_OPERAND_LENGTH (expr);
1822   for (i = 0; i < len; i++)
1823     {
1824       class loop *ivloop;
1825       if (!TREE_OPERAND (expr, i))
1826 	continue;
1827 
1828       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829       if (!ivloop)
1830 	return NULL;
1831       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832     }
1833 
1834   return superloop_at_depth (loop, maxdepth);
1835 }
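
/* A small illustration (a sketch, with made-up names):

     for (...)		<- loop_1
       for (...)	<- loop_2 == LOOP
	 ... = a_1 + b_2;

   If a_1 is defined before loop_1 and b_2 inside loop_1 but outside loop_2,
   the call for a_1 alone returns loop_1, while the call for a_1 + b_2
   returns loop_2, since b_2 is only invariant in loop_2.  If either operand
   were defined inside LOOP itself, NULL would be returned.  */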
1836 
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838    i.e. if all its operands are defined outside of the LOOP.  LOOP
1839    should not be the function body.  */
1840 
1841 bool
1842 expr_invariant_in_loop_p (class loop *loop, tree expr)
1843 {
1844   basic_block def_bb;
1845   unsigned i, len;
1846 
1847   gcc_assert (loop_depth (loop) > 0);
1848 
1849   if (is_gimple_min_invariant (expr))
1850     return true;
1851 
1852   if (TREE_CODE (expr) == SSA_NAME)
1853     {
1854       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855       if (def_bb
1856 	  && flow_bb_inside_loop_p (loop, def_bb))
1857 	return false;
1858 
1859       return true;
1860     }
1861 
1862   if (!EXPR_P (expr))
1863     return false;
1864 
1865   len = TREE_OPERAND_LENGTH (expr);
1866   for (i = 0; i < len; i++)
1867     if (TREE_OPERAND (expr, i)
1868 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869       return false;
1870 
1871   return true;
1872 }
1873 
1874 /* Given expression EXPR which computes inductive values with respect
1875    to the loop recorded in DATA, this function returns the biv from which
1876    EXPR is derived, by tracing definition chains of the SSA names in EXPR.  */
1877 
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881   struct iv *iv;
1882   unsigned i, n;
1883   tree e2, e1;
1884   enum tree_code code;
1885   gimple *stmt;
1886 
1887   if (expr == NULL_TREE)
1888     return NULL;
1889 
1890   if (is_gimple_min_invariant (expr))
1891     return NULL;
1892 
1893   code = TREE_CODE (expr);
1894   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895     {
1896       n = TREE_OPERAND_LENGTH (expr);
1897       for (i = 0; i < n; i++)
1898 	{
1899 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900 	  if (iv)
1901 	    return iv;
1902 	}
1903     }
1904 
1905   /* Stop if it's not an SSA name.  */
1906   if (code != SSA_NAME)
1907     return NULL;
1908 
1909   iv = get_iv (data, expr);
1910   if (!iv || integer_zerop (iv->step))
1911     return NULL;
1912   else if (iv->biv_p)
1913     return iv;
1914 
1915   stmt = SSA_NAME_DEF_STMT (expr);
1916   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917     {
1918       ssa_op_iter iter;
1919       use_operand_p use_p;
1920       basic_block phi_bb = gimple_bb (phi);
1921 
1922       /* Skip loop header PHI that doesn't define biv.  */
1923       if (phi_bb->loop_father == data->current_loop)
1924 	return NULL;
1925 
1926       if (virtual_operand_p (gimple_phi_result (phi)))
1927 	return NULL;
1928 
1929       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930 	{
1931 	  tree use = USE_FROM_PTR (use_p);
1932 	  iv = find_deriving_biv_for_expr (data, use);
1933 	  if (iv)
1934 	    return iv;
1935 	}
1936       return NULL;
1937     }
1938   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939     return NULL;
1940 
1941   e1 = gimple_assign_rhs1 (stmt);
1942   code = gimple_assign_rhs_code (stmt);
1943   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944     return find_deriving_biv_for_expr (data, e1);
1945 
1946   switch (code)
1947     {
1948     case MULT_EXPR:
1949     case PLUS_EXPR:
1950     case MINUS_EXPR:
1951     case POINTER_PLUS_EXPR:
1952       /* Increments, decrements and multiplications by a constant
1953 	 are simple.  */
1954       e2 = gimple_assign_rhs2 (stmt);
1955       iv = find_deriving_biv_for_expr (data, e2);
1956       if (iv)
1957 	return iv;
1958       gcc_fallthrough ();
1959 
1960     CASE_CONVERT:
1961       /* Casts are simple.  */
1962       return find_deriving_biv_for_expr (data, e1);
1963 
1964     default:
1965       break;
1966     }
1967 
1968   return NULL;
1969 }
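
/* For instance (a sketch, with made-up SSA names), given the biv

     i_1 = PHI <0(preheader), i_2(latch)>
     i_2 = i_1 + 1

   and a derived value defined by

     t_5 = i_2 * 4;
     p_3 = base_4 + t_5;

   tracing p_3 walks through the PLUS_EXPR and the MULT_EXPR until it
   reaches i_2, whose descriptor has biv_p set, and returns that biv.  */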
1970 
1971 /* Record that BIV is used in address type uses, and likewise mark its
1972    predecessor and successor bivs (those whose base differs by one step).  */
1973 
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977   unsigned i;
1978   tree type, base_1, base_2;
1979   bitmap_iterator bi;
1980 
1981   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982       || biv->have_address_use || !biv->no_overflow)
1983     return;
1984 
1985   type = TREE_TYPE (biv->base);
1986   if (!INTEGRAL_TYPE_P (type))
1987     return;
1988 
1989   biv->have_address_use = true;
1990   data->bivs_not_used_in_addr--;
1991   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993     {
1994       struct iv *iv = ver_info (data, i)->iv;
1995 
1996       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997 	  || iv->have_address_use || !iv->no_overflow)
1998 	continue;
1999 
2000       if (type != TREE_TYPE (iv->base)
2001 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002 	continue;
2003 
2004       if (!operand_equal_p (biv->step, iv->step, 0))
2005 	continue;
2006 
2007       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008       if (operand_equal_p (base_1, iv->base, 0)
2009 	  || operand_equal_p (base_2, biv->base, 0))
2010 	{
2011 	  iv->have_address_use = true;
2012 	  data->bivs_not_used_in_addr--;
2013 	}
2014     }
2015 }
2016 
2017 /* Accumulates the steps of indices into DATA and replaces their values with the
2018    initial ones.  Returns false when the value of the index cannot be determined.
2019    Callback for for_each_index.  */
2020 
2021 struct ifs_ivopts_data
2022 {
2023   struct ivopts_data *ivopts_data;
2024   gimple *stmt;
2025   tree step;
2026 };
2027 
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032   struct iv *iv;
2033   bool use_overflow_semantics = false;
2034   tree step, iv_base, iv_step, lbound, off;
2035   class loop *loop = dta->ivopts_data->current_loop;
2036 
2037   /* If base is a component ref, require that the offset of the reference
2038      be invariant.  */
2039   if (TREE_CODE (base) == COMPONENT_REF)
2040     {
2041       off = component_ref_field_offset (base);
2042       return expr_invariant_in_loop_p (loop, off);
2043     }
2044 
2045   /* If base is array, first check whether we will be able to move the
2046      reference out of the loop (in order to take its address in strength
2047      reduction).  In order for this to work we need both lower bound
2048      and step to be loop invariants.  */
2049   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050     {
2051       /* Moreover, for a range, the size needs to be invariant as well.  */
2052       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054 	return false;
2055 
2056       step = array_ref_element_size (base);
2057       lbound = array_ref_low_bound (base);
2058 
2059       if (!expr_invariant_in_loop_p (loop, step)
2060 	  || !expr_invariant_in_loop_p (loop, lbound))
2061 	return false;
2062     }
2063 
2064   if (TREE_CODE (*idx) != SSA_NAME)
2065     return true;
2066 
2067   iv = get_iv (dta->ivopts_data, *idx);
2068   if (!iv)
2069     return false;
2070 
2071   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2072 	  *&x[0], which is not folded and does not trigger the
2073 	  ARRAY_REF path below.  */
2074   *idx = iv->base;
2075 
2076   if (integer_zerop (iv->step))
2077     return true;
2078 
2079   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080     {
2081       step = array_ref_element_size (base);
2082 
2083       /* We only handle addresses whose step is an integer constant.  */
2084       if (TREE_CODE (step) != INTEGER_CST)
2085 	return false;
2086     }
2087   else
2088     /* The step for pointer arithmetics already is 1 byte.  */
2089     step = size_one_node;
2090 
2091   iv_base = iv->base;
2092   iv_step = iv->step;
2093   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094     use_overflow_semantics = true;
2095 
2096   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097 			    sizetype, &iv_base, &iv_step, dta->stmt,
2098 			    use_overflow_semantics))
2099     {
2100       /* The index might wrap.  */
2101       return false;
2102     }
2103 
2104   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106 
2107   if (dta->ivopts_data->bivs_not_used_in_addr)
2108     {
2109       if (!iv->biv_p)
2110 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111 
2112       record_biv_for_address_use (dta->ivopts_data, iv);
2113     }
2114   return true;
2115 }
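
/* For example (illustrative only), for a reference a[i_1] where the array
   elements are 4 bytes wide and i_1 is an iv with step 1, the index i_1 is
   replaced by the base (initial value) of its iv and 1 * 4 = 4 is added to
   DTA->step, i.e. the reference advances by 4 bytes per iteration.  */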
2116 
2117 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2118    object is passed to it in DATA.  */
2119 
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122 		void *vdata)
2123 {
2124   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125   find_interesting_uses_op (data, *idx);
2126   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127     {
2128       find_interesting_uses_op (data, array_ref_element_size (base));
2129       find_interesting_uses_op (data, array_ref_low_bound (base));
2130     }
2131   return true;
2132 }
2133 
2134 /* If we can prove that TOP = cst * BOT for some constant cst,
2135    store cst to MUL and return true.  Otherwise return false.
2136    The returned value is always sign-extended, regardless of the
2137    signedness of TOP and BOT.  */
2138 
2139 static bool
2140 constant_multiple_of (tree top, tree bot, widest_int *mul)
2141 {
2142   tree mby;
2143   enum tree_code code;
2144   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2145   widest_int res, p0, p1;
2146 
2147   STRIP_NOPS (top);
2148   STRIP_NOPS (bot);
2149 
2150   if (operand_equal_p (top, bot, 0))
2151     {
2152       *mul = 1;
2153       return true;
2154     }
2155 
2156   code = TREE_CODE (top);
2157   switch (code)
2158     {
2159     case MULT_EXPR:
2160       mby = TREE_OPERAND (top, 1);
2161       if (TREE_CODE (mby) != INTEGER_CST)
2162 	return false;
2163 
2164       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2165 	return false;
2166 
2167       *mul = wi::sext (res * wi::to_widest (mby), precision);
2168       return true;
2169 
2170     case PLUS_EXPR:
2171     case MINUS_EXPR:
2172       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2173 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2174 	return false;
2175 
2176       if (code == MINUS_EXPR)
2177 	p1 = -p1;
2178       *mul = wi::sext (p0 + p1, precision);
2179       return true;
2180 
2181     case INTEGER_CST:
2182       if (TREE_CODE (bot) != INTEGER_CST)
2183 	return false;
2184 
2185       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2186       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2187       if (p1 == 0)
2188 	return false;
2189       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2190       return res == 0;
2191 
2192     default:
2193       if (POLY_INT_CST_P (top)
2194 	  && POLY_INT_CST_P (bot)
2195 	  && constant_multiple_p (wi::to_poly_widest (top),
2196 				  wi::to_poly_widest (bot), mul))
2197 	return true;
2198 
2199       return false;
2200     }
2201 }
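
/* A few illustrative cases (a sketch; the operands are trees):

     TOP = i_1 * 8, BOT = i_1	=> *MUL = 8
     TOP = 12,	    BOT = 4	=> *MUL = 3
     TOP = i_1 + 4, BOT = 2	=> false, since i_1 + 4 is not known to be
				   a constant multiple of 2.  */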
2202 
2203 /* Return true if memory reference REF with step STEP may be unaligned.  */
2204 
2205 static bool
2206 may_be_unaligned_p (tree ref, tree step)
2207 {
2208   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209      thus they are not misaligned.  */
2210   if (TREE_CODE (ref) == TARGET_MEM_REF)
2211     return false;
2212 
2213   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216 
2217   unsigned HOST_WIDE_INT bitpos;
2218   unsigned int ref_align;
2219   get_object_alignment_1 (ref, &ref_align, &bitpos);
2220   if (ref_align < align
2221       || (bitpos % align) != 0
2222       || (bitpos % BITS_PER_UNIT) != 0)
2223     return true;
2224 
2225   unsigned int trailing_zeros = tree_ctz (step);
2226   if (trailing_zeros < HOST_BITS_PER_INT
2227       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228     return true;
2229 
2230   return false;
2231 }
2232 
2233 /* Return true if EXPR may be non-addressable.   */
2234 
2235 bool
2236 may_be_nonaddressable_p (tree expr)
2237 {
2238   switch (TREE_CODE (expr))
2239     {
2240     case VAR_DECL:
2241       /* Check if it's a register variable.  */
2242       return DECL_HARD_REGISTER (expr);
2243 
2244     case TARGET_MEM_REF:
2245       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2246 	 target, thus they are always addressable.  */
2247       return false;
2248 
2249     case MEM_REF:
2250       /* Likewise for MEM_REFs, modulo the storage order.  */
2251       return REF_REVERSE_STORAGE_ORDER (expr);
2252 
2253     case BIT_FIELD_REF:
2254       if (REF_REVERSE_STORAGE_ORDER (expr))
2255 	return true;
2256       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2257 
2258     case COMPONENT_REF:
2259       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2260 	return true;
2261       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2262 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263 
2264     case ARRAY_REF:
2265     case ARRAY_RANGE_REF:
2266       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2267 	return true;
2268       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269 
2270     case VIEW_CONVERT_EXPR:
2271       /* This kind of view-conversions may wrap non-addressable objects
2272 	 and make them look addressable.  After some processing the
2273 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2274 	 of inappropriate objects to be built.  */
2275       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2276 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2277 	return true;
2278       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279 
2280     CASE_CONVERT:
2281       return true;
2282 
2283     default:
2284       break;
2285     }
2286 
2287   return false;
2288 }
2289 
2290 /* Finds addresses in *OP_P inside STMT.  */
2291 
2292 static void
2293 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2294 			       tree *op_p)
2295 {
2296   tree base = *op_p, step = size_zero_node;
2297   struct iv *civ;
2298   struct ifs_ivopts_data ifs_ivopts_data;
2299 
2300   /* Do not play with volatile memory references.  A bit too conservative,
2301      perhaps, but safe.  */
2302   if (gimple_has_volatile_ops (stmt))
2303     goto fail;
2304 
2305   /* Ignore bitfields for now.  Not really something terribly complicated
2306      to handle.  TODO.  */
2307   if (TREE_CODE (base) == BIT_FIELD_REF)
2308     goto fail;
2309 
2310   base = unshare_expr (base);
2311 
2312   if (TREE_CODE (base) == TARGET_MEM_REF)
2313     {
2314       tree type = build_pointer_type (TREE_TYPE (base));
2315       tree astep;
2316 
2317       if (TMR_BASE (base)
2318 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2319 	{
2320 	  civ = get_iv (data, TMR_BASE (base));
2321 	  if (!civ)
2322 	    goto fail;
2323 
2324 	  TMR_BASE (base) = civ->base;
2325 	  step = civ->step;
2326 	}
2327       if (TMR_INDEX2 (base)
2328 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2329 	{
2330 	  civ = get_iv (data, TMR_INDEX2 (base));
2331 	  if (!civ)
2332 	    goto fail;
2333 
2334 	  TMR_INDEX2 (base) = civ->base;
2335 	  step = civ->step;
2336 	}
2337       if (TMR_INDEX (base)
2338 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2339 	{
2340 	  civ = get_iv (data, TMR_INDEX (base));
2341 	  if (!civ)
2342 	    goto fail;
2343 
2344 	  TMR_INDEX (base) = civ->base;
2345 	  astep = civ->step;
2346 
2347 	  if (astep)
2348 	    {
2349 	      if (TMR_STEP (base))
2350 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2351 
2352 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2353 	    }
2354 	}
2355 
2356       if (integer_zerop (step))
2357 	goto fail;
2358       base = tree_mem_ref_addr (type, base);
2359     }
2360   else
2361     {
2362       ifs_ivopts_data.ivopts_data = data;
2363       ifs_ivopts_data.stmt = stmt;
2364       ifs_ivopts_data.step = size_zero_node;
2365       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2366 	  || integer_zerop (ifs_ivopts_data.step))
2367 	goto fail;
2368       step = ifs_ivopts_data.step;
2369 
2370       /* Check that the base expression is addressable.  This needs
2371 	 to be done after substituting bases of IVs into it.  */
2372       if (may_be_nonaddressable_p (base))
2373 	goto fail;
2374 
2375       /* Moreover, on strict alignment platforms, check that it is
2376 	 sufficiently aligned.  */
2377       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2378 	goto fail;
2379 
2380       base = build_fold_addr_expr (base);
2381 
2382       /* Substituting bases of IVs into the base expression might
2383 	 have caused folding opportunities.  */
2384       if (TREE_CODE (base) == ADDR_EXPR)
2385 	{
2386 	  tree *ref = &TREE_OPERAND (base, 0);
2387 	  while (handled_component_p (*ref))
2388 	    ref = &TREE_OPERAND (*ref, 0);
2389 	  if (TREE_CODE (*ref) == MEM_REF)
2390 	    {
2391 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2392 				      TREE_OPERAND (*ref, 0),
2393 				      TREE_OPERAND (*ref, 1));
2394 	      if (tem)
2395 		*ref = tem;
2396 	    }
2397 	}
2398     }
2399 
2400   civ = alloc_iv (data, base, step);
2401   /* Fail if base object of this memory reference is unknown.  */
2402   if (civ->base_object == NULL_TREE)
2403     goto fail;
2404 
2405   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2406   return;
2407 
2408 fail:
2409   for_each_index (op_p, idx_record_use, data);
2410 }
2411 
2412 /* Finds and records invariants used in STMT.  */
2413 
2414 static void
2415 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2416 {
2417   ssa_op_iter iter;
2418   use_operand_p use_p;
2419   tree op;
2420 
2421   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2422     {
2423       op = USE_FROM_PTR (use_p);
2424       record_invariant (data, op, false);
2425     }
2426 }
2427 
2428 /* CALL calls an internal function.  If operand *OP_P will become an
2429    address when the call is expanded, return the type of the memory
2430    being addressed, otherwise return null.  */
2431 
2432 static tree
2433 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2434 {
2435   switch (gimple_call_internal_fn (call))
2436     {
2437     case IFN_MASK_LOAD:
2438     case IFN_MASK_LOAD_LANES:
2439       if (op_p == gimple_call_arg_ptr (call, 0))
2440 	return TREE_TYPE (gimple_call_lhs (call));
2441       return NULL_TREE;
2442 
2443     case IFN_MASK_STORE:
2444     case IFN_MASK_STORE_LANES:
2445       if (op_p == gimple_call_arg_ptr (call, 0))
2446 	return TREE_TYPE (gimple_call_arg (call, 3));
2447       return NULL_TREE;
2448 
2449     default:
2450       return NULL_TREE;
2451     }
2452 }
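
/* For instance (a sketch, with made-up names), for the internal call

     vect_1 = .MASK_LOAD (addr_2, align_3, mask_4);

   passing a pointer to the first argument returns the (vector) type of
   vect_1, because that argument becomes the address of the accessed memory
   once the call is expanded; for the other arguments NULL_TREE is returned.  */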
2453 
2454 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2455    Return true if the operand will become an address when STMT
2456    is expanded and record the associated address use if so.  */
2457 
2458 static bool
2459 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2460 		       struct iv *iv)
2461 {
2462   /* Fail if base object of this memory reference is unknown.  */
2463   if (iv->base_object == NULL_TREE)
2464     return false;
2465 
2466   tree mem_type = NULL_TREE;
2467   if (gcall *call = dyn_cast <gcall *> (stmt))
2468     if (gimple_call_internal_p (call))
2469       mem_type = get_mem_type_for_internal_fn (call, op_p);
2470   if (mem_type)
2471     {
2472       iv = alloc_iv (data, iv->base, iv->step);
2473       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2474       return true;
2475     }
2476   return false;
2477 }
2478 
2479 /* Finds interesting uses of induction variables in the statement STMT.  */
2480 
2481 static void
2482 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2483 {
2484   struct iv *iv;
2485   tree op, *lhs, *rhs;
2486   ssa_op_iter iter;
2487   use_operand_p use_p;
2488   enum tree_code code;
2489 
2490   find_invariants_stmt (data, stmt);
2491 
2492   if (gimple_code (stmt) == GIMPLE_COND)
2493     {
2494       find_interesting_uses_cond (data, stmt);
2495       return;
2496     }
2497 
2498   if (is_gimple_assign (stmt))
2499     {
2500       lhs = gimple_assign_lhs_ptr (stmt);
2501       rhs = gimple_assign_rhs1_ptr (stmt);
2502 
2503       if (TREE_CODE (*lhs) == SSA_NAME)
2504 	{
2505 	  /* If the statement defines an induction variable, the uses are not
2506 	     interesting by themselves.  */
2507 
2508 	  iv = get_iv (data, *lhs);
2509 
2510 	  if (iv && !integer_zerop (iv->step))
2511 	    return;
2512 	}
2513 
2514       code = gimple_assign_rhs_code (stmt);
2515       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2516 	  && (REFERENCE_CLASS_P (*rhs)
2517 	      || is_gimple_val (*rhs)))
2518 	{
2519 	  if (REFERENCE_CLASS_P (*rhs))
2520 	    find_interesting_uses_address (data, stmt, rhs);
2521 	  else
2522 	    find_interesting_uses_op (data, *rhs);
2523 
2524 	  if (REFERENCE_CLASS_P (*lhs))
2525 	    find_interesting_uses_address (data, stmt, lhs);
2526 	  return;
2527 	}
2528       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2529 	{
2530 	  find_interesting_uses_cond (data, stmt);
2531 	  return;
2532 	}
2533 
2534       /* TODO -- we should also handle address uses of type
2535 
2536 	 memory = call (whatever);
2537 
2538 	 and
2539 
2540 	 call (memory).  */
2541     }
2542 
2543   if (gimple_code (stmt) == GIMPLE_PHI
2544       && gimple_bb (stmt) == data->current_loop->header)
2545     {
2546       iv = get_iv (data, PHI_RESULT (stmt));
2547 
2548       if (iv && !integer_zerop (iv->step))
2549 	return;
2550     }
2551 
2552   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2553     {
2554       op = USE_FROM_PTR (use_p);
2555 
2556       if (TREE_CODE (op) != SSA_NAME)
2557 	continue;
2558 
2559       iv = get_iv (data, op);
2560       if (!iv)
2561 	continue;
2562 
2563       if (!find_address_like_use (data, stmt, use_p->use, iv))
2564 	find_interesting_uses_op (data, op);
2565     }
2566 }
2567 
2568 /* Finds interesting uses of induction variables outside of loops
2569    on loop exit edge EXIT.  */
2570 
2571 static void
2572 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2573 {
2574   gphi *phi;
2575   gphi_iterator psi;
2576   tree def;
2577 
2578   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2579     {
2580       phi = psi.phi ();
2581       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2582       if (!virtual_operand_p (def))
2583 	find_interesting_uses_op (data, def);
2584     }
2585 }
2586 
2587 /* Return TRUE if OFFSET is within the range of the [base + offset]
2588    addressing mode for the memory reference represented by USE.  */
2589 
2590 static GTY (()) vec<rtx, va_gc> *addr_list;
2591 
2592 static bool
2593 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2594 {
2595   rtx reg, addr;
2596   unsigned list_index;
2597   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2598   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2599 
2600   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2601   if (list_index >= vec_safe_length (addr_list))
2602     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2603 
2604   addr = (*addr_list)[list_index];
2605   if (!addr)
2606     {
2607       addr_mode = targetm.addr_space.address_mode (as);
2608       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2609       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2610       (*addr_list)[list_index] = addr;
2611     }
2612   else
2613     addr_mode = GET_MODE (addr);
2614 
2615   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2616   return (memory_address_addr_space_p (mem_mode, addr, as));
2617 }
2618 
2619 /* Comparison function to sort a group in ascending order of addr_offset.  */
2620 
2621 static int
2622 group_compare_offset (const void *a, const void *b)
2623 {
2624   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2625   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2626 
2627   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2628 }
2629 
2630 /* Check if small groups should be split.  Return true if no group
2631    contains more than two uses with distinct addr_offsets.  Return
2632    false otherwise.  We want to split such groups because:
2633 
2634      1) Small groups don't have much benefit and may interfere with
2635 	general candidate selection.
2636      2) The size of a problem with only small groups is usually small and
2637 	the general algorithm can handle it well.
2638 
2639    TODO -- The above claim may not hold when we want to merge memory
2640    accesses with consecutive addresses.  */
2641 
2642 static bool
2643 split_small_address_groups_p (struct ivopts_data *data)
2644 {
2645   unsigned int i, j, distinct = 1;
2646   struct iv_use *pre;
2647   struct iv_group *group;
2648 
2649   for (i = 0; i < data->vgroups.length (); i++)
2650     {
2651       group = data->vgroups[i];
2652       if (group->vuses.length () == 1)
2653 	continue;
2654 
2655       gcc_assert (address_p (group->type));
2656       if (group->vuses.length () == 2)
2657 	{
2658 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2659 				      group->vuses[1]->addr_offset) > 0)
2660 	    std::swap (group->vuses[0], group->vuses[1]);
2661 	}
2662       else
2663 	group->vuses.qsort (group_compare_offset);
2664 
2665       if (distinct > 2)
2666 	continue;
2667 
2668       distinct = 1;
2669       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2670 	{
2671 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2672 	    {
2673 	      pre = group->vuses[j];
2674 	      distinct++;
2675 	    }
2676 
2677 	  if (distinct > 2)
2678 	    break;
2679 	}
2680     }
2681 
2682   return (distinct <= 2);
2683 }
2684 
2685 /* For each group of address type uses, this function further groups
2686    these uses according to the maximum offset supported by the target's
2687    [base + offset] addressing mode.  */
2688 
2689 static void
2690 split_address_groups (struct ivopts_data *data)
2691 {
2692   unsigned int i, j;
2693   /* If true, split every group so that uses with distinct offsets are separated.  */
2694   bool split_p = split_small_address_groups_p (data);
2695 
2696   for (i = 0; i < data->vgroups.length (); i++)
2697     {
2698       struct iv_group *new_group = NULL;
2699       struct iv_group *group = data->vgroups[i];
2700       struct iv_use *use = group->vuses[0];
2701 
2702       use->id = 0;
2703       use->group_id = group->id;
2704       if (group->vuses.length () == 1)
2705 	continue;
2706 
2707       gcc_assert (address_p (use->type));
2708 
2709       for (j = 1; j < group->vuses.length ();)
2710 	{
2711 	  struct iv_use *next = group->vuses[j];
2712 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2713 
2714 	  /* Split the group if asked to, or if the offset against the first
2715 	     use can't fit in the offset part of the addressing mode.  IV uses
2716 	     with the same offset are still kept in one group.  */
2717 	  if (maybe_ne (offset, 0)
2718 	      && (split_p || !addr_offset_valid_p (use, offset)))
2719 	    {
2720 	      if (!new_group)
2721 		new_group = record_group (data, group->type);
2722 	      group->vuses.ordered_remove (j);
2723 	      new_group->vuses.safe_push (next);
2724 	      continue;
2725 	    }
2726 
2727 	  next->id = j;
2728 	  next->group_id = group->id;
2729 	  j++;
2730 	}
2731     }
2732 }
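
/* For example (illustrative only), if a group contains uses at offsets
   0, 4 and 65536 from the same base and the target's [base + offset]
   addressing mode cannot encode the 65536 displacement relative to the
   first use, the distant use is moved to a new group, while the uses at
   offsets 0 and 4 stay together (assuming the offset 4 is representable).  */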
2733 
2734 /* Finds uses of the induction variables that are interesting.  */
2735 
2736 static void
2737 find_interesting_uses (struct ivopts_data *data)
2738 {
2739   basic_block bb;
2740   gimple_stmt_iterator bsi;
2741   basic_block *body = get_loop_body (data->current_loop);
2742   unsigned i;
2743   edge e;
2744 
2745   for (i = 0; i < data->current_loop->num_nodes; i++)
2746     {
2747       edge_iterator ei;
2748       bb = body[i];
2749 
2750       FOR_EACH_EDGE (e, ei, bb->succs)
2751 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2752 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2753 	  find_interesting_uses_outside (data, e);
2754 
2755       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2756 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2757       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2758 	if (!is_gimple_debug (gsi_stmt (bsi)))
2759 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2760     }
2761   free (body);
2762 
2763   split_address_groups (data);
2764 
2765   if (dump_file && (dump_flags & TDF_DETAILS))
2766     {
2767       fprintf (dump_file, "\n<IV Groups>:\n");
2768       dump_groups (dump_file, data);
2769       fprintf (dump_file, "\n");
2770     }
2771 }
2772 
2773 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2774    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2775    we are at the top-level of the processed address.  */
2776 
2777 static tree
2778 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2779 		poly_int64 *offset)
2780 {
2781   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2782   enum tree_code code;
2783   tree type, orig_type = TREE_TYPE (expr);
2784   poly_int64 off0, off1;
2785   HOST_WIDE_INT st;
2786   tree orig_expr = expr;
2787 
2788   STRIP_NOPS (expr);
2789 
2790   type = TREE_TYPE (expr);
2791   code = TREE_CODE (expr);
2792   *offset = 0;
2793 
2794   switch (code)
2795     {
2796     case POINTER_PLUS_EXPR:
2797     case PLUS_EXPR:
2798     case MINUS_EXPR:
2799       op0 = TREE_OPERAND (expr, 0);
2800       op1 = TREE_OPERAND (expr, 1);
2801 
2802       op0 = strip_offset_1 (op0, false, false, &off0);
2803       op1 = strip_offset_1 (op1, false, false, &off1);
2804 
2805       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2806       if (op0 == TREE_OPERAND (expr, 0)
2807 	  && op1 == TREE_OPERAND (expr, 1))
2808 	return orig_expr;
2809 
2810       if (integer_zerop (op1))
2811 	expr = op0;
2812       else if (integer_zerop (op0))
2813 	{
2814 	  if (code == MINUS_EXPR)
2815 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2816 	  else
2817 	    expr = op1;
2818 	}
2819       else
2820 	expr = fold_build2 (code, type, op0, op1);
2821 
2822       return fold_convert (orig_type, expr);
2823 
2824     case MULT_EXPR:
2825       op1 = TREE_OPERAND (expr, 1);
2826       if (!cst_and_fits_in_hwi (op1))
2827 	return orig_expr;
2828 
2829       op0 = TREE_OPERAND (expr, 0);
2830       op0 = strip_offset_1 (op0, false, false, &off0);
2831       if (op0 == TREE_OPERAND (expr, 0))
2832 	return orig_expr;
2833 
2834       *offset = off0 * int_cst_value (op1);
2835       if (integer_zerop (op0))
2836 	expr = op0;
2837       else
2838 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2839 
2840       return fold_convert (orig_type, expr);
2841 
2842     case ARRAY_REF:
2843     case ARRAY_RANGE_REF:
2844       if (!inside_addr)
2845 	return orig_expr;
2846 
2847       step = array_ref_element_size (expr);
2848       if (!cst_and_fits_in_hwi (step))
2849 	break;
2850 
2851       st = int_cst_value (step);
2852       op1 = TREE_OPERAND (expr, 1);
2853       op1 = strip_offset_1 (op1, false, false, &off1);
2854       *offset = off1 * st;
2855 
2856       if (top_compref
2857 	  && integer_zerop (op1))
2858 	{
2859 	  /* Strip the component reference completely.  */
2860 	  op0 = TREE_OPERAND (expr, 0);
2861 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2862 	  *offset += off0;
2863 	  return op0;
2864 	}
2865       break;
2866 
2867     case COMPONENT_REF:
2868       {
2869 	tree field;
2870 
2871 	if (!inside_addr)
2872 	  return orig_expr;
2873 
2874 	tmp = component_ref_field_offset (expr);
2875 	field = TREE_OPERAND (expr, 1);
2876 	if (top_compref
2877 	    && cst_and_fits_in_hwi (tmp)
2878 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2879 	  {
2880 	    HOST_WIDE_INT boffset, abs_off;
2881 
2882 	    /* Strip the component reference completely.  */
2883 	    op0 = TREE_OPERAND (expr, 0);
2884 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2885 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2886 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2887 	    if (boffset < 0)
2888 	      abs_off = -abs_off;
2889 
2890 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2891 	    return op0;
2892 	  }
2893       }
2894       break;
2895 
2896     case ADDR_EXPR:
2897       op0 = TREE_OPERAND (expr, 0);
2898       op0 = strip_offset_1 (op0, true, true, &off0);
2899       *offset += off0;
2900 
2901       if (op0 == TREE_OPERAND (expr, 0))
2902 	return orig_expr;
2903 
2904       expr = build_fold_addr_expr (op0);
2905       return fold_convert (orig_type, expr);
2906 
2907     case MEM_REF:
2908       /* ???  Offset operand?  */
2909       inside_addr = false;
2910       break;
2911 
2912     default:
2913       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2914 	return build_int_cst (orig_type, 0);
2915       return orig_expr;
2916     }
2917 
2918   /* Default handling of expressions for which we want to recurse into
2919      the first operand.  */
2920   op0 = TREE_OPERAND (expr, 0);
2921   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2922   *offset += off0;
2923 
2924   if (op0 == TREE_OPERAND (expr, 0)
2925       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2926     return orig_expr;
2927 
2928   expr = copy_node (expr);
2929   TREE_OPERAND (expr, 0) = op0;
2930   if (op1)
2931     TREE_OPERAND (expr, 1) = op1;
2932 
2933   /* Inside address, we might strip the top level component references,
2934      thus changing type of the expression.  Handling of ADDR_EXPR
2935      will fix that.  */
2936   expr = fold_convert (orig_type, expr);
2937 
2938   return expr;
2939 }
2940 
2941 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2942 
2943 tree
2944 strip_offset (tree expr, poly_uint64_pod *offset)
2945 {
2946   poly_int64 off;
2947   tree core = strip_offset_1 (expr, false, false, &off);
2948   *offset = off;
2949   return core;
2950 }
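
/* For instance (a sketch, assuming 4-byte array elements), stripping
   &a[4] yields &a with *OFFSET set to 16, and stripping p_1 + 12 (a
   POINTER_PLUS_EXPR) yields p_1 with *OFFSET set to 12.  */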
2951 
2952 /* Returns variant of TYPE that can be used as base for different uses.
2953    We return unsigned type with the same precision, which avoids problems
2954    with overflows.  */
2955 
2956 static tree
2957 generic_type_for (tree type)
2958 {
2959   if (POINTER_TYPE_P (type))
2960     return unsigned_type_for (type);
2961 
2962   if (TYPE_UNSIGNED (type))
2963     return type;
2964 
2965   return unsigned_type_for (type);
2966 }
2967 
2968 /* Private data for walk_tree.  */
2969 
2970 struct walk_tree_data
2971 {
2972   bitmap *inv_vars;
2973   struct ivopts_data *idata;
2974 };
2975 
2976 /* Callback function for walk_tree; it records invariants and symbol
2977    references in *EXPR_P.  DATA is the structure storing the result info.  */
2978 
2979 static tree
2980 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2981 {
2982   tree op = *expr_p;
2983   struct version_info *info;
2984   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2985 
2986   if (TREE_CODE (op) != SSA_NAME)
2987     return NULL_TREE;
2988 
2989   info = name_info (wdata->idata, op);
2990   /* Because we expand simple operations when finding IVs, a loop invariant
2991      variable that isn't referred to by the original loop could be used now.
2992      Record such invariant variables here.  */
2993   if (!info->iv)
2994     {
2995       struct ivopts_data *idata = wdata->idata;
2996       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2997 
2998       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2999 	{
3000 	  tree steptype = TREE_TYPE (op);
3001 	  if (POINTER_TYPE_P (steptype))
3002 	    steptype = sizetype;
3003 	  set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3004 	  record_invariant (idata, op, false);
3005 	}
3006     }
3007   if (!info->inv_id || info->has_nonlin_use)
3008     return NULL_TREE;
3009 
3010   if (!*wdata->inv_vars)
3011     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3012   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3013 
3014   return NULL_TREE;
3015 }
3016 
3017 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we should
3018    store them.  */
3019 
3020 static inline void
3021 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3022 {
3023   struct walk_tree_data wdata;
3024 
3025   if (!inv_vars)
3026     return;
3027 
3028   wdata.idata = data;
3029   wdata.inv_vars = inv_vars;
3030   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3031 }
3032 
3033 /* Get the entry from the invariant expr hash table for INV_EXPR.  A new
3034    entry will be recorded if it doesn't exist yet.  Given the two exprs:
3035      inv_expr + cst1, inv_expr + cst2
3036    it is hard to decide whether the constant part should be stripped or
3037    not.  We choose not to strip it, based on the following facts:
3038      1) We would need to count the ADD cost for the constant part if it
3039 	were stripped, which isn't always trivial where this function is called.
3040      2) Stripping the constant away may conflict with the subsequent loop
3041 	invariant hoisting pass.
3042      3) Not stripping the constant results in more invariant exprs, which
3043 	usually leads to decisions preferring lower register pressure.  */
3044 
3045 static iv_inv_expr_ent *
3046 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3047 {
3048   STRIP_NOPS (inv_expr);
3049 
3050   if (poly_int_tree_p (inv_expr)
3051       || TREE_CODE (inv_expr) == SSA_NAME)
3052     return NULL;
3053 
3054   /* Don't strip constant part away as we used to.  */
3055 
3056   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3057   struct iv_inv_expr_ent ent;
3058   ent.expr = inv_expr;
3059   ent.hash = iterative_hash_expr (inv_expr, 0);
3060   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3061 
3062   if (!*slot)
3063     {
3064       *slot = XNEW (struct iv_inv_expr_ent);
3065       (*slot)->expr = inv_expr;
3066       (*slot)->hash = ent.hash;
3067       (*slot)->id = ++data->max_inv_expr_id;
3068     }
3069 
3070   return *slot;
3071 }
3072 
3073 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3074    position to POS.  If USE is not NULL, the candidate is set as related to
3075    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3076    replacement of the final value of the iv by a direct computation.  */
3077 
3078 static struct iv_cand *
3079 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3080 		 enum iv_position pos, struct iv_use *use,
3081 		 gimple *incremented_at, struct iv *orig_iv = NULL,
3082 		 bool doloop = false)
3083 {
3084   unsigned i;
3085   struct iv_cand *cand = NULL;
3086   tree type, orig_type;
3087 
3088   gcc_assert (base && step);
3089 
3090   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3091      live, but the ivopts code may replace a real pointer with one
3092      pointing before or after the memory block that is then adjusted
3093      into the memory block during the loop.  FIXME: It would likely be
3094      better to actually force the pointer live and still use ivopts;
3095      for example, it would be enough to write the pointer into memory
3096      and keep it there until after the loop.  */
3097   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3098     return NULL;
3099 
3100   /* For non-original variables, make sure their values are computed in a type
3101      that does not invoke undefined behavior on overflows (since in general,
3102      we cannot prove that these induction variables are non-wrapping).  */
3103   if (pos != IP_ORIGINAL)
3104     {
3105       orig_type = TREE_TYPE (base);
3106       type = generic_type_for (orig_type);
3107       if (type != orig_type)
3108 	{
3109 	  base = fold_convert (type, base);
3110 	  step = fold_convert (type, step);
3111 	}
3112     }
3113 
3114   for (i = 0; i < data->vcands.length (); i++)
3115     {
3116       cand = data->vcands[i];
3117 
3118       if (cand->pos != pos)
3119 	continue;
3120 
3121       if (cand->incremented_at != incremented_at
3122 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3123 	      && cand->ainc_use != use))
3124 	continue;
3125 
3126       if (operand_equal_p (base, cand->iv->base, 0)
3127 	  && operand_equal_p (step, cand->iv->step, 0)
3128 	  && (TYPE_PRECISION (TREE_TYPE (base))
3129 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3130 	break;
3131     }
3132 
3133   if (i == data->vcands.length ())
3134     {
3135       cand = XCNEW (struct iv_cand);
3136       cand->id = i;
3137       cand->iv = alloc_iv (data, base, step);
3138       cand->pos = pos;
3139       if (pos != IP_ORIGINAL)
3140 	{
3141 	  if (doloop)
3142 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3143 	  else
3144 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3145 	  cand->var_after = cand->var_before;
3146 	}
3147       cand->important = important;
3148       cand->incremented_at = incremented_at;
3149       cand->doloop_p = doloop;
3150       data->vcands.safe_push (cand);
3151 
3152       if (!poly_int_tree_p (step))
3153 	{
3154 	  find_inv_vars (data, &step, &cand->inv_vars);
3155 
3156 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3157 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3158 	  if (inv_expr != NULL)
3159 	    {
3160 	      cand->inv_exprs = cand->inv_vars;
3161 	      cand->inv_vars = NULL;
3162 	      if (cand->inv_exprs)
3163 		bitmap_clear (cand->inv_exprs);
3164 	      else
3165 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3166 
3167 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 	    }
3169 	}
3170 
3171       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3172 	cand->ainc_use = use;
3173       else
3174 	cand->ainc_use = NULL;
3175 
3176       cand->orig_iv = orig_iv;
3177       if (dump_file && (dump_flags & TDF_DETAILS))
3178 	dump_cand (dump_file, cand);
3179     }
3180 
3181   cand->important |= important;
3182   cand->doloop_p |= doloop;
3183 
3184   /* Relate candidate to the group for which it is added.  */
3185   if (use)
3186     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187 
3188   return cand;
3189 }
3190 
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3192    is allowed.
3193 
3194    The purpose is to avoid splitting the latch edge with a biv increment,
3195    thus creating a jump, possibly confusing other optimization passes and
3196    leaving less freedom to the scheduler.  So we allow IP_END only if
3197    IP_NORMAL is not available (so we do not have a better alternative), or
3198    if the latch edge is already nonempty.  */
3199 
3200 static bool
3201 allow_ip_end_pos_p (class loop *loop)
3202 {
3203   if (!ip_normal_pos (loop))
3204     return true;
3205 
3206   if (!empty_block_p (ip_end_pos (loop)))
3207     return true;
3208 
3209   return false;
3210 }
3211 
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213    Important field is set to IMPORTANT.  */
3214 
3215 static void
3216 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3217 			bool important, struct iv_use *use)
3218 {
3219   basic_block use_bb = gimple_bb (use->stmt);
3220   machine_mode mem_mode;
3221   unsigned HOST_WIDE_INT cstepi;
3222 
3223   /* If we insert the increment in any position other than the standard
3224      ones, we must ensure that it is incremented once per iteration.
3225      It must not be in an inner nested loop, or one side of an if
3226      statement.  */
3227   if (use_bb->loop_father != data->current_loop
3228       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3229       || stmt_can_throw_internal (cfun, use->stmt)
3230       || !cst_and_fits_in_hwi (step))
3231     return;
3232 
3233   cstepi = int_cst_value (step);
3234 
3235   mem_mode = TYPE_MODE (use->mem_type);
3236   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3237 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3238        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3239       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3240 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3241 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3242     {
3243       enum tree_code code = MINUS_EXPR;
3244       tree new_base;
3245       tree new_step = step;
3246 
3247       if (POINTER_TYPE_P (TREE_TYPE (base)))
3248 	{
3249 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3250 	  code = POINTER_PLUS_EXPR;
3251 	}
3252       else
3253 	new_step = fold_convert (TREE_TYPE (base), new_step);
3254       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3255       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3256 		       use->stmt);
3257     }
3258   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3259 	|| USE_STORE_POST_INCREMENT (mem_mode))
3260        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3261       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3262 	   || USE_STORE_POST_DECREMENT (mem_mode))
3263 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3264     {
3265       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3266 		       use->stmt);
3267     }
3268 }
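
/* For example (illustrative only), on a target that provides post-increment
   addressing for the access mode, a use *p_1 whose iv steps by the access
   size gives rise to an IP_AFTER_USE candidate whose increment is placed
   right after the use, so the access and the increment can later be
   combined into a single post-increment address.  */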
3269 
3270 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3271    position to POS.  If USE is not NULL, the candidate is set as related to
3272    it.  The candidate computation is scheduled before exit condition and at
3273    the end of loop.  */
3274 
3275 static void
3276 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3277 	       struct iv_use *use, struct iv *orig_iv = NULL,
3278 	       bool doloop = false)
3279 {
3280   if (ip_normal_pos (data->current_loop))
3281     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3282 		     doloop);
3283   /* Exclude the doloop candidate here, since it requires a decrement followed
3284      by a comparison and jump; the IP_END position doesn't match that.  */
3285   if (!doloop && ip_end_pos (data->current_loop)
3286       && allow_ip_end_pos_p (data->current_loop))
3287     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3288 }
3289 
3290 /* Adds standard iv candidates.  */
3291 
3292 static void
3293 add_standard_iv_candidates (struct ivopts_data *data)
3294 {
3295   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3296 
3297   /* The same for a double-integer type if it is still fast enough.  */
3298   if (TYPE_PRECISION
3299 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3300       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3301     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3302 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3303 
3304   /* The same for a double-integer type if it is still fast enough.  */
3305   if (TYPE_PRECISION
3306 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3307       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3308     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3309 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3310 }
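
/* For example (illustrative only), on a target where int is 32 bits and
   long is a fast 64-bit type, the candidates added here are roughly
   {0, +, 1} in unsigned 32-bit and unsigned 64-bit variants; the exact
   set depends on the precision checks above.  */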
3311 
3312 
3313 /* Adds candidates based on the old induction variable IV.  */
3314 
3315 static void
3316 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3317 {
3318   gimple *phi;
3319   tree def;
3320   struct iv_cand *cand;
3321 
3322   /* Check if this biv is used in address type use.  */
3323   if (iv->no_overflow  && iv->have_address_use
3324       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3325       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3326     {
3327       tree base = fold_convert (sizetype, iv->base);
3328       tree step = fold_convert (sizetype, iv->step);
3329 
3330       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3331       add_candidate (data, base, step, true, NULL, iv);
3332       /* Add iv cand of the original type only if it has nonlinear use.  */
3333       if (iv->nonlin_use)
3334 	add_candidate (data, iv->base, iv->step, true, NULL);
3335     }
3336   else
3337     add_candidate (data, iv->base, iv->step, true, NULL);
3338 
3339   /* The same, but with initial value zero.  */
3340   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3341     add_candidate (data, size_int (0), iv->step, true, NULL);
3342   else
3343     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3344 		   iv->step, true, NULL);
3345 
3346   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3347   if (gimple_code (phi) == GIMPLE_PHI)
3348     {
3349       /* Additionally record the possibility of leaving the original iv
3350 	 untouched.  */
3351       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3352       /* Don't add candidate if it's from another PHI node because
3353 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3354       phi = SSA_NAME_DEF_STMT (def);
3355       if (gimple_code (phi) != GIMPLE_PHI)
3356 	{
3357 	  cand = add_candidate_1 (data,
3358 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3359 				  SSA_NAME_DEF_STMT (def));
3360 	  if (cand)
3361 	    {
3362 	      cand->var_before = iv->ssa_name;
3363 	      cand->var_after = def;
3364 	    }
3365 	}
3366       else
3367 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3368     }
3369 }
3370 
3371 /* Adds candidates based on the old induction variables.  */
3372 
3373 static void
3374 add_iv_candidate_for_bivs (struct ivopts_data *data)
3375 {
3376   unsigned i;
3377   struct iv *iv;
3378   bitmap_iterator bi;
3379 
3380   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3381     {
3382       iv = ver_info (data, i)->iv;
3383       if (iv && iv->biv_p && !integer_zerop (iv->step))
3384 	add_iv_candidate_for_biv (data, iv);
3385     }
3386 }
3387 
3388 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3389 
3390 static void
3391 record_common_cand (struct ivopts_data *data, tree base,
3392 		    tree step, struct iv_use *use)
3393 {
3394   class iv_common_cand ent;
3395   class iv_common_cand **slot;
3396 
3397   ent.base = base;
3398   ent.step = step;
3399   ent.hash = iterative_hash_expr (base, 0);
3400   ent.hash = iterative_hash_expr (step, ent.hash);
3401 
3402   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3403   if (*slot == NULL)
3404     {
3405       *slot = new iv_common_cand ();
3406       (*slot)->base = base;
3407       (*slot)->step = step;
3408       (*slot)->uses.create (8);
3409       (*slot)->hash = ent.hash;
3410       data->iv_common_cands.safe_push ((*slot));
3411     }
3412 
3413   gcc_assert (use != NULL);
3414   (*slot)->uses.safe_push (use);
3415   return;
3416 }
3417 
3418 /* Comparison function used to sort common candidates.  */
3419 
3420 static int
3421 common_cand_cmp (const void *p1, const void *p2)
3422 {
3423   unsigned n1, n2;
3424   const class iv_common_cand *const *const ccand1
3425     = (const class iv_common_cand *const *)p1;
3426   const class iv_common_cand *const *const ccand2
3427     = (const class iv_common_cand *const *)p2;
3428 
3429   n1 = (*ccand1)->uses.length ();
3430   n2 = (*ccand2)->uses.length ();
3431   return n2 - n1;
3432 }
3433 
3434 /* Adds IV candidates based on the common candidates recorded.  */
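/* Purely as an illustration: if two address uses a[i] and b[i] both recorded
   the common candidate {0, +, 4}, its entry collects two uses, so the
   candidate is added at the normal and/or end position and both groups get it
   in their related_cands bitmaps; a {base, step} pair recorded by a single
   use is skipped.  */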
3435 
3436 static void
3437 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3438 {
3439   unsigned i, j;
3440   struct iv_cand *cand_1, *cand_2;
3441 
3442   data->iv_common_cands.qsort (common_cand_cmp);
3443   for (i = 0; i < data->iv_common_cands.length (); i++)
3444     {
3445       class iv_common_cand *ptr = data->iv_common_cands[i];
3446 
3447       /* Only add IV candidate if it's derived from multiple uses.  */
3448       if (ptr->uses.length () <= 1)
3449 	break;
3450 
3451       cand_1 = NULL;
3452       cand_2 = NULL;
3453       if (ip_normal_pos (data->current_loop))
3454 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3455 				  false, IP_NORMAL, NULL, NULL);
3456 
3457       if (ip_end_pos (data->current_loop)
3458 	  && allow_ip_end_pos_p (data->current_loop))
3459 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3460 				  false, IP_END, NULL, NULL);
3461 
3462       /* Bind deriving uses and the new candidates.  */
3463       for (j = 0; j < ptr->uses.length (); j++)
3464 	{
3465 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3466 	  if (cand_1)
3467 	    bitmap_set_bit (group->related_cands, cand_1->id);
3468 	  if (cand_2)
3469 	    bitmap_set_bit (group->related_cands, cand_2->id);
3470 	}
3471     }
3472 
3473   /* Release data since it is useless from this point.  */
3474   data->iv_common_cand_tab->empty ();
3475   data->iv_common_cands.truncate (0);
3476 }
3477 
3478 /* Adds candidates based on the value of USE's iv.  */
3479 
3480 static void
3481 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3482 {
3483   poly_uint64 offset;
3484   tree base;
3485   struct iv *iv = use->iv;
3486   tree basetype = TREE_TYPE (iv->base);
3487 
3488   /* Don't add a candidate for an iv_use whose type is not an integer or
3489      pointer type, or doesn't have mode precision; instead, add a candidate
3490      for the corresponding scev in an unsigned type of the same precision.  See PR93674 for more info.  */
3491   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3492       || !type_has_mode_precision_p (basetype))
3493     {
3494       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3495 						 TYPE_UNSIGNED (basetype));
3496       add_candidate (data, fold_convert (basetype, iv->base),
3497 		     fold_convert (basetype, iv->step), false, NULL);
3498       return;
3499     }
3500 
3501   add_candidate (data, iv->base, iv->step, false, use);
3502 
3503   /* Record common candidate for use in case it can be shared by others.  */
3504   record_common_cand (data, iv->base, iv->step, use);
3505 
3506   /* Record common candidate with initial value zero.  */
3507   basetype = TREE_TYPE (iv->base);
3508   if (POINTER_TYPE_P (basetype))
3509     basetype = sizetype;
3510   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3511 
3512   /* Compare the cost of an address with an unscaled index to the cost of
3513      an address with a scaled index, and add a candidate if useful.  */
3514   poly_int64 step;
3515   if (use != NULL
3516       && poly_int_tree_p (iv->step, &step)
3517       && address_p (use->type))
3518     {
3519       poly_int64 new_step;
3520       unsigned int fact = preferred_mem_scale_factor
3521 	(use->iv->base,
3522 	 TYPE_MODE (use->mem_type),
3523 	 optimize_loop_for_speed_p (data->current_loop));
3524 
3525       if (fact != 1
3526 	  && multiple_p (step, fact, &new_step))
3527 	add_candidate (data, size_int (0),
3528 		       wide_int_to_tree (sizetype, new_step),
3529 		       true, NULL);
3530     }
3531 
3532   /* Record common candidate with constant offset stripped in base.
3533      Like the use itself, we also add candidate directly for it.  */
3534   base = strip_offset (iv->base, &offset);
3535   if (maybe_ne (offset, 0U) || base != iv->base)
3536     {
3537       record_common_cand (data, base, iv->step, use);
3538       add_candidate (data, base, iv->step, false, use);
3539     }
3540 
3541   /* Record common candidate with base_object removed in base.  */
3542   base = iv->base;
3543   STRIP_NOPS (base);
3544   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3545     {
3546       tree step = iv->step;
3547 
3548       STRIP_NOPS (step);
3549       base = TREE_OPERAND (base, 1);
3550       step = fold_convert (sizetype, step);
3551       record_common_cand (data, base, step, use);
3552       /* Also record common candidate with offset stripped.  */
3553       base = strip_offset (base, &offset);
3554       if (maybe_ne (offset, 0U))
3555 	record_common_cand (data, base, step, use);
3556     }
3557 
3558   /* Finally, add auto-increment candidates.  Make such variables
3559      important since other iv uses with the same base object may be
3560      based on them.  */
3561   if (use != NULL && address_p (use->type))
3562     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3563 }
3564 
3565 /* Adds candidates based on the uses.  */
3566 
3567 static void
3568 add_iv_candidate_for_groups (struct ivopts_data *data)
3569 {
3570   unsigned i;
3571 
3572   /* Only add candidates for the first use in each group.  */
3573   for (i = 0; i < data->vgroups.length (); i++)
3574     {
3575       struct iv_group *group = data->vgroups[i];
3576 
3577       gcc_assert (group->vuses[0] != NULL);
3578       add_iv_candidate_for_use (data, group->vuses[0]);
3579     }
3580   add_iv_candidate_derived_from_uses (data);
3581 }
3582 
3583 /* Record important candidates and add them to related_cands bitmaps.  */
3584 
3585 static void
3586 record_important_candidates (struct ivopts_data *data)
3587 {
3588   unsigned i;
3589   struct iv_group *group;
3590 
3591   for (i = 0; i < data->vcands.length (); i++)
3592     {
3593       struct iv_cand *cand = data->vcands[i];
3594 
3595       if (cand->important)
3596 	bitmap_set_bit (data->important_candidates, i);
3597     }
3598 
3599   data->consider_all_candidates = (data->vcands.length ()
3600 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3601 
3602   /* Add important candidates to groups' related_cands bitmaps.  */
3603   for (i = 0; i < data->vgroups.length (); i++)
3604     {
3605       group = data->vgroups[i];
3606       bitmap_ior_into (group->related_cands, data->important_candidates);
3607     }
3608 }
3609 
3610 /* Allocates the data structure mapping (group, candidate) pairs to costs.
3611    If consider_all_candidates is true, each group gets an array indexed by
3612    candidate id; otherwise each group gets a small hash table sized to its related candidates.  */
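/* For instance, a group with 5 related candidates gets a cost_map of
   8 entries (the next power of two), so set_group_iv_cost and
   get_group_iv_cost can index it with "cand->id & 7" instead of a
   division.  */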
3613 
3614 static void
3615 alloc_use_cost_map (struct ivopts_data *data)
3616 {
3617   unsigned i, size, s;
3618 
3619   for (i = 0; i < data->vgroups.length (); i++)
3620     {
3621       struct iv_group *group = data->vgroups[i];
3622 
3623       if (data->consider_all_candidates)
3624 	size = data->vcands.length ();
3625       else
3626 	{
3627 	  s = bitmap_count_bits (group->related_cands);
3628 
3629 	  /* Round up to a power of two, so that taking the modulo is fast.  */
3630 	  size = s ? (1 << ceil_log2 (s)) : 1;
3631 	}
3632 
3633       group->n_map_members = size;
3634       group->cost_map = XCNEWVEC (class cost_pair, size);
3635     }
3636 }
3637 
3638 /* Sets the cost of the (GROUP, CAND) pair to COST, recording that it depends
3639    on the invariants INV_VARS / INV_EXPRS, that the value used in expressing
3640    it is VALUE, and, in case of iv elimination, that the comparison operator is COMP.  */
3641 
3642 static void
3643 set_group_iv_cost (struct ivopts_data *data,
3644 		   struct iv_group *group, struct iv_cand *cand,
3645 		   comp_cost cost, bitmap inv_vars, tree value,
3646 		   enum tree_code comp, bitmap inv_exprs)
3647 {
3648   unsigned i, s;
3649 
3650   if (cost.infinite_cost_p ())
3651     {
3652       BITMAP_FREE (inv_vars);
3653       BITMAP_FREE (inv_exprs);
3654       return;
3655     }
3656 
3657   if (data->consider_all_candidates)
3658     {
3659       group->cost_map[cand->id].cand = cand;
3660       group->cost_map[cand->id].cost = cost;
3661       group->cost_map[cand->id].inv_vars = inv_vars;
3662       group->cost_map[cand->id].inv_exprs = inv_exprs;
3663       group->cost_map[cand->id].value = value;
3664       group->cost_map[cand->id].comp = comp;
3665       return;
3666     }
3667 
3668   /* n_map_members is a power of two, so this computes modulo.  */
3669   s = cand->id & (group->n_map_members - 1);
3670   for (i = s; i < group->n_map_members; i++)
3671     if (!group->cost_map[i].cand)
3672       goto found;
3673   for (i = 0; i < s; i++)
3674     if (!group->cost_map[i].cand)
3675       goto found;
3676 
3677   gcc_unreachable ();
3678 
3679 found:
3680   group->cost_map[i].cand = cand;
3681   group->cost_map[i].cost = cost;
3682   group->cost_map[i].inv_vars = inv_vars;
3683   group->cost_map[i].inv_exprs = inv_exprs;
3684   group->cost_map[i].value = value;
3685   group->cost_map[i].comp = comp;
3686 }
3687 
3688 /* Gets cost of (GROUP, CAND) pair.  */
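/* A sketch of the lookup when not considering all candidates: with
   n_map_members == 8 and cand->id == 13, probing starts at slot 13 & 7 == 5
   and walks forward (wrapping around to 0) until it finds either the
   candidate or an empty slot, mirroring the insertion scheme used in
   set_group_iv_cost.  */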
3689 
3690 static class cost_pair *
3691 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3692 		   struct iv_cand *cand)
3693 {
3694   unsigned i, s;
3695   class cost_pair *ret;
3696 
3697   if (!cand)
3698     return NULL;
3699 
3700   if (data->consider_all_candidates)
3701     {
3702       ret = group->cost_map + cand->id;
3703       if (!ret->cand)
3704 	return NULL;
3705 
3706       return ret;
3707     }
3708 
3709   /* n_map_members is a power of two, so this computes modulo.  */
3710   s = cand->id & (group->n_map_members - 1);
3711   for (i = s; i < group->n_map_members; i++)
3712     if (group->cost_map[i].cand == cand)
3713       return group->cost_map + i;
3714     else if (group->cost_map[i].cand == NULL)
3715       return NULL;
3716   for (i = 0; i < s; i++)
3717     if (group->cost_map[i].cand == cand)
3718       return group->cost_map + i;
3719     else if (group->cost_map[i].cand == NULL)
3720       return NULL;
3721 
3722   return NULL;
3723 }
3724 
3725 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
3726 static rtx
3727 produce_memory_decl_rtl (tree obj, int *regno)
3728 {
3729   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3730   machine_mode address_mode = targetm.addr_space.address_mode (as);
3731   rtx x;
3732 
3733   gcc_assert (obj);
3734   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3735     {
3736       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3737       x = gen_rtx_SYMBOL_REF (address_mode, name);
3738       SET_SYMBOL_REF_DECL (x, obj);
3739       x = gen_rtx_MEM (DECL_MODE (obj), x);
3740       set_mem_addr_space (x, as);
3741       targetm.encode_section_info (obj, x, true);
3742     }
3743   else
3744     {
3745       x = gen_raw_REG (address_mode, (*regno)++);
3746       x = gen_rtx_MEM (DECL_MODE (obj), x);
3747       set_mem_addr_space (x, as);
3748     }
3749 
3750   return x;
3751 }
3752 
3753 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
3754    walk_tree.  DATA points to the current fake register number.  */
3755 
3756 static tree
3757 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3758 {
3759   tree obj = NULL_TREE;
3760   rtx x = NULL_RTX;
3761   int *regno = (int *) data;
3762 
3763   switch (TREE_CODE (*expr_p))
3764     {
3765     case ADDR_EXPR:
3766       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3767 	   handled_component_p (*expr_p);
3768 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3769 	continue;
3770       obj = *expr_p;
3771       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3772 	x = produce_memory_decl_rtl (obj, regno);
3773       break;
3774 
3775     case SSA_NAME:
3776       *ws = 0;
3777       obj = SSA_NAME_VAR (*expr_p);
3778       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3779       if (!obj)
3780 	return NULL_TREE;
3781       if (!DECL_RTL_SET_P (obj))
3782 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3783       break;
3784 
3785     case VAR_DECL:
3786     case PARM_DECL:
3787     case RESULT_DECL:
3788       *ws = 0;
3789       obj = *expr_p;
3790 
3791       if (DECL_RTL_SET_P (obj))
3792 	break;
3793 
3794       if (DECL_MODE (obj) == BLKmode)
3795 	x = produce_memory_decl_rtl (obj, regno);
3796       else
3797 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3798 
3799       break;
3800 
3801     default:
3802       break;
3803     }
3804 
3805   if (x)
3806     {
3807       decl_rtl_to_reset.safe_push (obj);
3808       SET_DECL_RTL (obj, x);
3809     }
3810 
3811   return NULL_TREE;
3812 }
3813 
3814 /* Predict whether the given loop will be transformed in the RTL
3815    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3816    This is only for target-independent checks; see targetm.predict_doloop_p
3817    for the target-dependent ones.
3818 
3819    Note that, according to some initial investigation, checks such as the
3820    costly niter check and invalid stmt scanning don't gain much in the
3821    general case, so keep this as simple as possible for now.
3822 
3823    Some RTL-specific checks seem impossible to do on GIMPLE; if any new or
3824    easy checks are missing here, please add them.  */
3825 
3826 static bool
3827 generic_predict_doloop_p (struct ivopts_data *data)
3828 {
3829   class loop *loop = data->current_loop;
3830 
3831   /* Call target hook for target dependent checks.  */
3832   if (!targetm.predict_doloop_p (loop))
3833     {
3834       if (dump_file && (dump_flags & TDF_DETAILS))
3835 	fprintf (dump_file, "Predict doloop failure due to"
3836 			    " target specific checks.\n");
3837       return false;
3838     }
3839 
3840   /* Similar to doloop_optimize, check the iteration description to see
3841      whether it is suitable.  Keep it as simple as possible; feel free to
3842      extend it if you find any case with multiple exits that matters.  */
3843   edge exit = single_dom_exit (loop);
3844   class tree_niter_desc *niter_desc;
3845   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3846     {
3847       if (dump_file && (dump_flags & TDF_DETAILS))
3848 	fprintf (dump_file, "Predict doloop failure due to"
3849 			    " unexpected niters.\n");
3850       return false;
3851     }
3852 
3853   /* Similar to doloop_optimize, check whether the iteration count is too
3854      small for the transformation to be profitable.  */
3855   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3856   if (est_niter == -1)
3857     est_niter = get_likely_max_loop_iterations_int (loop);
3858   if (est_niter >= 0 && est_niter < 3)
3859     {
3860       if (dump_file && (dump_flags & TDF_DETAILS))
3861 	fprintf (dump_file,
3862 		 "Predict doloop failure due to"
3863 		 " too few iterations (%u).\n",
3864 		 (unsigned int) est_niter);
3865       return false;
3866     }
3867 
3868   return true;
3869 }
3870 
3871 /* Determines cost of the computation of EXPR.  */
3872 
3873 static unsigned
3874 computation_cost (tree expr, bool speed)
3875 {
3876   rtx_insn *seq;
3877   rtx rslt;
3878   tree type = TREE_TYPE (expr);
3879   unsigned cost;
3880   /* Avoid using hard regs in ways which may be unsupported.  */
3881   int regno = LAST_VIRTUAL_REGISTER + 1;
3882   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3883   enum node_frequency real_frequency = node->frequency;
3884 
3885   node->frequency = NODE_FREQUENCY_NORMAL;
3886   crtl->maybe_hot_insn_p = speed;
3887   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3888   start_sequence ();
3889   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3890   seq = get_insns ();
3891   end_sequence ();
3892   default_rtl_profile ();
3893   node->frequency = real_frequency;
3894 
3895   cost = seq_cost (seq, speed);
3896   if (MEM_P (rslt))
3897     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3898 			  TYPE_ADDR_SPACE (type), speed);
3899   else if (!REG_P (rslt))
3900     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3901 
3902   return cost;
3903 }
3904 
3905 /* Returns variable containing the value of candidate CAND at statement AT.  */
3906 
3907 static tree
3908 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3909 {
3910   if (stmt_after_increment (loop, cand, stmt))
3911     return cand->var_after;
3912   else
3913     return cand->var_before;
3914 }
3915 
3916 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3917    same precision that is at least as wide as the precision of TYPE, stores
3918    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3919    type of A and B.  */
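/* For example, if A is (unsigned int) LA and B is (unsigned int) LB, with LA
   and LB both of type unsigned long, *A and *B are replaced by LA and LB and
   unsigned long is returned, so the subtraction done by the caller is folded
   in the wider type.  */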
3920 
3921 static tree
3922 determine_common_wider_type (tree *a, tree *b)
3923 {
3924   tree wider_type = NULL;
3925   tree suba, subb;
3926   tree atype = TREE_TYPE (*a);
3927 
3928   if (CONVERT_EXPR_P (*a))
3929     {
3930       suba = TREE_OPERAND (*a, 0);
3931       wider_type = TREE_TYPE (suba);
3932       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3933 	return atype;
3934     }
3935   else
3936     return atype;
3937 
3938   if (CONVERT_EXPR_P (*b))
3939     {
3940       subb = TREE_OPERAND (*b, 0);
3941       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3942 	return atype;
3943     }
3944   else
3945     return atype;
3946 
3947   *a = suba;
3948   *b = subb;
3949   return wider_type;
3950 }
3951 
3952 /* Determines the expression by which USE is expressed from induction variable
3953    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3954    form in two parts: the invariant part is stored in AFF_INV, the variant
3955    part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if
3956    it's non-null.  Returns false if USE cannot be expressed using CAND.  */
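/* A purely illustrative example: for a use with base 4 and step 4 and a
   candidate with base 0 and step 1, the ratio is 4 and the use is rebuilt as
   4 - 4 * 0 + 4 * var, i.e. AFF_INV holds the constant 4 and AFF_VAR holds
   4 * var.  */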
3957 
3958 static bool
3959 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3960 		       struct iv_cand *cand, class aff_tree *aff_inv,
3961 		       class aff_tree *aff_var, widest_int *prat = NULL)
3962 {
3963   tree ubase = use->iv->base, ustep = use->iv->step;
3964   tree cbase = cand->iv->base, cstep = cand->iv->step;
3965   tree common_type, uutype, var, cstep_common;
3966   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3967   aff_tree aff_cbase;
3968   widest_int rat;
3969 
3970   /* We must have enough precision to express the values of USE.  */
3971   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3972     return false;
3973 
3974   var = var_at_stmt (loop, cand, at);
3975   uutype = unsigned_type_for (utype);
3976 
3977   /* If the conversion is not a no-op, perform it.  */
3978   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3979     {
3980       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3981 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3982 	{
3983 	  tree inner_base, inner_step, inner_type;
3984 	  inner_base = TREE_OPERAND (cbase, 0);
3985 	  if (CONVERT_EXPR_P (cstep))
3986 	    inner_step = TREE_OPERAND (cstep, 0);
3987 	  else
3988 	    inner_step = cstep;
3989 
3990 	  inner_type = TREE_TYPE (inner_base);
3991 	  /* If the candidate is added from a biv whose type is smaller than
3992 	     ctype, we know that neither the candidate nor the biv overflows.
3993 	     In this case, it's safe to skip the conversion in the candidate.
3994 	     As an example, (unsigned short)((unsigned long)A) equals
3995 	     (unsigned short)A, if A has a type no larger than short.  */
3996 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3997 	    {
3998 	      cbase = inner_base;
3999 	      cstep = inner_step;
4000 	    }
4001 	}
4002       cbase = fold_convert (uutype, cbase);
4003       cstep = fold_convert (uutype, cstep);
4004       var = fold_convert (uutype, var);
4005     }
4006 
4007   /* Ratio is 1 when computing the value of biv cand by itself.
4008      We can't rely on constant_multiple_of in this case because the
4009      use is created after the original biv is selected.  The call
4010      could fail because of inconsistent fold behavior.  See PR68021
4011      for more information.  */
4012   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4013     {
4014       gcc_assert (is_gimple_assign (use->stmt));
4015       gcc_assert (use->iv->ssa_name == cand->var_after);
4016       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4017       rat = 1;
4018     }
4019   else if (!constant_multiple_of (ustep, cstep, &rat))
4020     return false;
4021 
4022   if (prat)
4023     *prat = rat;
4024 
4025   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4026      type, we achieve better folding by computing their difference in this
4027      wider type, and casting the result to UUTYPE.  We do not need to worry
4028      about overflows, as all the arithmetic will in the end be performed in
4029      UUTYPE anyway.  */
4030   common_type = determine_common_wider_type (&ubase, &cbase);
4031 
4032   /* use = ubase - ratio * cbase + ratio * var.  */
4033   tree_to_aff_combination (ubase, common_type, aff_inv);
4034   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4035   tree_to_aff_combination (var, uutype, aff_var);
4036 
4037   /* We need to shift the value if we are after the increment.  */
4038   if (stmt_after_increment (loop, cand, at))
4039     {
4040       aff_tree cstep_aff;
4041 
4042       if (common_type != uutype)
4043 	cstep_common = fold_convert (common_type, cstep);
4044       else
4045 	cstep_common = cstep;
4046 
4047       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4048       aff_combination_add (&aff_cbase, &cstep_aff);
4049     }
4050 
4051   aff_combination_scale (&aff_cbase, -rat);
4052   aff_combination_add (aff_inv, &aff_cbase);
4053   if (common_type != uutype)
4054     aff_combination_convert (aff_inv, uutype);
4055 
4056   aff_combination_scale (aff_var, rat);
4057   return true;
4058 }
4059 
4060 /* Determines the expression by which USE is expressed from induction variable
4061    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4062    form in AFF.  Returns false if USE cannot be expressed using CAND.  */
4063 
4064 static bool
4065 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4066 		     struct iv_cand *cand, class aff_tree *aff)
4067 {
4068   aff_tree aff_var;
4069 
4070   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4071     return false;
4072 
4073   aff_combination_add (aff, &aff_var);
4074   return true;
4075 }
4076 
4077 /* Return the type of USE.  */
4078 
4079 static tree
4080 get_use_type (struct iv_use *use)
4081 {
4082   tree base_type = TREE_TYPE (use->iv->base);
4083   tree type;
4084 
4085   if (use->type == USE_REF_ADDRESS)
4086     {
4087       /* The base_type may be a void pointer.  Create a pointer type based on
4088 	 the mem_ref instead.  */
4089       type = build_pointer_type (TREE_TYPE (*use->op_p));
4090       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4091 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4092     }
4093   else
4094     type = base_type;
4095 
4096   return type;
4097 }
4098 
4099 /* Determines the expression by which USE is expressed from induction variable
4100    CAND at statement AT in LOOP.  The computation is unshared.  */
4101 
4102 static tree
4103 get_computation_at (class loop *loop, gimple *at,
4104 		    struct iv_use *use, struct iv_cand *cand)
4105 {
4106   aff_tree aff;
4107   tree type = get_use_type (use);
4108 
4109   if (!get_computation_aff (loop, at, use, cand, &aff))
4110     return NULL_TREE;
4111   unshare_aff_combination (&aff);
4112   return fold_convert (type, aff_combination_to_tree (&aff));
4113 }
4114 
4115 /* Like get_computation_at, but try harder, even if the computation
4116    is more expensive.  Intended for debug stmts.  */
4117 
4118 static tree
4119 get_debug_computation_at (class loop *loop, gimple *at,
4120 			  struct iv_use *use, struct iv_cand *cand)
4121 {
4122   if (tree ret = get_computation_at (loop, at, use, cand))
4123     return ret;
4124 
4125   tree ubase = use->iv->base, ustep = use->iv->step;
4126   tree cbase = cand->iv->base, cstep = cand->iv->step;
4127   tree var;
4128   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4129   widest_int rat;
4130 
4131   /* We must have enough precision to express the values of USE.  */
4132   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4133     return NULL_TREE;
4134 
4135   /* Try to handle the case that get_computation_at doesn't, i.e. try to
4136      express
4137      use = ubase + (var - cbase) / ratio.  */
4138   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4139 			     &rat))
4140     return NULL_TREE;
4141 
4142   bool neg_p = false;
4143   if (wi::neg_p (rat))
4144     {
4145       if (TYPE_UNSIGNED (ctype))
4146 	return NULL_TREE;
4147       neg_p = true;
4148       rat = wi::neg (rat);
4149     }
4150 
4151   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4152      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4153      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4154      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4155      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4156   if (!use->iv->no_overflow
4157       && !cand->iv->no_overflow
4158       && !integer_pow2p (cstep))
4159     return NULL_TREE;
4160 
4161   int bits = wi::exact_log2 (rat);
4162   if (bits == -1)
4163     bits = wi::floor_log2 (rat) + 1;
4164   if (!cand->iv->no_overflow
4165       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4166     return NULL_TREE;
4167 
4168   var = var_at_stmt (loop, cand, at);
4169 
4170   if (POINTER_TYPE_P (ctype))
4171     {
4172       ctype = unsigned_type_for (ctype);
4173       cbase = fold_convert (ctype, cbase);
4174       cstep = fold_convert (ctype, cstep);
4175       var = fold_convert (ctype, var);
4176     }
4177 
4178   if (stmt_after_increment (loop, cand, at))
4179     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4180 		       unshare_expr (cstep));
4181 
4182   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4183   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4184 		     wide_int_to_tree (TREE_TYPE (var), rat));
4185   if (POINTER_TYPE_P (utype))
4186     {
4187       var = fold_convert (sizetype, var);
4188       if (neg_p)
4189 	var = fold_build1 (NEGATE_EXPR, sizetype, var);
4190       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4191     }
4192   else
4193     {
4194       var = fold_convert (utype, var);
4195       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4196 			 ubase, var);
4197     }
4198   return var;
4199 }
4200 
4201 /* Adjust the cost COST for being in loop setup rather than loop body.
4202    If we're optimizing for space, the loop setup overhead is constant;
4203    if we're optimizing for speed, amortize it over the per-iteration cost.
4204    If ROUND_UP_P is true, the result is rounded up rather than truncated
4205    toward zero when optimizing for speed.  */
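/* E.g. with a setup cost of 10 and an average of 4 iterations, optimizing for
   speed yields 10 / 4 = 2, or (10 + 3) / 4 = 3 with ROUND_UP_P; when
   optimizing for size the 10 is returned unchanged.  */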
4206 static int64_t
4207 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4208 		   bool round_up_p = false)
4209 {
4210   if (cost == INFTY)
4211     return cost;
4212   else if (optimize_loop_for_speed_p (data->current_loop))
4213     {
4214       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4215       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4216     }
4217   else
4218     return cost;
4219 }
4220 
4221 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4222    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4223    calculating the operands of EXPR.  Returns true if successful, and returns
4224    the cost in COST.  */
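/* For instance, for EXPR = a + b * 8 with MULT being b * 8, m is 3 and the
   resulting cost is the cheaper of "shift by 3, then add" and a single
   shift-and-add instruction, plus the cost of the non-multiplied operand and,
   if b is not a simple value, the cost of forcing b into a variable.  */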
4225 
4226 static bool
4227 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4228 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4229 {
4230   comp_cost res;
4231   tree op1 = TREE_OPERAND (expr, 1);
4232   tree cst = TREE_OPERAND (mult, 1);
4233   tree multop = TREE_OPERAND (mult, 0);
4234   int m = exact_log2 (int_cst_value (cst));
4235   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4236   int as_cost, sa_cost;
4237   bool mult_in_op1;
4238 
4239   if (!(m >= 0 && m < maxm))
4240     return false;
4241 
4242   STRIP_NOPS (op1);
4243   mult_in_op1 = operand_equal_p (op1, mult, 0);
4244 
4245   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4246 
4247   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4248      use that in preference to a shift insn followed by an add insn.  */
4249   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4250 	     ? shiftadd_cost (speed, mode, m)
4251 	     : (mult_in_op1
4252 		? shiftsub1_cost (speed, mode, m)
4253 		: shiftsub0_cost (speed, mode, m)));
4254 
4255   res = comp_cost (MIN (as_cost, sa_cost), 0);
4256   res += (mult_in_op1 ? cost0 : cost1);
4257 
4258   STRIP_NOPS (multop);
4259   if (!is_gimple_val (multop))
4260     res += force_expr_to_var_cost (multop, speed);
4261 
4262   *cost = res;
4263   return true;
4264 }
4265 
4266 /* Estimates cost of forcing expression EXPR into a variable.  */
4267 
4268 static comp_cost
4269 force_expr_to_var_cost (tree expr, bool speed)
4270 {
4271   static bool costs_initialized = false;
4272   static unsigned integer_cost [2];
4273   static unsigned symbol_cost [2];
4274   static unsigned address_cost [2];
4275   tree op0, op1;
4276   comp_cost cost0, cost1, cost;
4277   machine_mode mode;
4278   scalar_int_mode int_mode;
4279 
4280   if (!costs_initialized)
4281     {
4282       tree type = build_pointer_type (integer_type_node);
4283       tree var, addr;
4284       rtx x;
4285       int i;
4286 
4287       var = create_tmp_var_raw (integer_type_node, "test_var");
4288       TREE_STATIC (var) = 1;
4289       x = produce_memory_decl_rtl (var, NULL);
4290       SET_DECL_RTL (var, x);
4291 
4292       addr = build1 (ADDR_EXPR, type, var);
4293 
4294 
4295       for (i = 0; i < 2; i++)
4296 	{
4297 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4298 							     2000), i);
4299 
4300 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4301 
4302 	  address_cost[i]
4303 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4304 	  if (dump_file && (dump_flags & TDF_DETAILS))
4305 	    {
4306 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4307 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4308 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4309 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4310 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4311 	      fprintf (dump_file, "\n");
4312 	    }
4313 	}
4314 
4315       costs_initialized = true;
4316     }
4317 
4318   STRIP_NOPS (expr);
4319 
4320   if (SSA_VAR_P (expr))
4321     return no_cost;
4322 
4323   if (is_gimple_min_invariant (expr))
4324     {
4325       if (poly_int_tree_p (expr))
4326 	return comp_cost (integer_cost [speed], 0);
4327 
4328       if (TREE_CODE (expr) == ADDR_EXPR)
4329 	{
4330 	  tree obj = TREE_OPERAND (expr, 0);
4331 
4332 	  if (VAR_P (obj)
4333 	      || TREE_CODE (obj) == PARM_DECL
4334 	      || TREE_CODE (obj) == RESULT_DECL)
4335 	    return comp_cost (symbol_cost [speed], 0);
4336 	}
4337 
4338       return comp_cost (address_cost [speed], 0);
4339     }
4340 
4341   switch (TREE_CODE (expr))
4342     {
4343     case POINTER_PLUS_EXPR:
4344     case PLUS_EXPR:
4345     case MINUS_EXPR:
4346     case MULT_EXPR:
4347     case TRUNC_DIV_EXPR:
4348     case BIT_AND_EXPR:
4349     case BIT_IOR_EXPR:
4350     case LSHIFT_EXPR:
4351     case RSHIFT_EXPR:
4352       op0 = TREE_OPERAND (expr, 0);
4353       op1 = TREE_OPERAND (expr, 1);
4354       STRIP_NOPS (op0);
4355       STRIP_NOPS (op1);
4356       break;
4357 
4358     CASE_CONVERT:
4359     case NEGATE_EXPR:
4360     case BIT_NOT_EXPR:
4361       op0 = TREE_OPERAND (expr, 0);
4362       STRIP_NOPS (op0);
4363       op1 = NULL_TREE;
4364       break;
4365     /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4366        introduce a COND_EXPR for the IV base, so we need to support better
4367        cost estimation for this COND_EXPR and tcc_comparison.  */
4368     case COND_EXPR:
4369       op0 = TREE_OPERAND (expr, 1);
4370       STRIP_NOPS (op0);
4371       op1 = TREE_OPERAND (expr, 2);
4372       STRIP_NOPS (op1);
4373       break;
4374     case LT_EXPR:
4375     case LE_EXPR:
4376     case GT_EXPR:
4377     case GE_EXPR:
4378     case EQ_EXPR:
4379     case NE_EXPR:
4380     case UNORDERED_EXPR:
4381     case ORDERED_EXPR:
4382     case UNLT_EXPR:
4383     case UNLE_EXPR:
4384     case UNGT_EXPR:
4385     case UNGE_EXPR:
4386     case UNEQ_EXPR:
4387     case LTGT_EXPR:
4388     case MAX_EXPR:
4389     case MIN_EXPR:
4390       op0 = TREE_OPERAND (expr, 0);
4391       STRIP_NOPS (op0);
4392       op1 = TREE_OPERAND (expr, 1);
4393       STRIP_NOPS (op1);
4394       break;
4395 
4396     default:
4397       /* Just an arbitrary value, FIXME.  */
4398       return comp_cost (target_spill_cost[speed], 0);
4399     }
4400 
4401   if (op0 == NULL_TREE
4402       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4403     cost0 = no_cost;
4404   else
4405     cost0 = force_expr_to_var_cost (op0, speed);
4406 
4407   if (op1 == NULL_TREE
4408       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4409     cost1 = no_cost;
4410   else
4411     cost1 = force_expr_to_var_cost (op1, speed);
4412 
4413   mode = TYPE_MODE (TREE_TYPE (expr));
4414   switch (TREE_CODE (expr))
4415     {
4416     case POINTER_PLUS_EXPR:
4417     case PLUS_EXPR:
4418     case MINUS_EXPR:
4419     case NEGATE_EXPR:
4420       cost = comp_cost (add_cost (speed, mode), 0);
4421       if (TREE_CODE (expr) != NEGATE_EXPR)
4422 	{
4423 	  tree mult = NULL_TREE;
4424 	  comp_cost sa_cost;
4425 	  if (TREE_CODE (op1) == MULT_EXPR)
4426 	    mult = op1;
4427 	  else if (TREE_CODE (op0) == MULT_EXPR)
4428 	    mult = op0;
4429 
4430 	  if (mult != NULL_TREE
4431 	      && is_a <scalar_int_mode> (mode, &int_mode)
4432 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4433 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4434 				    speed, &sa_cost))
4435 	    return sa_cost;
4436 	}
4437       break;
4438 
4439     CASE_CONVERT:
4440       {
4441 	tree inner_mode, outer_mode;
4442 	outer_mode = TREE_TYPE (expr);
4443 	inner_mode = TREE_TYPE (op0);
4444 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4445 				       TYPE_MODE (inner_mode), speed), 0);
4446       }
4447       break;
4448 
4449     case MULT_EXPR:
4450       if (cst_and_fits_in_hwi (op0))
4451 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4452 					     mode, speed), 0);
4453       else if (cst_and_fits_in_hwi (op1))
4454 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4455 					     mode, speed), 0);
4456       else
4457 	return comp_cost (target_spill_cost [speed], 0);
4458       break;
4459 
4460     case TRUNC_DIV_EXPR:
4461       /* Division by power of two is usually cheap, so we allow it.  Forbid
4462 	 anything else.  */
4463       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4464 	cost = comp_cost (add_cost (speed, mode), 0);
4465       else
4466 	cost = comp_cost (target_spill_cost[speed], 0);
4467       break;
4468 
4469     case BIT_AND_EXPR:
4470     case BIT_IOR_EXPR:
4471     case BIT_NOT_EXPR:
4472     case LSHIFT_EXPR:
4473     case RSHIFT_EXPR:
4474       cost = comp_cost (add_cost (speed, mode), 0);
4475       break;
4476     case COND_EXPR:
4477       op0 = TREE_OPERAND (expr, 0);
4478       STRIP_NOPS (op0);
4479       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4480 	  || CONSTANT_CLASS_P (op0))
4481 	cost = no_cost;
4482       else
4483 	cost = force_expr_to_var_cost (op0, speed);
4484       break;
4485     case LT_EXPR:
4486     case LE_EXPR:
4487     case GT_EXPR:
4488     case GE_EXPR:
4489     case EQ_EXPR:
4490     case NE_EXPR:
4491     case UNORDERED_EXPR:
4492     case ORDERED_EXPR:
4493     case UNLT_EXPR:
4494     case UNLE_EXPR:
4495     case UNGT_EXPR:
4496     case UNGE_EXPR:
4497     case UNEQ_EXPR:
4498     case LTGT_EXPR:
4499     case MAX_EXPR:
4500     case MIN_EXPR:
4501       /* Simply use the add cost for now; FIXME if there is a more accurate
4502 	 way to evaluate the cost.  */
4503       cost = comp_cost (add_cost (speed, mode), 0);
4504       break;
4505 
4506     default:
4507       gcc_unreachable ();
4508     }
4509 
4510   cost += cost0;
4511   cost += cost1;
4512   return cost;
4513 }
4514 
4515 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4516    invariants the computation depends on.  */
4517 
4518 static comp_cost
4519 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4520 {
4521   if (!expr)
4522     return no_cost;
4523 
4524   find_inv_vars (data, &expr, inv_vars);
4525   return force_expr_to_var_cost (expr, data->speed);
4526 }
4527 
4528 /* Returns the cost of an auto-modifying address expression of the form
4529    base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
4530    is the offset of the address expression.  The address expression has
4531    ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED
4532    says whether we are optimizing for speed or size.  */
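/* As an illustration: a candidate that steps the address by exactly the
   access size with no remaining constant offset matches AINC_POST_INC (the
   classic "*p++" pattern); an offset equal to the access size with the same
   step matches AINC_PRE_INC, and the negated forms map to AINC_POST_DEC and
   AINC_PRE_DEC.  Which of these are cheap is entirely target dependent.  */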
4533 
4534 enum ainc_type
4535 {
4536   AINC_PRE_INC,		/* Pre increment.  */
4537   AINC_PRE_DEC,		/* Pre decrement.  */
4538   AINC_POST_INC,	/* Post increment.  */
4539   AINC_POST_DEC,	/* Post decrement.  */
4540   AINC_NONE		/* Also the number of auto increment types.  */
4541 };
4542 
4543 struct ainc_cost_data
4544 {
4545   int64_t costs[AINC_NONE];
4546 };
4547 
4548 static comp_cost
4549 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4550 		       machine_mode addr_mode, machine_mode mem_mode,
4551 		       addr_space_t as, bool speed)
4552 {
4553   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4554       && !USE_STORE_PRE_DECREMENT (mem_mode)
4555       && !USE_LOAD_POST_DECREMENT (mem_mode)
4556       && !USE_STORE_POST_DECREMENT (mem_mode)
4557       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4558       && !USE_STORE_PRE_INCREMENT (mem_mode)
4559       && !USE_LOAD_POST_INCREMENT (mem_mode)
4560       && !USE_STORE_POST_INCREMENT (mem_mode))
4561     return infinite_cost;
4562 
4563   static vec<ainc_cost_data *> ainc_cost_data_list;
4564   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4565   if (idx >= ainc_cost_data_list.length ())
4566     {
4567       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4568 
4569       gcc_assert (nsize > idx);
4570       ainc_cost_data_list.safe_grow_cleared (nsize);
4571     }
4572 
4573   ainc_cost_data *data = ainc_cost_data_list[idx];
4574   if (data == NULL)
4575     {
4576       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4577 
4578       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4579       data->costs[AINC_PRE_DEC] = INFTY;
4580       data->costs[AINC_POST_DEC] = INFTY;
4581       data->costs[AINC_PRE_INC] = INFTY;
4582       data->costs[AINC_POST_INC] = INFTY;
4583       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4584 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4585 	{
4586 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4587 
4588 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4589 	    data->costs[AINC_PRE_DEC]
4590 	      = address_cost (addr, mem_mode, as, speed);
4591 	}
4592       if (USE_LOAD_POST_DECREMENT (mem_mode)
4593 	  || USE_STORE_POST_DECREMENT (mem_mode))
4594 	{
4595 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4596 
4597 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4598 	    data->costs[AINC_POST_DEC]
4599 	      = address_cost (addr, mem_mode, as, speed);
4600 	}
4601       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4602 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4603 	{
4604 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4605 
4606 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4607 	    data->costs[AINC_PRE_INC]
4608 	      = address_cost (addr, mem_mode, as, speed);
4609 	}
4610       if (USE_LOAD_POST_INCREMENT (mem_mode)
4611 	  || USE_STORE_POST_INCREMENT (mem_mode))
4612 	{
4613 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4614 
4615 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4616 	    data->costs[AINC_POST_INC]
4617 	      = address_cost (addr, mem_mode, as, speed);
4618 	}
4619       ainc_cost_data_list[idx] = data;
4620     }
4621 
4622   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4623   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4624     return comp_cost (data->costs[AINC_POST_INC], 0);
4625   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4626     return comp_cost (data->costs[AINC_POST_DEC], 0);
4627   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4628     return comp_cost (data->costs[AINC_PRE_INC], 0);
4629   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4630     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4631 
4632   return infinite_cost;
4633 }
4634 
4635 /* Return the cost of computing USE's address expression by using CAND.
4636    AFF_INV and AFF_VAR represent the invariant and variant parts of the
4637    address expression, respectively.  If AFF_INV is simple, store the loop
4638    invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
4639    handle it as a new invariant expression and record it in INV_EXPR.  RATIO
4640    is the ratio between the steps of USE and CAND.  If CAN_AUTOINC is
4641    non-NULL, store a boolean value in it indicating whether this is an
4642    auto-increment address.  */
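/* A rough sketch of the flow (the legitimate forms are target dependent):
   with a non-constant invariant part such as "&a + 28" and a variant part
   holding the scaled candidate, the code below first tries "base + index",
   then "base + index << scale", then folds the constant offset and a possible
   symbol into the address; whatever cannot be encoded in a valid address is
   costed separately via force_var_cost and an extra add.  */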
4643 
4644 static comp_cost
4645 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4646 		  struct iv_cand *cand, aff_tree *aff_inv,
4647 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4648 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4649 		  bool *can_autoinc, bool speed)
4650 {
4651   rtx addr;
4652   bool simple_inv = true;
4653   tree comp_inv = NULL_TREE, type = aff_var->type;
4654   comp_cost var_cost = no_cost, cost = no_cost;
4655   struct mem_address parts = {NULL_TREE, integer_one_node,
4656 			      NULL_TREE, NULL_TREE, NULL_TREE};
4657   machine_mode addr_mode = TYPE_MODE (type);
4658   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4659   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4660   /* Only true if ratio != 1.  */
4661   bool ok_with_ratio_p = false;
4662   bool ok_without_ratio_p = false;
4663 
4664   if (!aff_combination_const_p (aff_inv))
4665     {
4666       parts.index = integer_one_node;
4667       /* Addressing mode "base + index".  */
4668       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4669       if (ratio != 1)
4670 	{
4671 	  parts.step = wide_int_to_tree (type, ratio);
4672 	  /* Addressing mode "base + index << scale".  */
4673 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4674 	  if (!ok_with_ratio_p)
4675 	    parts.step = NULL_TREE;
4676 	}
4677       if (ok_with_ratio_p || ok_without_ratio_p)
4678 	{
4679 	  if (maybe_ne (aff_inv->offset, 0))
4680 	    {
4681 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4682 	      /* Addressing mode "base + index [<< scale] + offset".  */
4683 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4684 		parts.offset = NULL_TREE;
4685 	      else
4686 		aff_inv->offset = 0;
4687 	    }
4688 
4689 	  move_fixed_address_to_symbol (&parts, aff_inv);
4690 	  /* Base is fixed address and is moved to symbol part.  */
4691 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4692 	    parts.base = NULL_TREE;
4693 
4694 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4695 	  if (parts.symbol != NULL_TREE
4696 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4697 	    {
4698 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4699 	      parts.symbol = NULL_TREE;
4700 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4701 		 outside of address expression in this case.  */
4702 	      simple_inv = false;
4703 	      /* The symbol part is moved back to the base part; the base can't be NULL.  */
4704 	      parts.base = integer_one_node;
4705 	    }
4706 	}
4707       else
4708 	parts.index = NULL_TREE;
4709     }
4710   else
4711     {
4712       poly_int64 ainc_step;
4713       if (can_autoinc
4714 	  && ratio == 1
4715 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4716 	{
4717 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4718 
4719 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4720 	    ainc_offset += ainc_step;
4721 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4722 					addr_mode, mem_mode, as, speed);
4723 	  if (!cost.infinite_cost_p ())
4724 	    {
4725 	      *can_autoinc = true;
4726 	      return cost;
4727 	    }
4728 	  cost = no_cost;
4729 	}
4730       if (!aff_combination_zero_p (aff_inv))
4731 	{
4732 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4733 	  /* Addressing mode "base + offset".  */
4734 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4735 	    parts.offset = NULL_TREE;
4736 	  else
4737 	    aff_inv->offset = 0;
4738 	}
4739     }
4740 
4741   if (simple_inv)
4742     simple_inv = (aff_inv == NULL
4743 		  || aff_combination_const_p (aff_inv)
4744 		  || aff_combination_singleton_var_p (aff_inv));
4745   if (!aff_combination_zero_p (aff_inv))
4746     comp_inv = aff_combination_to_tree (aff_inv);
4747   if (comp_inv != NULL_TREE)
4748     cost = force_var_cost (data, comp_inv, inv_vars);
4749   if (ratio != 1 && parts.step == NULL_TREE)
4750     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4751   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4752     var_cost += add_cost (speed, addr_mode);
4753 
4754   if (comp_inv && inv_expr && !simple_inv)
4755     {
4756       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4757       /* Clear depends on.  */
4758       if (*inv_expr != NULL && inv_vars && *inv_vars)
4759 	bitmap_clear (*inv_vars);
4760 
4761       /* The cost of a small invariant expression adjusted against the loop
4762 	 niters is usually zero, which makes it hard to distinguish from a
4763 	 candidate based on loop invariant variables.  Secondly, the generated
4764 	 invariant expression may not be hoisted out of the loop by a
4765 	 following pass.  We penalize the cost by rounding up in order to
4766 	 neutralize such effects.  */
4767       cost.cost = adjust_setup_cost (data, cost.cost, true);
4768       cost.scratch = cost.cost;
4769     }
4770 
4771   cost += var_cost;
4772   addr = addr_for_mem_ref (&parts, as, false);
4773   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4774   cost += address_cost (addr, mem_mode, as, speed);
4775 
4776   if (parts.symbol != NULL_TREE)
4777     cost.complexity += 1;
4778   /* Don't increase the complexity of adding a scaled index if it's
4779      the only kind of index that the target allows.  */
4780   if (parts.step != NULL_TREE && ok_without_ratio_p)
4781     cost.complexity += 1;
4782   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4783     cost.complexity += 1;
4784   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4785     cost.complexity += 1;
4786 
4787   return cost;
4788 }
4789 
4790 /* Scale (multiply) the computed COST (except the scratch part, which should
4791    be hoisted out of the loop) by header->frequency / AT->frequency, which
4792    makes the expected cost more accurate.  */
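/* For instance, with a precomputed scale factor of 4 for the statement's
   block, a cost of 10 with scratch part 2 becomes 2 + (10 - 2) * 4 = 34.  */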
4793 
4794 static comp_cost
4795 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4796 {
4797   if (data->speed
4798       && data->current_loop->header->count.to_frequency (cfun) > 0)
4799     {
4800       basic_block bb = gimple_bb (at);
4801       gcc_assert (cost.scratch <= cost.cost);
4802       int scale_factor = (int)(intptr_t) bb->aux;
4803       if (scale_factor == 1)
4804 	return cost;
4805 
4806       int64_t scaled_cost
4807 	= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4808 
4809       if (dump_file && (dump_flags & TDF_DETAILS))
4810 	fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4811 		 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4812 		 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4813 
4814       cost.cost = scaled_cost;
4815     }
4816 
4817   return cost;
4818 }
4819 
4820 /* Determines the cost of the computation by which USE is expressed
4821    from induction variable CAND.  If ADDRESS_P is true, we just need
4822    to create an address from it, otherwise we want to get it into
4823    register.  A set of invariants we depend on is stored in INV_VARS.
4824    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4825    addressing is likely.  If INV_EXPR is nonnull, record invariant
4826    expr entry in it.  */
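/* In rough terms (illustrative only): expressing use = inv + ratio * var
   costs force_var_cost (inv), adjusted as setup cost when it becomes a new
   invariant expression, plus a conversion cost if narrowing is needed, a
   multiplication cost when ratio != 1, and an add to join the two parts; the
   total is then scaled by the statement's block frequency, and doloop
   candidates are charged a target-specific extra.  */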
4827 
4828 static comp_cost
4829 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4830 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4831 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4832 {
4833   gimple *at = use->stmt;
4834   tree ubase = use->iv->base, cbase = cand->iv->base;
4835   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4836   tree comp_inv = NULL_TREE;
4837   HOST_WIDE_INT ratio, aratio;
4838   comp_cost cost;
4839   widest_int rat;
4840   aff_tree aff_inv, aff_var;
4841   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4842 
4843   if (inv_vars)
4844     *inv_vars = NULL;
4845   if (can_autoinc)
4846     *can_autoinc = false;
4847   if (inv_expr)
4848     *inv_expr = NULL;
4849 
4850   /* Check if we have enough precision to express the values of use.  */
4851   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4852     return infinite_cost;
4853 
4854   if (address_p
4855       || (use->iv->base_object
4856 	  && cand->iv->base_object
4857 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4858 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4859     {
4860       /* Do not try to express the address of an object with a computation
4861 	 based on the address of a different object.  This may cause problems
4862 	 in RTL-level alias analysis (which does not expect this to happen,
4863 	 as it is invalid in C), and would be unlikely to be useful
4864 	 anyway.  */
4865       if (use->iv->base_object
4866 	  && cand->iv->base_object
4867 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4868 	return infinite_cost;
4869     }
4870 
4871   if (!get_computation_aff_1 (data->current_loop, at, use,
4872 			      cand, &aff_inv, &aff_var, &rat)
4873       || !wi::fits_shwi_p (rat))
4874     return infinite_cost;
4875 
4876   ratio = rat.to_shwi ();
4877   if (address_p)
4878     {
4879       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4880 			       inv_vars, inv_expr, can_autoinc, speed);
4881       cost = get_scaled_computation_cost_at (data, at, cost);
4882       /* For doloop IV cand, add on the extra cost.  */
4883       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4884       return cost;
4885     }
4886 
4887   bool simple_inv = (aff_combination_const_p (&aff_inv)
4888 		     || aff_combination_singleton_var_p (&aff_inv));
4889   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4890   aff_combination_convert (&aff_inv, signed_type);
4891   if (!aff_combination_zero_p (&aff_inv))
4892     comp_inv = aff_combination_to_tree (&aff_inv);
4893 
4894   cost = force_var_cost (data, comp_inv, inv_vars);
4895   if (comp_inv && inv_expr && !simple_inv)
4896     {
4897       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4898       /* Clear depends on.  */
4899       if (*inv_expr != NULL && inv_vars && *inv_vars)
4900 	bitmap_clear (*inv_vars);
4901 
4902       cost.cost = adjust_setup_cost (data, cost.cost);
4903       /* Record setup cost in scratch field.  */
4904       cost.scratch = cost.cost;
4905     }
4906   /* The cost of a constant integer can be covered when adding the invariant
4907      part to the variant part.  */
4908   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4909     cost = no_cost;
4910 
4911   /* Need type narrowing to represent use with cand.  */
4912   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4913     {
4914       machine_mode outer_mode = TYPE_MODE (utype);
4915       machine_mode inner_mode = TYPE_MODE (ctype);
4916       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4917     }
4918 
4919   /* Turn a + i * (-c) into a - i * c.  */
4920   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4921     aratio = -ratio;
4922   else
4923     aratio = ratio;
4924 
4925   if (ratio != 1)
4926     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4927 
4928   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4929      instruction.  */
4930   /* Need to add up the invariant and variant parts.  */
4931   if (comp_inv && !integer_zerop (comp_inv))
4932     cost += add_cost (speed, TYPE_MODE (utype));
4933 
4934   cost = get_scaled_computation_cost_at (data, at, cost);
4935 
4936   /* For doloop IV cand, add on the extra cost.  */
4937   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4938     cost += targetm.doloop_cost_for_generic;
4939 
4940   return cost;
4941 }
4942 
4943 /* Determines cost of computing the use in GROUP with CAND in a generic
4944    expression.  */
4945 
4946 static bool
4947 determine_group_iv_cost_generic (struct ivopts_data *data,
4948 				 struct iv_group *group, struct iv_cand *cand)
4949 {
4950   comp_cost cost;
4951   iv_inv_expr_ent *inv_expr = NULL;
4952   bitmap inv_vars = NULL, inv_exprs = NULL;
4953   struct iv_use *use = group->vuses[0];
4954 
4955   /* The simple case first -- if we need to express value of the preserved
4956      original biv, the cost is 0.  This also prevents us from counting the
4957      cost of increment twice -- once at this use and once in the cost of
4958      the candidate.  */
4959   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4960     cost = no_cost;
4961   else
4962     cost = get_computation_cost (data, use, cand, false,
4963 				 &inv_vars, NULL, &inv_expr);
4964 
4965   if (inv_expr)
4966     {
4967       inv_exprs = BITMAP_ALLOC (NULL);
4968       bitmap_set_bit (inv_exprs, inv_expr->id);
4969     }
4970   set_group_iv_cost (data, group, cand, cost, inv_vars,
4971 		     NULL_TREE, ERROR_MARK, inv_exprs);
4972   return !cost.infinite_cost_p ();
4973 }
4974 
4975 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4976 
4977 static bool
4978 determine_group_iv_cost_address (struct ivopts_data *data,
4979 				 struct iv_group *group, struct iv_cand *cand)
4980 {
4981   unsigned i;
4982   bitmap inv_vars = NULL, inv_exprs = NULL;
4983   bool can_autoinc;
4984   iv_inv_expr_ent *inv_expr = NULL;
4985   struct iv_use *use = group->vuses[0];
4986   comp_cost sum_cost = no_cost, cost;
4987 
4988   cost = get_computation_cost (data, use, cand, true,
4989 			       &inv_vars, &can_autoinc, &inv_expr);
4990 
4991   if (inv_expr)
4992     {
4993       inv_exprs = BITMAP_ALLOC (NULL);
4994       bitmap_set_bit (inv_exprs, inv_expr->id);
4995     }
4996   sum_cost = cost;
4997   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4998     {
4999       if (can_autoinc)
5000 	sum_cost -= cand->cost_step;
5001       /* If we generated the candidate solely for exploiting autoincrement
5002 	 opportunities, and it turns out it can't be used, set the cost to
5003 	 infinity to make sure we ignore it.  */
5004       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5005 	sum_cost = infinite_cost;
5006     }
5007 
5008   /* Uses in a group can share setup code, so only add setup cost once.  */
5009   cost -= cost.scratch;
5010   /* Compute and add the costs for the rest of the uses in this group.  */
5011   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5012     {
5013       struct iv_use *next = group->vuses[i];
5014 
5015       /* TODO: We could skip computing cost for sub iv_use when it has the
5016 	 same cost as the first iv_use, but the cost really depends on the
5017 	 offset and where the iv_use is.  */
5018       cost = get_computation_cost (data, next, cand, true,
5019 				   NULL, &can_autoinc, &inv_expr);
5020       if (inv_expr)
5021 	{
5022 	  if (!inv_exprs)
5023 	    inv_exprs = BITMAP_ALLOC (NULL);
5024 
5025 	  bitmap_set_bit (inv_exprs, inv_expr->id);
5026 	}
5027       sum_cost += cost;
5028     }
5029   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5030 		     NULL_TREE, ERROR_MARK, inv_exprs);
5031 
5032   return !sum_cost.infinite_cost_p ();
5033 }
5034 
5035 /* Computes value of candidate CAND at position AT in iteration NITER, and
5036    stores it to VAL.  */
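/* In other words, VAL = BASE + STEP * NITER, with one more STEP added when AT
   lies after the candidate increment; the arithmetic is done in an unsigned
   type (sizetype for pointer ivs) so that any wrap-around is well defined.  */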
5037 
5038 static void
5039 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5040 	       aff_tree *val)
5041 {
5042   aff_tree step, delta, nit;
5043   struct iv *iv = cand->iv;
5044   tree type = TREE_TYPE (iv->base);
5045   tree steptype;
5046   if (POINTER_TYPE_P (type))
5047     steptype = sizetype;
5048   else
5049     steptype = unsigned_type_for (type);
5050 
5051   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5052   aff_combination_convert (&step, steptype);
5053   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5054   aff_combination_convert (&nit, steptype);
5055   aff_combination_mult (&nit, &step, &delta);
5056   if (stmt_after_increment (loop, cand, at))
5057     aff_combination_add (&delta, &step);
5058 
5059   tree_to_aff_combination (iv->base, type, val);
5060   if (!POINTER_TYPE_P (type))
5061     aff_combination_convert (val, steptype);
5062   aff_combination_add (val, &delta);
5063 }
5064 
5065 /* Returns period of induction variable iv.  */
5066 
5067 static tree
5068 iv_period (struct iv *iv)
5069 {
5070   tree step = iv->step, period, type;
5071   tree pow2div;
5072 
5073   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5074 
5075   type = unsigned_type_for (TREE_TYPE (step));
5076   /* The period of the iv is lcm (step, type_range) / step - 1,
5077      i.e., N * type_range / step - 1.  Since the type range is a power
5078      of two, N == step >> num_of_ending_zeros_binary (step),
5079      so the final result is
5080 
5081        (type_range >> num_of_ending_zeros_binary (step)) - 1
5082 
5083   */
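  /* As a purely illustrative example: a 32-bit unsigned iv with step 4 has
     two trailing zero bits in its step, so its period is
     (2^32 >> 2) - 1 == 0x3fffffff, i.e. the iv does not return to its
     starting value within that many increments.  */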
5084   pow2div = num_ending_zeros (step);
5085 
5086   period = build_low_bits_mask (type,
5087 				(TYPE_PRECISION (type)
5088 				 - tree_to_uhwi (pow2div)));
5089 
5090   return period;
5091 }
5092 
5093 /* Returns the comparison operator used when eliminating the iv USE.  */
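/* If the exit edge is the condition's true edge, the loop is left exactly when
   the condition holds, so the eliminated test becomes CAND == BOUND; otherwise
   the in-loop condition is rewritten as CAND != BOUND.  */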
5094 
5095 static enum tree_code
5096 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5097 {
5098   class loop *loop = data->current_loop;
5099   basic_block ex_bb;
5100   edge exit;
5101 
5102   ex_bb = gimple_bb (use->stmt);
5103   exit = EDGE_SUCC (ex_bb, 0);
5104   if (flow_bb_inside_loop_p (loop, exit->dest))
5105     exit = EDGE_SUCC (ex_bb, 1);
5106 
5107   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5108 }
5109 
5110 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5111    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5112    calculation is performed in a non-wrapping type.
5113 
5114    TODO: More generally, we could test for the situation that
5115 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5116 	 This would require knowing the sign of OFFSET.  */
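/* For instance (the names here are hypothetical), if BASE is defined by
   base_7 = start_3 + off_5 and OFFSET expands to the same value as off_5,
   then BASE - OFFSET is just start_3, which was already computed in the
   non-wrapping type, so the subtraction cannot overflow.  */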
5117 
5118 static bool
5119 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5120 {
5121   enum tree_code code;
5122   tree e1, e2;
5123   aff_tree aff_e1, aff_e2, aff_offset;
5124 
5125   if (!nowrap_type_p (TREE_TYPE (base)))
5126     return false;
5127 
5128   base = expand_simple_operations (base);
5129 
5130   if (TREE_CODE (base) == SSA_NAME)
5131     {
5132       gimple *stmt = SSA_NAME_DEF_STMT (base);
5133 
5134       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5135 	return false;
5136 
5137       code = gimple_assign_rhs_code (stmt);
5138       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5139 	return false;
5140 
5141       e1 = gimple_assign_rhs1 (stmt);
5142       e2 = gimple_assign_rhs2 (stmt);
5143     }
5144   else
5145     {
5146       code = TREE_CODE (base);
5147       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5148 	return false;
5149       e1 = TREE_OPERAND (base, 0);
5150       e2 = TREE_OPERAND (base, 1);
5151     }
5152 
5153   /* Use affine expansion as deeper inspection to prove the equality.  */
5154   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5155 				  &aff_e2, &data->name_expansion_cache);
5156   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5157 				  &aff_offset, &data->name_expansion_cache);
5158   aff_combination_scale (&aff_offset, -1);
5159   switch (code)
5160     {
5161     case PLUS_EXPR:
5162       aff_combination_add (&aff_e2, &aff_offset);
5163       if (aff_combination_zero_p (&aff_e2))
5164 	return true;
5165 
5166       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5167 				      &aff_e1, &data->name_expansion_cache);
5168       aff_combination_add (&aff_e1, &aff_offset);
5169       return aff_combination_zero_p (&aff_e1);
5170 
5171     case POINTER_PLUS_EXPR:
5172       aff_combination_add (&aff_e2, &aff_offset);
5173       return aff_combination_zero_p (&aff_e2);
5174 
5175     default:
5176       return false;
5177     }
5178 }
5179 
5180 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5181    comparison with CAND.  NITER describes the number of iterations of
5182    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5183 
5184    We aim to handle the following situation:
5185 
5186    sometype *base, *p;
5187    int a, b, i;
5188 
5189    i = a;
5190    p = p_0 = base + a;
5191 
5192    do
5193      {
5194        bla (*p);
5195        p++;
5196        i++;
5197      }
5198    while (i < b);
5199 
5200    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5201    We aim to optimize this to
5202 
5203    p = p_0 = base + a;
5204    do
5205      {
5206        bla (*p);
5207        p++;
5208      }
5209    while (p < p_0 - a + b);
5210 
5211    This preserves correctness, since the pointer arithmetic does not
5212    overflow.  More precisely:
5213 
5214    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5215       overflow in computing it or the values of p.
5216    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5217       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5218 
5219 static bool
5220 iv_elimination_compare_lt (struct ivopts_data *data,
5221 			   struct iv_cand *cand, enum tree_code *comp_p,
5222 			   class tree_niter_desc *niter)
5223 {
5224   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5225   class aff_tree nit, tmpa, tmpb;
5226   enum tree_code comp;
5227   HOST_WIDE_INT step;
5228 
5229   /* We need to know that the candidate induction variable does not overflow.
5230      While more complex analysis may be used to prove this, for now just
5231      check that the variable appears in the original program and that it
5232      is computed in a type that guarantees no overflows.  */
5233   cand_type = TREE_TYPE (cand->iv->base);
5234   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5235     return false;
5236 
5237   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5238      the calculation of the BOUND could overflow, making the comparison
5239      invalid.  */
5240   if (!data->loop_single_exit_p)
5241     return false;
5242 
5243   /* We need to be able to decide whether candidate is increasing or decreasing
5244      in order to choose the right comparison operator.  */
5245   if (!cst_and_fits_in_hwi (cand->iv->step))
5246     return false;
5247   step = int_cst_value (cand->iv->step);
5248 
5249   /* Check that the number of iterations matches the expected pattern:
5250      a + 1 > b ? 0 : b - a - 1.  */
5251   mbz = niter->may_be_zero;
5252   if (TREE_CODE (mbz) == GT_EXPR)
5253     {
5254       /* Handle a + 1 > b.  */
5255       tree op0 = TREE_OPERAND (mbz, 0);
5256       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5257 	{
5258 	  a = TREE_OPERAND (op0, 0);
5259 	  b = TREE_OPERAND (mbz, 1);
5260 	}
5261       else
5262 	return false;
5263     }
5264   else if (TREE_CODE (mbz) == LT_EXPR)
5265     {
5266       tree op1 = TREE_OPERAND (mbz, 1);
5267 
5268       /* Handle b < a + 1.  */
5269       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5270 	{
5271 	  a = TREE_OPERAND (op1, 0);
5272 	  b = TREE_OPERAND (mbz, 0);
5273 	}
5274       else
5275 	return false;
5276     }
5277   else
5278     return false;
5279 
5280   /* Expected number of iterations is B - A - 1.  Check that it matches
5281      the actual number, i.e., that B - A - NITER = 1.  */
5282   tree_to_aff_combination (niter->niter, nit_type, &nit);
5283   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5284   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5285   aff_combination_scale (&nit, -1);
5286   aff_combination_scale (&tmpa, -1);
5287   aff_combination_add (&tmpb, &tmpa);
5288   aff_combination_add (&tmpb, &nit);
5289   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5290     return false;
5291 
5292   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5293      overflow.  */
5294   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5295 			cand->iv->step,
5296 			fold_convert (TREE_TYPE (cand->iv->step), a));
5297   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5298     return false;
5299 
5300   /* Determine the new comparison operator.  */
5301   comp = step < 0 ? GT_EXPR : LT_EXPR;
5302   if (*comp_p == NE_EXPR)
5303     *comp_p = comp;
5304   else if (*comp_p == EQ_EXPR)
5305     *comp_p = invert_tree_comparison (comp, false);
5306   else
5307     gcc_unreachable ();
5308 
5309   return true;
5310 }
5311 
5312 /* Check whether it is possible to express the condition in USE by comparison
5313    of candidate CAND.  If so, store the value compared with to BOUND, and the
5314    comparison operator to COMP.  */
5315 
5316 static bool
5317 may_eliminate_iv (struct ivopts_data *data,
5318 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5319 		  enum tree_code *comp)
5320 {
5321   basic_block ex_bb;
5322   edge exit;
5323   tree period;
5324   class loop *loop = data->current_loop;
5325   aff_tree bnd;
5326   class tree_niter_desc *desc = NULL;
5327 
5328   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5329     return false;
5330 
5331   /* For now works only for exits that dominate the loop latch.
5332      TODO: extend to other conditions inside loop body.  */
5333   ex_bb = gimple_bb (use->stmt);
5334   if (use->stmt != last_stmt (ex_bb)
5335       || gimple_code (use->stmt) != GIMPLE_COND
5336       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5337     return false;
5338 
5339   exit = EDGE_SUCC (ex_bb, 0);
5340   if (flow_bb_inside_loop_p (loop, exit->dest))
5341     exit = EDGE_SUCC (ex_bb, 1);
5342   if (flow_bb_inside_loop_p (loop, exit->dest))
5343     return false;
5344 
5345   desc = niter_for_exit (data, exit);
5346   if (!desc)
5347     return false;
5348 
5349   /* Determine whether we can use the variable to test the exit condition.
5350      This is the case iff the period of the induction variable is greater
5351      than the number of iterations for which the exit condition is true.  */
5352   period = iv_period (cand->iv);
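  /* E.g. an 8-bit unsigned candidate with step 1 has period 255, so it can
     only replace the exit test when the loop provably runs at most 255 times
     (254 times when the use is after the increment, see below).  */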
5353 
5354   /* If the number of iterations is constant, compare against it directly.  */
5355   if (TREE_CODE (desc->niter) == INTEGER_CST)
5356     {
5357       /* See cand_value_at.  */
5358       if (stmt_after_increment (loop, cand, use->stmt))
5359 	{
5360 	  if (!tree_int_cst_lt (desc->niter, period))
5361 	    return false;
5362 	}
5363       else
5364 	{
5365 	  if (tree_int_cst_lt (period, desc->niter))
5366 	    return false;
5367 	}
5368     }
5369 
5370   /* If not, and if this is the only possible exit of the loop, see whether
5371      we can get a conservative estimate on the number of iterations of the
5372      entire loop and compare against that instead.  */
5373   else
5374     {
5375       widest_int period_value, max_niter;
5376 
5377       max_niter = desc->max;
5378       if (stmt_after_increment (loop, cand, use->stmt))
5379 	max_niter += 1;
5380       period_value = wi::to_widest (period);
5381       if (wi::gtu_p (max_niter, period_value))
5382 	{
5383 	  /* See if we can take advantage of inferred loop bound
5384 	     information.  */
5385 	  if (data->loop_single_exit_p)
5386 	    {
5387 	      if (!max_loop_iterations (loop, &max_niter))
5388 		return false;
5389 	      /* The loop bound is already adjusted by adding 1.  */
5390 	      if (wi::gtu_p (max_niter, period_value))
5391 		return false;
5392 	    }
5393 	  else
5394 	    return false;
5395 	}
5396     }
5397 
5398   /* For doloop IV cand, the bound would be zero.  It's safe whether
5399      may_be_zero is set or not.  */
5400   if (cand->doloop_p)
5401     {
5402       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5403       *comp = iv_elimination_compare (data, use);
5404       return true;
5405     }
5406 
5407   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5408 
5409   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5410 			 aff_combination_to_tree (&bnd));
5411   *comp = iv_elimination_compare (data, use);
5412 
5413   /* It is unlikely that computing the number of iterations using division
5414      would be more profitable than keeping the original induction variable.  */
5415   if (expression_expensive_p (*bound))
5416     return false;
5417 
5418   /* Sometimes, it is possible to handle the situation that the number of
5419      iterations may be zero unless additional assumptions hold, by using <
5420      instead of != in the exit condition.
5421 
5422      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5423 	   base the exit condition on it.  However, that is often too
5424 	   expensive.  */
5425   if (!integer_zerop (desc->may_be_zero))
5426     return iv_elimination_compare_lt (data, cand, comp, desc);
5427 
5428   return true;
5429 }
5430 
5431  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5432     be copied if it is used in the loop body and DATA->body_includes_call.  */
5433 
5434 static int
5435 parm_decl_cost (struct ivopts_data *data, tree bound)
5436 {
5437   tree sbound = bound;
5438   STRIP_NOPS (sbound);
5439 
5440   if (TREE_CODE (sbound) == SSA_NAME
5441       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5442       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5443       && data->body_includes_call)
5444     return COSTS_N_INSNS (1);
5445 
5446   return 0;
5447 }
5448 
5449 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5450 
5451 static bool
5452 determine_group_iv_cost_cond (struct ivopts_data *data,
5453 			      struct iv_group *group, struct iv_cand *cand)
5454 {
5455   tree bound = NULL_TREE;
5456   struct iv *cmp_iv;
5457   bitmap inv_exprs = NULL;
5458   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5459   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5460   enum comp_iv_rewrite rewrite_type;
5461   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5462   tree *control_var, *bound_cst;
5463   enum tree_code comp = ERROR_MARK;
5464   struct iv_use *use = group->vuses[0];
5465 
5466   /* Extract condition operands.  */
5467   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5468 					&bound_cst, NULL, &cmp_iv);
5469   gcc_assert (rewrite_type != COMP_IV_NA);
5470 
5471   /* Try iv elimination.  */
5472   if (rewrite_type == COMP_IV_ELIM
5473       && may_eliminate_iv (data, use, cand, &bound, &comp))
5474     {
5475       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5476       if (elim_cost.cost == 0)
5477 	elim_cost.cost = parm_decl_cost (data, bound);
5478       else if (TREE_CODE (bound) == INTEGER_CST)
5479 	elim_cost.cost = 0;
5480       /* If we replace a loop condition 'i < n' with 'p < base + n',
5481 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5482 	 'base' and 'n' will be live during the loop.	 More likely,
5483 	 'base + n' will be loop invariant, resulting in only one live value
5484 	 during the loop.  So in that case we clear inv_vars_elim and set
5485 	 inv_expr_elim instead.  */
5486       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5487 	{
5488 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5489 	  bitmap_clear (inv_vars_elim);
5490 	}
5491       /* The bound is a loop invariant, so it will be only computed
5492 	 once.  */
5493       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5494     }
5495 
5496   /* When the condition is a comparison of the candidate IV against
5497      zero, prefer this IV.
5498 
5499      TODO: The constant that we're subtracting from the cost should
5500      be target-dependent.  This information should be added to the
5501      target costs for each backend.  */
5502   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5503       && integer_zerop (*bound_cst)
5504       && (operand_equal_p (*control_var, cand->var_after, 0)
5505 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5506     elim_cost -= 1;
5507 
5508   express_cost = get_computation_cost (data, use, cand, false,
5509 				       &inv_vars_express, NULL,
5510 				       &inv_expr_express);
5511   if (cmp_iv != NULL)
5512     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5513 
5514   /* Count the cost of the original bound as well.  */
5515   bound_cost = force_var_cost (data, *bound_cst, NULL);
5516   if (bound_cost.cost == 0)
5517     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5518   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5519     bound_cost.cost = 0;
5520   express_cost += bound_cost;
5521 
5522   /* Choose the better approach, preferring the eliminated IV. */
5523   if (elim_cost <= express_cost)
5524     {
5525       cost = elim_cost;
5526       inv_vars = inv_vars_elim;
5527       inv_vars_elim = NULL;
5528       inv_expr = inv_expr_elim;
5529       /* For doloop candidate/use pair, adjust to zero cost.  */
5530       if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5531 	cost = no_cost;
5532     }
5533   else
5534     {
5535       cost = express_cost;
5536       inv_vars = inv_vars_express;
5537       inv_vars_express = NULL;
5538       bound = NULL_TREE;
5539       comp = ERROR_MARK;
5540       inv_expr = inv_expr_express;
5541     }
5542 
5543   if (inv_expr)
5544     {
5545       inv_exprs = BITMAP_ALLOC (NULL);
5546       bitmap_set_bit (inv_exprs, inv_expr->id);
5547     }
5548   set_group_iv_cost (data, group, cand, cost,
5549 		     inv_vars, bound, comp, inv_exprs);
5550 
5551   if (inv_vars_elim)
5552     BITMAP_FREE (inv_vars_elim);
5553   if (inv_vars_express)
5554     BITMAP_FREE (inv_vars_express);
5555 
5556   return !cost.infinite_cost_p ();
5557 }
5558 
5559 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5560    if the uses in GROUP cannot be represented with CAND.  */
5561 
5562 static bool
5563 determine_group_iv_cost (struct ivopts_data *data,
5564 			 struct iv_group *group, struct iv_cand *cand)
5565 {
5566   switch (group->type)
5567     {
5568     case USE_NONLINEAR_EXPR:
5569       return determine_group_iv_cost_generic (data, group, cand);
5570 
5571     case USE_REF_ADDRESS:
5572     case USE_PTR_ADDRESS:
5573       return determine_group_iv_cost_address (data, group, cand);
5574 
5575     case USE_COMPARE:
5576       return determine_group_iv_cost_cond (data, group, cand);
5577 
5578     default:
5579       gcc_unreachable ();
5580     }
5581 }
5582 
5583 /* Return true if get_computation_cost indicates that autoincrement is
5584    a possibility for the pair of USE and CAND, false otherwise.  */
5585 
5586 static bool
5587 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5588 			   struct iv_cand *cand)
5589 {
5590   if (!address_p (use->type))
5591     return false;
5592 
5593   bool can_autoinc = false;
5594   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5595   return can_autoinc;
5596 }
5597 
5598 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5599    use that allows autoincrement, and set their AINC_USE if possible.  */
5600 
5601 static void
5602 set_autoinc_for_original_candidates (struct ivopts_data *data)
5603 {
5604   unsigned i, j;
5605 
5606   for (i = 0; i < data->vcands.length (); i++)
5607     {
5608       struct iv_cand *cand = data->vcands[i];
5609       struct iv_use *closest_before = NULL;
5610       struct iv_use *closest_after = NULL;
5611       if (cand->pos != IP_ORIGINAL)
5612 	continue;
5613 
5614       for (j = 0; j < data->vgroups.length (); j++)
5615 	{
5616 	  struct iv_group *group = data->vgroups[j];
5617 	  struct iv_use *use = group->vuses[0];
5618 	  unsigned uid = gimple_uid (use->stmt);
5619 
5620 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5621 	    continue;
5622 
5623 	  if (uid < gimple_uid (cand->incremented_at)
5624 	      && (closest_before == NULL
5625 		  || uid > gimple_uid (closest_before->stmt)))
5626 	    closest_before = use;
5627 
5628 	  if (uid > gimple_uid (cand->incremented_at)
5629 	      && (closest_after == NULL
5630 		  || uid < gimple_uid (closest_after->stmt)))
5631 	    closest_after = use;
5632 	}
5633 
5634       if (closest_before != NULL
5635 	  && autoinc_possible_for_pair (data, closest_before, cand))
5636 	cand->ainc_use = closest_before;
5637       else if (closest_after != NULL
5638 	       && autoinc_possible_for_pair (data, closest_after, cand))
5639 	cand->ainc_use = closest_after;
5640     }
5641 }
5642 
5643 /* Relate compare use with all candidates.  */
5644 
5645 static void
5646 relate_compare_use_with_all_cands (struct ivopts_data *data)
5647 {
5648   unsigned i, count = data->vcands.length ();
5649   for (i = 0; i < data->vgroups.length (); i++)
5650     {
5651       struct iv_group *group = data->vgroups[i];
5652 
5653       if (group->type == USE_COMPARE)
5654 	bitmap_set_range (group->related_cands, 0, count);
5655     }
5656 }
5657 
5658 /* Add one doloop dedicated IV candidate:
5659      - Base is (may_be_zero ? 1 : (niter + 1)).
5660      - Step is -1.  */
5661 
5662 static void
5663 add_iv_candidate_for_doloop (struct ivopts_data *data)
5664 {
5665   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5666   gcc_assert (niter_desc && niter_desc->assumptions);
5667 
5668   tree niter = niter_desc->niter;
5669   tree ntype = TREE_TYPE (niter);
5670   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5671 
5672   tree may_be_zero = niter_desc->may_be_zero;
5673   if (may_be_zero && integer_zerop (may_be_zero))
5674     may_be_zero = NULL_TREE;
5675   if (may_be_zero)
5676     {
5677       if (COMPARISON_CLASS_P (may_be_zero))
5678 	{
5679 	  niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5680 			       build_int_cst (ntype, 0),
5681 			       rewrite_to_non_trapping_overflow (niter));
5682 	}
5683       /* Don't try to obtain the iteration count expression when may_be_zero is
5684 	 integer_nonzerop (the iteration count is then one) or anything else.  */
5685       else
5686 	return;
5687     }
5688 
5689   tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5690 			   build_int_cst (ntype, 1));
5691   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5692 }
5693 
5694 /* Finds the candidates for the induction variables.  */
5695 
5696 static void
5697 find_iv_candidates (struct ivopts_data *data)
5698 {
5699   /* Add commonly used ivs.  */
5700   add_standard_iv_candidates (data);
5701 
5702   /* Add doloop dedicated ivs.  */
5703   if (data->doloop_use_p)
5704     add_iv_candidate_for_doloop (data);
5705 
5706   /* Add old induction variables.  */
5707   add_iv_candidate_for_bivs (data);
5708 
5709   /* Add induction variables derived from uses.  */
5710   add_iv_candidate_for_groups (data);
5711 
5712   set_autoinc_for_original_candidates (data);
5713 
5714   /* Record the important candidates.  */
5715   record_important_candidates (data);
5716 
5717   /* Relate compare iv_use with all candidates.  */
5718   if (!data->consider_all_candidates)
5719     relate_compare_use_with_all_cands (data);
5720 
5721   if (dump_file && (dump_flags & TDF_DETAILS))
5722     {
5723       unsigned i;
5724 
5725       fprintf (dump_file, "\n<Important Candidates>:\t");
5726       for (i = 0; i < data->vcands.length (); i++)
5727 	if (data->vcands[i]->important)
5728 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5729       fprintf (dump_file, "\n");
5730 
5731       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5732       for (i = 0; i < data->vgroups.length (); i++)
5733 	{
5734 	  struct iv_group *group = data->vgroups[i];
5735 
5736 	  if (group->related_cands)
5737 	    {
5738 	      fprintf (dump_file, "  Group %d:\t", group->id);
5739 	      dump_bitmap (dump_file, group->related_cands);
5740 	    }
5741 	}
5742       fprintf (dump_file, "\n");
5743     }
5744 }
5745 
5746 /* Determines the cost of computing each use group with each iv candidate.  */
5747 
5748 static void
5749 determine_group_iv_costs (struct ivopts_data *data)
5750 {
5751   unsigned i, j;
5752   struct iv_cand *cand;
5753   struct iv_group *group;
5754   bitmap to_clear = BITMAP_ALLOC (NULL);
5755 
5756   alloc_use_cost_map (data);
5757 
5758   for (i = 0; i < data->vgroups.length (); i++)
5759     {
5760       group = data->vgroups[i];
5761 
5762       if (data->consider_all_candidates)
5763 	{
5764 	  for (j = 0; j < data->vcands.length (); j++)
5765 	    {
5766 	      cand = data->vcands[j];
5767 	      determine_group_iv_cost (data, group, cand);
5768 	    }
5769 	}
5770       else
5771 	{
5772 	  bitmap_iterator bi;
5773 
5774 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5775 	    {
5776 	      cand = data->vcands[j];
5777 	      if (!determine_group_iv_cost (data, group, cand))
5778 		bitmap_set_bit (to_clear, j);
5779 	    }
5780 
5781 	  /* Remove the candidates for which the cost is infinite from
5782 	     the list of related candidates.  */
5783 	  bitmap_and_compl_into (group->related_cands, to_clear);
5784 	  bitmap_clear (to_clear);
5785 	}
5786     }
5787 
5788   BITMAP_FREE (to_clear);
5789 
5790   if (dump_file && (dump_flags & TDF_DETAILS))
5791     {
5792       bitmap_iterator bi;
5793 
5794       /* Dump invariant variables.  */
5795       fprintf (dump_file, "\n<Invariant Vars>:\n");
5796       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5797 	{
5798 	  struct version_info *info = ver_info (data, i);
5799 	  if (info->inv_id)
5800 	    {
5801 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5802 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5803 	      fprintf (dump_file, "%s\n",
5804 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5805 	    }
5806 	}
5807 
5808       /* Dump invariant expressions.  */
5809       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5810       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5811 
5812       for (hash_table<iv_inv_expr_hasher>::iterator it
5813 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5814 	   ++it)
5815 	list.safe_push (*it);
5816 
5817       list.qsort (sort_iv_inv_expr_ent);
5818 
5819       for (i = 0; i < list.length (); ++i)
5820 	{
5821 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5822 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5823 	  fprintf (dump_file, "\n");
5824 	}
5825 
5826       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5827 
5828       for (i = 0; i < data->vgroups.length (); i++)
5829 	{
5830 	  group = data->vgroups[i];
5831 
5832 	  fprintf (dump_file, "Group %d:\n", i);
5833 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5834 	  for (j = 0; j < group->n_map_members; j++)
5835 	    {
5836 	      if (!group->cost_map[j].cand
5837 		  || group->cost_map[j].cost.infinite_cost_p ())
5838 		continue;
5839 
5840 	      fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
5841 		       group->cost_map[j].cand->id,
5842 		       group->cost_map[j].cost.cost,
5843 		       group->cost_map[j].cost.complexity);
5844 	      if (!group->cost_map[j].inv_exprs
5845 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5846 		fprintf (dump_file, "NIL;\t");
5847 	      else
5848 		bitmap_print (dump_file,
5849 			      group->cost_map[j].inv_exprs, "", ";\t");
5850 	      if (!group->cost_map[j].inv_vars
5851 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5852 		fprintf (dump_file, "NIL;\n");
5853 	      else
5854 		bitmap_print (dump_file,
5855 			      group->cost_map[j].inv_vars, "", "\n");
5856 	    }
5857 
5858 	  fprintf (dump_file, "\n");
5859 	}
5860       fprintf (dump_file, "\n");
5861     }
5862 }
5863 
5864 /* Determines cost of the candidate CAND.  */
5865 
5866 static void
5867 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5868 {
5869   comp_cost cost_base;
5870   int64_t cost, cost_step;
5871   tree base;
5872 
5873   gcc_assert (cand->iv != NULL);
5874 
5875   /* There are two costs associated with the candidate -- its increment
5876      and its initialization.  The second is almost negligible for any loop
5877      that rolls enough, so we take it into account only a little.  */
5878 
5879   base = cand->iv->base;
5880   cost_base = force_var_cost (data, base, NULL);
5881   /* It is exceptional for the iv register to happen to be initialized with
5882      the proper value at no cost.  In general, there will be at least a regcopy
5883      or a const set.  */
5884   if (cost_base.cost == 0)
5885     cost_base.cost = COSTS_N_INSNS (1);
5886   /* Doloop decrement should be considered as zero cost.  */
5887   if (cand->doloop_p)
5888     cost_step = 0;
5889   else
5890     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5891   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5892 
5893   /* Prefer the original ivs unless we may gain something by replacing them;
5894      the reason is to make debugging simpler.  This is not relevant for
5895      artificial ivs created by other optimization passes.  */
5896   if ((cand->pos != IP_ORIGINAL
5897        || !SSA_NAME_VAR (cand->var_before)
5898        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5899       /* Prefer doloop as well.  */
5900       && !cand->doloop_p)
5901     cost++;
5902 
5903   /* Prefer not to insert statements into latch unless there are some
5904      already (so that we do not create unnecessary jumps).  */
5905   if (cand->pos == IP_END
5906       && empty_block_p (ip_end_pos (data->current_loop)))
5907     cost++;
5908 
5909   cand->cost = cost;
5910   cand->cost_step = cost_step;
5911 }
5912 
5913 /* Determines costs of computation of the candidates.  */
5914 
5915 static void
5916 determine_iv_costs (struct ivopts_data *data)
5917 {
5918   unsigned i;
5919 
5920   if (dump_file && (dump_flags & TDF_DETAILS))
5921     {
5922       fprintf (dump_file, "<Candidate Costs>:\n");
5923       fprintf (dump_file, "  cand\tcost\n");
5924     }
5925 
5926   for (i = 0; i < data->vcands.length (); i++)
5927     {
5928       struct iv_cand *cand = data->vcands[i];
5929 
5930       determine_iv_cost (data, cand);
5931 
5932       if (dump_file && (dump_flags & TDF_DETAILS))
5933 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5934     }
5935 
5936   if (dump_file && (dump_flags & TDF_DETAILS))
5937     fprintf (dump_file, "\n");
5938 }
5939 
5940 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5941    induction variables.  Note N_INVS includes both invariant variables and
5942    invariant expressions.  */
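/* A purely illustrative example (all numbers are made up): with 10 available
   registers, 3 reserved registers, 2 registers already used and 4 new ones,
   regs_needed is 6 and 6 + 3 < 10, so the estimate is simply n_new plus the
   final n_cands bonus.  If instead 12 new registers were needed, the excess
   over the 10 available ones would be charged at target_spill_cost (twice
   that for excess candidate registers), on top of target_reg_cost for the
   registers kept live.  */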
5943 
5944 static unsigned
5945 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5946 			      unsigned n_cands)
5947 {
5948   unsigned cost;
5949   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5950   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5951   bool speed = data->speed;
5952 
5953   /* If there is a call in the loop body, the call-clobbered registers
5954      are not available for loop invariants.  */
5955   if (data->body_includes_call)
5956     available_regs = available_regs - target_clobbered_regs;
5957 
5958   /* If we have enough registers.  */
5959   if (regs_needed + target_res_regs < available_regs)
5960     cost = n_new;
5961   /* If close to running out of registers, try to preserve them.  */
5962   else if (regs_needed <= available_regs)
5963     cost = target_reg_cost [speed] * regs_needed;
5964   /* If more registers are needed than available, but the number of candidates
5965      still fits, we penalize the extra registers using target_spill_cost.  */
5966   else if (n_cands <= available_regs)
5967     cost = target_reg_cost [speed] * available_regs
5968 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5969   /* If even the number of candidates exceeds the available registers, we
5970      penalize the extra candidate registers using target_spill_cost * 2, since
5971      it is more expensive to spill an induction variable than an invariant.  */
5972   else
5973     cost = target_reg_cost [speed] * available_regs
5974 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5975 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5976 
5977   /* Finally, add the number of candidates, so that we prefer eliminating
5978      induction variables if possible.  */
5979   return cost + n_cands;
5980 }
5981 
5982 /* For each size of the induction variable set determine the penalty.  */
5983 
5984 static void
5985 determine_set_costs (struct ivopts_data *data)
5986 {
5987   unsigned j, n;
5988   gphi *phi;
5989   gphi_iterator psi;
5990   tree op;
5991   class loop *loop = data->current_loop;
5992   bitmap_iterator bi;
5993 
5994   if (dump_file && (dump_flags & TDF_DETAILS))
5995     {
5996       fprintf (dump_file, "<Global Costs>:\n");
5997       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5998       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5999       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6000       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6001     }
6002 
6003   n = 0;
6004   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6005     {
6006       phi = psi.phi ();
6007       op = PHI_RESULT (phi);
6008 
6009       if (virtual_operand_p (op))
6010 	continue;
6011 
6012       if (get_iv (data, op))
6013 	continue;
6014 
6015       if (!POINTER_TYPE_P (TREE_TYPE (op))
6016 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6017 	continue;
6018 
6019       n++;
6020     }
6021 
6022   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6023     {
6024       struct version_info *info = ver_info (data, j);
6025 
6026       if (info->inv_id && info->has_nonlin_use)
6027 	n++;
6028     }
6029 
6030   data->regs_used = n;
6031   if (dump_file && (dump_flags & TDF_DETAILS))
6032     fprintf (dump_file, "  regs_used %d\n", n);
6033 
6034   if (dump_file && (dump_flags & TDF_DETAILS))
6035     {
6036       fprintf (dump_file, "  cost for size:\n");
6037       fprintf (dump_file, "  ivs\tcost\n");
6038       for (j = 0; j <= 2 * target_avail_regs; j++)
6039 	fprintf (dump_file, "  %d\t%d\n", j,
6040 		 ivopts_estimate_reg_pressure (data, 0, j));
6041       fprintf (dump_file, "\n");
6042     }
6043 }
6044 
6045 /* Returns true if A is a cheaper cost pair than B.  */
6046 
6047 static bool
6048 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6049 {
6050   if (!a)
6051     return false;
6052 
6053   if (!b)
6054     return true;
6055 
6056   if (a->cost < b->cost)
6057     return true;
6058 
6059   if (b->cost < a->cost)
6060     return false;
6061 
6062   /* In case the costs are the same, prefer the cheaper candidate.  */
6063   if (a->cand->cost < b->cand->cost)
6064     return true;
6065 
6066   return false;
6067 }
6068 
6069 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6070    for more expensive, equal and cheaper respectively.  */
6071 
6072 static int
6073 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6074 {
6075   if (cheaper_cost_pair (a, b))
6076     return -1;
6077   if (cheaper_cost_pair (b, a))
6078     return 1;
6079 
6080   return 0;
6081 }
6082 
6083 /* Returns the cost pair for the candidate by which GROUP is expressed in IVS.  */
6084 
6085 static class cost_pair *
6086 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6087 {
6088   return ivs->cand_for_group[group->id];
6089 }
6090 
6091 /* Computes the cost field of IVS structure.  */
6092 
6093 static void
6094 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6095 {
6096   comp_cost cost = ivs->cand_use_cost;
6097 
6098   cost += ivs->cand_cost;
6099   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6100   ivs->cost = cost;
6101 }
6102 
6103 /* Remove uses of the invariants in set INVS by decreasing the counters in
6104    N_INV_USES and in IVS.  */
6105 
6106 static void
6107 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6108 {
6109   bitmap_iterator bi;
6110   unsigned iid;
6111 
6112   if (!invs)
6113     return;
6114 
6115   gcc_assert (n_inv_uses != NULL);
6116   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6117     {
6118       n_inv_uses[iid]--;
6119       if (n_inv_uses[iid] == 0)
6120 	ivs->n_invs--;
6121     }
6122 }
6123 
6124 /* Set GROUP not to be expressed by any candidate in IVS.  */
6125 
6126 static void
6127 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6128 		 struct iv_group *group)
6129 {
6130   unsigned gid = group->id, cid;
6131   class cost_pair *cp;
6132 
6133   cp = ivs->cand_for_group[gid];
6134   if (!cp)
6135     return;
6136   cid = cp->cand->id;
6137 
6138   ivs->bad_groups++;
6139   ivs->cand_for_group[gid] = NULL;
6140   ivs->n_cand_uses[cid]--;
6141 
6142   if (ivs->n_cand_uses[cid] == 0)
6143     {
6144       bitmap_clear_bit (ivs->cands, cid);
6145       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6146 	ivs->n_cands--;
6147       ivs->cand_cost -= cp->cand->cost;
6148       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6149       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6150     }
6151 
6152   ivs->cand_use_cost -= cp->cost;
6153   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6154   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6155   iv_ca_recount_cost (data, ivs);
6156 }
6157 
6158 /* Add uses of the invariants in set INVS by increasing the counters in
6159    N_INV_USES and in IVS.  */
6160 
6161 static void
6162 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6163 {
6164   bitmap_iterator bi;
6165   unsigned iid;
6166 
6167   if (!invs)
6168     return;
6169 
6170   gcc_assert (n_inv_uses != NULL);
6171   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6172     {
6173       n_inv_uses[iid]++;
6174       if (n_inv_uses[iid] == 1)
6175 	ivs->n_invs++;
6176     }
6177 }
6178 
6179 /* Set cost pair for GROUP in set IVS to CP.  */
6180 
6181 static void
6182 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6183 	      struct iv_group *group, class cost_pair *cp)
6184 {
6185   unsigned gid = group->id, cid;
6186 
6187   if (ivs->cand_for_group[gid] == cp)
6188     return;
6189 
6190   if (ivs->cand_for_group[gid])
6191     iv_ca_set_no_cp (data, ivs, group);
6192 
6193   if (cp)
6194     {
6195       cid = cp->cand->id;
6196 
6197       ivs->bad_groups--;
6198       ivs->cand_for_group[gid] = cp;
6199       ivs->n_cand_uses[cid]++;
6200       if (ivs->n_cand_uses[cid] == 1)
6201 	{
6202 	  bitmap_set_bit (ivs->cands, cid);
6203 	  if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6204 	    ivs->n_cands++;
6205 	  ivs->cand_cost += cp->cand->cost;
6206 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6207 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6208 	}
6209 
6210       ivs->cand_use_cost += cp->cost;
6211       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6212       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6213       iv_ca_recount_cost (data, ivs);
6214     }
6215 }
6216 
6217 /* Extend set IVS by expressing GROUP by some of the candidates in it
6218    if possible.  Consider all important candidates if the candidates in
6219    set IVS don't give any result.  */
6220 
6221 static void
6222 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6223 	       struct iv_group *group)
6224 {
6225   class cost_pair *best_cp = NULL, *cp;
6226   bitmap_iterator bi;
6227   unsigned i;
6228   struct iv_cand *cand;
6229 
6230   gcc_assert (ivs->upto >= group->id);
6231   ivs->upto++;
6232   ivs->bad_groups++;
6233 
6234   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6235     {
6236       cand = data->vcands[i];
6237       cp = get_group_iv_cost (data, group, cand);
6238       if (cheaper_cost_pair (cp, best_cp))
6239 	best_cp = cp;
6240     }
6241 
6242   if (best_cp == NULL)
6243     {
6244       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6245 	{
6246 	  cand = data->vcands[i];
6247 	  cp = get_group_iv_cost (data, group, cand);
6248 	  if (cheaper_cost_pair (cp, best_cp))
6249 	    best_cp = cp;
6250 	}
6251     }
6252 
6253   iv_ca_set_cp (data, ivs, group, best_cp);
6254 }
6255 
6256 /* Get cost for assignment IVS.  */
6257 
6258 static comp_cost
6259 iv_ca_cost (class iv_ca *ivs)
6260 {
6261   /* This was a conditional expression but it triggered a bug in
6262      Sun C 5.5.  */
6263   if (ivs->bad_groups)
6264     return infinite_cost;
6265   else
6266     return ivs->cost;
6267 }
6268 
6269 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6270    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6271    respectively.  */
6272 
6273 static int
6274 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6275 		    struct iv_group *group, class cost_pair *old_cp,
6276 		    class cost_pair *new_cp)
6277 {
6278   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6279   unsigned old_n_invs = ivs->n_invs;
6280   iv_ca_set_cp (data, ivs, group, new_cp);
6281   unsigned new_n_invs = ivs->n_invs;
6282   iv_ca_set_cp (data, ivs, group, old_cp);
6283 
6284   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6285 }
6286 
6287 /* Creates a change expressing GROUP by NEW_CP instead of OLD_CP and chains
6288    it before NEXT.  */
6289 
6290 static struct iv_ca_delta *
6291 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6292 		 class cost_pair *new_cp, struct iv_ca_delta *next)
6293 {
6294   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6295 
6296   change->group = group;
6297   change->old_cp = old_cp;
6298   change->new_cp = new_cp;
6299   change->next = next;
6300 
6301   return change;
6302 }
6303 
6304 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6305    are rewritten.  */
6306 
6307 static struct iv_ca_delta *
6308 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6309 {
6310   struct iv_ca_delta *last;
6311 
6312   if (!l2)
6313     return l1;
6314 
6315   if (!l1)
6316     return l2;
6317 
6318   for (last = l1; last->next; last = last->next)
6319     continue;
6320   last->next = l2;
6321 
6322   return l1;
6323 }
6324 
6325 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6326 
6327 static struct iv_ca_delta *
6328 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6329 {
6330   struct iv_ca_delta *act, *next, *prev = NULL;
6331 
6332   for (act = delta; act; act = next)
6333     {
6334       next = act->next;
6335       act->next = prev;
6336       prev = act;
6337 
6338       std::swap (act->old_cp, act->new_cp);
6339     }
6340 
6341   return prev;
6342 }
6343 
6344 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6345    reverted instead.  */
6346 
6347 static void
6348 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6349 		    struct iv_ca_delta *delta, bool forward)
6350 {
6351   class cost_pair *from, *to;
6352   struct iv_ca_delta *act;
6353 
6354   if (!forward)
6355     delta = iv_ca_delta_reverse (delta);
6356 
6357   for (act = delta; act; act = act->next)
6358     {
6359       from = act->old_cp;
6360       to = act->new_cp;
6361       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6362       iv_ca_set_cp (data, ivs, act->group, to);
6363     }
6364 
6365   if (!forward)
6366     iv_ca_delta_reverse (delta);
6367 }
6368 
6369 /* Returns true if CAND is used in IVS.  */
6370 
6371 static bool
6372 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6373 {
6374   return ivs->n_cand_uses[cand->id] > 0;
6375 }
6376 
6377 /* Returns number of induction variable candidates in the set IVS.  */
6378 
6379 static unsigned
6380 iv_ca_n_cands (class iv_ca *ivs)
6381 {
6382   return ivs->n_cands;
6383 }
6384 
6385 /* Free the list of changes DELTA.  */
6386 
6387 static void
6388 iv_ca_delta_free (struct iv_ca_delta **delta)
6389 {
6390   struct iv_ca_delta *act, *next;
6391 
6392   for (act = *delta; act; act = next)
6393     {
6394       next = act->next;
6395       free (act);
6396     }
6397 
6398   *delta = NULL;
6399 }
6400 
6401 /* Allocates new iv candidates assignment.  */
6402 
6403 static class iv_ca *
6404 iv_ca_new (struct ivopts_data *data)
6405 {
6406   class iv_ca *nw = XNEW (class iv_ca);
6407 
6408   nw->upto = 0;
6409   nw->bad_groups = 0;
6410   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6411 				 data->vgroups.length ());
6412   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6413   nw->cands = BITMAP_ALLOC (NULL);
6414   nw->n_cands = 0;
6415   nw->n_invs = 0;
6416   nw->cand_use_cost = no_cost;
6417   nw->cand_cost = 0;
6418   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6419   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6420   nw->cost = no_cost;
6421 
6422   return nw;
6423 }
6424 
6425 /* Free memory occupied by the set IVS.  */
6426 
6427 static void
6428 iv_ca_free (class iv_ca **ivs)
6429 {
6430   free ((*ivs)->cand_for_group);
6431   free ((*ivs)->n_cand_uses);
6432   BITMAP_FREE ((*ivs)->cands);
6433   free ((*ivs)->n_inv_var_uses);
6434   free ((*ivs)->n_inv_expr_uses);
6435   free (*ivs);
6436   *ivs = NULL;
6437 }
6438 
6439 /* Dumps IVS to FILE.  */
6440 
6441 static void
6442 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6443 {
6444   unsigned i;
6445   comp_cost cost = iv_ca_cost (ivs);
6446 
6447   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6448 	   cost.complexity);
6449   fprintf (file, "  reg_cost: %d\n",
6450 	   ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6451   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6452 	   "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6453 	   ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6454   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6455 
6456   for (i = 0; i < ivs->upto; i++)
6457     {
6458       struct iv_group *group = data->vgroups[i];
6459       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6460       if (cp)
6461         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6462 		 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6463 		 cp->cost.cost, cp->cost.complexity);
6464       else
6465 	fprintf (file, "   group:%d --> ??\n", group->id);
6466     }
6467 
6468   const char *pref = "";
6469   fprintf (file, "  invariant variables: ");
6470   for (i = 1; i <= data->max_inv_var_id; i++)
6471     if (ivs->n_inv_var_uses[i])
6472       {
6473 	fprintf (file, "%s%d", pref, i);
6474 	pref = ", ";
6475       }
6476 
6477   pref = "";
6478   fprintf (file, "\n  invariant expressions: ");
6479   for (i = 1; i <= data->max_inv_expr_id; i++)
6480     if (ivs->n_inv_expr_uses[i])
6481       {
6482 	fprintf (file, "%s%d", pref, i);
6483 	pref = ", ";
6484       }
6485 
6486   fprintf (file, "\n\n");
6487 }
6488 
6489 /* Try changing the candidate in IVS to CAND for each use.  Return the cost of
6490    the new set, and store the differences in DELTA.  The number of induction
6491    variables in the new set is stored in N_IVS.  If MIN_NCAND is true, the
6492    function tries to find a solution with a minimal number of iv candidates.  */
6493 
6494 static comp_cost
6495 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6496 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6497 	      unsigned *n_ivs, bool min_ncand)
6498 {
6499   unsigned i;
6500   comp_cost cost;
6501   struct iv_group *group;
6502   class cost_pair *old_cp, *new_cp;
6503 
6504   *delta = NULL;
6505   for (i = 0; i < ivs->upto; i++)
6506     {
6507       group = data->vgroups[i];
6508       old_cp = iv_ca_cand_for_group (ivs, group);
6509 
6510       if (old_cp
6511 	  && old_cp->cand == cand)
6512 	continue;
6513 
6514       new_cp = get_group_iv_cost (data, group, cand);
6515       if (!new_cp)
6516 	continue;
6517 
6518       if (!min_ncand)
6519 	{
6520 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6521 	  /* Skip if new_cp depends on more invariants.  */
6522 	  if (cmp_invs > 0)
6523 	    continue;
6524 
6525 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6526 	  /* Skip if new_cp is not cheaper.  */
6527 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6528 	    continue;
6529 	}
6530 
6531       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6532     }
6533 
6534   iv_ca_delta_commit (data, ivs, *delta, true);
6535   cost = iv_ca_cost (ivs);
6536   if (n_ivs)
6537     *n_ivs = iv_ca_n_cands (ivs);
6538   iv_ca_delta_commit (data, ivs, *delta, false);
6539 
6540   return cost;
6541 }
6542 
6543 /* Try narrowing set IVS by removing CAND.  Return the cost of
6544    the new set and store the differences in DELTA.  START is
6545    the candidate with which we start narrowing.  */
6546 
6547 static comp_cost
6548 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6549 	      struct iv_cand *cand, struct iv_cand *start,
6550 	      struct iv_ca_delta **delta)
6551 {
6552   unsigned i, ci;
6553   struct iv_group *group;
6554   class cost_pair *old_cp, *new_cp, *cp;
6555   bitmap_iterator bi;
6556   struct iv_cand *cnd;
6557   comp_cost cost, best_cost, acost;
6558 
6559   *delta = NULL;
6560   for (i = 0; i < data->vgroups.length (); i++)
6561     {
6562       group = data->vgroups[i];
6563 
6564       old_cp = iv_ca_cand_for_group (ivs, group);
6565       if (old_cp->cand != cand)
6566 	continue;
6567 
6568       best_cost = iv_ca_cost (ivs);
6569       /* Start narrowing with START.  */
6570       new_cp = get_group_iv_cost (data, group, start);
6571 
6572       if (data->consider_all_candidates)
6573 	{
6574 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6575 	    {
6576 	      if (ci == cand->id || (start && ci == start->id))
6577 		continue;
6578 
6579 	      cnd = data->vcands[ci];
6580 
6581 	      cp = get_group_iv_cost (data, group, cnd);
6582 	      if (!cp)
6583 		continue;
6584 
6585 	      iv_ca_set_cp (data, ivs, group, cp);
6586 	      acost = iv_ca_cost (ivs);
6587 
6588 	      if (acost < best_cost)
6589 		{
6590 		  best_cost = acost;
6591 		  new_cp = cp;
6592 		}
6593 	    }
6594 	}
6595       else
6596 	{
6597 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6598 	    {
6599 	      if (ci == cand->id || (start && ci == start->id))
6600 		continue;
6601 
6602 	      cnd = data->vcands[ci];
6603 
6604 	      cp = get_group_iv_cost (data, group, cnd);
6605 	      if (!cp)
6606 		continue;
6607 
6608 	      iv_ca_set_cp (data, ivs, group, cp);
6609 	      acost = iv_ca_cost (ivs);
6610 
6611 	      if (acost < best_cost)
6612 		{
6613 		  best_cost = acost;
6614 		  new_cp = cp;
6615 		}
6616 	    }
6617 	}
6618       /* Restore to old cp for use.  */
6619       iv_ca_set_cp (data, ivs, group, old_cp);
6620 
6621       if (!new_cp)
6622 	{
6623 	  iv_ca_delta_free (delta);
6624 	  return infinite_cost;
6625 	}
6626 
6627       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6628     }
6629 
6630   iv_ca_delta_commit (data, ivs, *delta, true);
6631   cost = iv_ca_cost (ivs);
6632   iv_ca_delta_commit (data, ivs, *delta, false);
6633 
6634   return cost;
6635 }
6636 
6637 /* Try optimizing the set of candidates IVS by removing candidates other
6638    than EXCEPT_CAND from it.  Return the cost of the new set, and store the
6639    differences in DELTA.  */
6640 
6641 static comp_cost
6642 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6643 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6644 {
6645   bitmap_iterator bi;
6646   struct iv_ca_delta *act_delta, *best_delta;
6647   unsigned i;
6648   comp_cost best_cost, acost;
6649   struct iv_cand *cand;
6650 
6651   best_delta = NULL;
6652   best_cost = iv_ca_cost (ivs);
6653 
6654   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6655     {
6656       cand = data->vcands[i];
6657 
6658       if (cand == except_cand)
6659 	continue;
6660 
6661       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6662 
6663       if (acost < best_cost)
6664 	{
6665 	  best_cost = acost;
6666 	  iv_ca_delta_free (&best_delta);
6667 	  best_delta = act_delta;
6668 	}
6669       else
6670 	iv_ca_delta_free (&act_delta);
6671     }
6672 
6673   if (!best_delta)
6674     {
6675       *delta = NULL;
6676       return best_cost;
6677     }
6678 
6679   /* Recurse to possibly remove other unnecessary ivs.  */
6680   iv_ca_delta_commit (data, ivs, best_delta, true);
6681   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6682   iv_ca_delta_commit (data, ivs, best_delta, false);
6683   *delta = iv_ca_delta_join (best_delta, *delta);
6684   return best_cost;
6685 }
6686 
6687 /* Check whether CAND_IDX denotes a candidate other than OLD_CAND that has a
6688    cheaper local cost for GROUP than BEST_CP.  If so, return a pointer to
6689    the corresponding cost_pair; otherwise just return BEST_CP.  */
6690 
6691 static class cost_pair*
6692 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6693 			unsigned int cand_idx, struct iv_cand *old_cand,
6694 			class cost_pair *best_cp)
6695 {
6696   struct iv_cand *cand;
6697   class cost_pair *cp;
6698 
6699   gcc_assert (old_cand != NULL && best_cp != NULL);
6700   if (cand_idx == old_cand->id)
6701     return best_cp;
6702 
6703   cand = data->vcands[cand_idx];
6704   cp = get_group_iv_cost (data, group, cand);
6705   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6706     return cp;
6707 
6708   return best_cp;
6709 }
6710 
6711 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6712    that are used by more than one iv use.  For each such candidate, this
6713    function tries to represent the iv uses under that candidate using other
6714    candidates with lower local cost, then tries to prune the new set.  If the
6715    new set has a lower cost, it returns the new cost after recording the
6716    candidate replacement in the list DELTA.  */
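
/* An illustrative sketch: if candidate C is currently chosen for groups G1
   and G2, but G1 is locally cheaper under C1 and G2 under C2, this builds
   the delta {G1: C -> C1, G2: C -> C2}, prunes the resulting set (which may
   now drop C entirely), and keeps the change only if the overall cost,
   including the register pressure of the added candidates, decreases.  */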
6717 
6718 static comp_cost
6719 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6720 	       struct iv_ca_delta **delta)
6721 {
6722   bitmap_iterator bi, bj;
6723   unsigned int i, j, k;
6724   struct iv_cand *cand;
6725   comp_cost orig_cost, acost;
6726   struct iv_ca_delta *act_delta, *tmp_delta;
6727   class cost_pair *old_cp, *best_cp = NULL;
6728 
6729   *delta = NULL;
6730   orig_cost = iv_ca_cost (ivs);
6731 
6732   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6733     {
6734       if (ivs->n_cand_uses[i] == 1
6735 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6736 	continue;
6737 
6738       cand = data->vcands[i];
6739 
6740       act_delta = NULL;
6741       /*  Represent uses under current candidate using other ones with
6742 	  lower local cost.  */
6743       for (j = 0; j < ivs->upto; j++)
6744 	{
6745 	  struct iv_group *group = data->vgroups[j];
6746 	  old_cp = iv_ca_cand_for_group (ivs, group);
6747 
6748 	  if (old_cp->cand != cand)
6749 	    continue;
6750 
6751 	  best_cp = old_cp;
6752 	  if (data->consider_all_candidates)
6753 	    for (k = 0; k < data->vcands.length (); k++)
6754 	      best_cp = cheaper_cost_with_cand (data, group, k,
6755 						old_cp->cand, best_cp);
6756 	  else
6757 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6758 	      best_cp = cheaper_cost_with_cand (data, group, k,
6759 						old_cp->cand, best_cp);
6760 
6761 	  if (best_cp == old_cp)
6762 	    continue;
6763 
6764 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6765 	}
6766       /* No need for further pruning.  */
6767       if (!act_delta)
6768 	continue;
6769 
6770       /* Prune the new candidate set.  */
6771       iv_ca_delta_commit (data, ivs, act_delta, true);
6772       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6773       iv_ca_delta_commit (data, ivs, act_delta, false);
6774       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6775 
6776       if (acost < orig_cost)
6777 	{
6778 	  *delta = act_delta;
6779 	  return acost;
6780 	}
6781       else
6782 	iv_ca_delta_free (&act_delta);
6783     }
6784 
6785   return orig_cost;
6786 }
6787 
6788 /* Tries to extend the set IVS in the best possible way in order to
6789    express GROUP.  If ORIGINALP is true, prefer candidates from the
6790    original set of IVs, otherwise favor important candidates not
6791    based on any memory object.  */
6792 
6793 static bool
6794 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6795 		  struct iv_group *group, bool originalp)
6796 {
6797   comp_cost best_cost, act_cost;
6798   unsigned i;
6799   bitmap_iterator bi;
6800   struct iv_cand *cand;
6801   struct iv_ca_delta *best_delta = NULL, *act_delta;
6802   class cost_pair *cp;
6803 
6804   iv_ca_add_group (data, ivs, group);
6805   best_cost = iv_ca_cost (ivs);
6806   cp = iv_ca_cand_for_group (ivs, group);
6807   if (cp)
6808     {
6809       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6810       iv_ca_set_no_cp (data, ivs, group);
6811     }
6812 
6813   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6814      first try important candidates not based on any memory object.  Only if
6815      this fails, try the specific ones.  Rationale -- in loops with many
6816      variables the best choice often is to use just one generic biv.  If we
6817      added here many ivs specific to the uses, the optimization algorithm later
6818      would be likely to get stuck in a local minimum, thus causing us to create
6819      too many ivs.  The approach from few ivs to more seems more likely to be
6820      successful -- starting from few ivs, replacing an expensive use by a
6821      specific iv should always be a win.  */
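
  /* For instance (an illustrative sketch, not from any particular testcase),
     in a loop such as

	for (i = 0; i < n; i++)
	  {
	    a[i] = ...;  b[i] = ...;  c[i] = ...;
	  }

     it is usually better to keep the single counter i and derive the three
     addresses from it than to add a separate pointer iv per array, which
     would cost three increments per iteration and three registers.  */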
6822   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6823     {
6824       cand = data->vcands[i];
6825 
6826       if (originalp && cand->pos != IP_ORIGINAL)
6827 	continue;
6828 
6829       if (!originalp && cand->iv->base_object != NULL_TREE)
6830 	continue;
6831 
6832       if (iv_ca_cand_used_p (ivs, cand))
6833 	continue;
6834 
6835       cp = get_group_iv_cost (data, group, cand);
6836       if (!cp)
6837 	continue;
6838 
6839       iv_ca_set_cp (data, ivs, group, cp);
6840       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6841 			       true);
6842       iv_ca_set_no_cp (data, ivs, group);
6843       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6844 
6845       if (act_cost < best_cost)
6846 	{
6847 	  best_cost = act_cost;
6848 
6849 	  iv_ca_delta_free (&best_delta);
6850 	  best_delta = act_delta;
6851 	}
6852       else
6853 	iv_ca_delta_free (&act_delta);
6854     }
6855 
6856   if (best_cost.infinite_cost_p ())
6857     {
6858       for (i = 0; i < group->n_map_members; i++)
6859 	{
6860 	  cp = group->cost_map + i;
6861 	  cand = cp->cand;
6862 	  if (!cand)
6863 	    continue;
6864 
6865 	  /* Already tried this.  */
6866 	  if (cand->important)
6867 	    {
6868 	      if (originalp && cand->pos == IP_ORIGINAL)
6869 		continue;
6870 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6871 		continue;
6872 	    }
6873 
6874 	  if (iv_ca_cand_used_p (ivs, cand))
6875 	    continue;
6876 
6877 	  act_delta = NULL;
6878 	  iv_ca_set_cp (data, ivs, group, cp);
6879 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6880 	  iv_ca_set_no_cp (data, ivs, group);
6881 	  act_delta = iv_ca_delta_add (group,
6882 				       iv_ca_cand_for_group (ivs, group),
6883 				       cp, act_delta);
6884 
6885 	  if (act_cost < best_cost)
6886 	    {
6887 	      best_cost = act_cost;
6888 
6889 	      if (best_delta)
6890 		iv_ca_delta_free (&best_delta);
6891 	      best_delta = act_delta;
6892 	    }
6893 	  else
6894 	    iv_ca_delta_free (&act_delta);
6895 	}
6896     }
6897 
6898   iv_ca_delta_commit (data, ivs, best_delta, true);
6899   iv_ca_delta_free (&best_delta);
6900 
6901   return !best_cost.infinite_cost_p ();
6902 }
6903 
6904 /* Finds an initial assignment of candidates to uses.  */
6905 
6906 static class iv_ca *
6907 get_initial_solution (struct ivopts_data *data, bool originalp)
6908 {
6909   unsigned i;
6910   class iv_ca *ivs = iv_ca_new (data);
6911 
6912   for (i = 0; i < data->vgroups.length (); i++)
6913     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6914       {
6915 	iv_ca_free (&ivs);
6916 	return NULL;
6917       }
6918 
6919   return ivs;
6920 }
6921 
6922 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6923    points to a bool variable; if it is true, this function tries to break
6924    the locally optimal fixed point by replacing candidates in IVS.  */
6925 
6926 static bool
6927 try_improve_iv_set (struct ivopts_data *data,
6928 		    class iv_ca *ivs, bool *try_replace_p)
6929 {
6930   unsigned i, n_ivs;
6931   comp_cost acost, best_cost = iv_ca_cost (ivs);
6932   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6933   struct iv_cand *cand;
6934 
6935   /* Try extending the set of induction variables by one.  */
6936   for (i = 0; i < data->vcands.length (); i++)
6937     {
6938       cand = data->vcands[i];
6939 
6940       if (iv_ca_cand_used_p (ivs, cand))
6941 	continue;
6942 
6943       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6944       if (!act_delta)
6945 	continue;
6946 
6947       /* If we successfully added the candidate and the set is small enough,
6948 	 try optimizing it by removing other candidates.  */
6949       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6950       	{
6951 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6952 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6953 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6954 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6955 	}
6956 
6957       if (acost < best_cost)
6958 	{
6959 	  best_cost = acost;
6960 	  iv_ca_delta_free (&best_delta);
6961 	  best_delta = act_delta;
6962 	}
6963       else
6964 	iv_ca_delta_free (&act_delta);
6965     }
6966 
6967   if (!best_delta)
6968     {
6969       /* Try removing the candidates from the set instead.  */
6970       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6971 
6972       if (!best_delta && *try_replace_p)
6973 	{
6974 	  *try_replace_p = false;
6975 	  /* So far the candidate selection algorithm tends to choose fewer IVs
6976 	     so that it can handle cases in which loops have many variables
6977 	     but the best choice is often to use only one general biv.  One
6978 	     weakness is that it can't handle the opposite cases, in which
6979 	     different candidates should be chosen with respect to each use.
6980 	     To solve the problem, we replace candidates in the manner described
6981 	     in the comments of iv_ca_replace, thus giving the general algorithm
6982 	     a chance to break the locally optimal fixed point in these cases.  */
6983 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6984 	}
6985 
6986       if (!best_delta)
6987 	return false;
6988     }
6989 
6990   iv_ca_delta_commit (data, ivs, best_delta, true);
6991   iv_ca_delta_free (&best_delta);
6992   return best_cost == iv_ca_cost (ivs);
6993 }
6994 
6995 /* Attempts to find the optimal set of induction variables.  We use a simple
6996    greedy heuristic: we try to replace at most one candidate in the selected
6997    solution and remove the unused ivs while this improves the cost.  */
6998 
6999 static class iv_ca *
7000 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7001 {
7002   class iv_ca *set;
7003   bool try_replace_p = true;
7004 
7005   /* Get the initial solution.  */
7006   set = get_initial_solution (data, originalp);
7007   if (!set)
7008     {
7009       if (dump_file && (dump_flags & TDF_DETAILS))
7010 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7011       return NULL;
7012     }
7013 
7014   if (dump_file && (dump_flags & TDF_DETAILS))
7015     {
7016       fprintf (dump_file, "Initial set of candidates:\n");
7017       iv_ca_dump (data, dump_file, set);
7018     }
7019 
7020   while (try_improve_iv_set (data, set, &try_replace_p))
7021     {
7022       if (dump_file && (dump_flags & TDF_DETAILS))
7023 	{
7024 	  fprintf (dump_file, "Improved to:\n");
7025 	  iv_ca_dump (data, dump_file, set);
7026 	}
7027     }
7028 
7029   /* If the set has infinite_cost, it can't be optimal.  */
7030   if (iv_ca_cost (set).infinite_cost_p ())
7031     {
7032       if (dump_file && (dump_flags & TDF_DETAILS))
7033 	fprintf (dump_file,
7034 		 "Overflow to infinite cost in try_improve_iv_set.\n");
7035       iv_ca_free (&set);
7036     }
7037   return set;
7038 }
7039 
7040 static class iv_ca *
7041 find_optimal_iv_set (struct ivopts_data *data)
7042 {
7043   unsigned i;
7044   comp_cost cost, origcost;
7045   class iv_ca *set, *origset;
7046 
7047   /* Determine the cost based on a strategy that starts with the original
7048      IVs, and try again using a strategy that prefers candidates not based
7049      on any memory object.  */
7050   origset = find_optimal_iv_set_1 (data, true);
7051   set = find_optimal_iv_set_1 (data, false);
7052 
7053   if (!origset && !set)
7054     return NULL;
7055 
7056   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7057   cost = set ? iv_ca_cost (set) : infinite_cost;
7058 
7059   if (dump_file && (dump_flags & TDF_DETAILS))
7060     {
7061       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7062 	       origcost.cost, origcost.complexity);
7063       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7064 	       cost.cost, cost.complexity);
7065     }
7066 
7067   /* Choose the one with the best cost.  */
7068   if (origcost <= cost)
7069     {
7070       if (set)
7071 	iv_ca_free (&set);
7072       set = origset;
7073     }
7074   else if (origset)
7075     iv_ca_free (&origset);
7076 
7077   for (i = 0; i < data->vgroups.length (); i++)
7078     {
7079       struct iv_group *group = data->vgroups[i];
7080       group->selected = iv_ca_cand_for_group (set, group)->cand;
7081     }
7082 
7083   return set;
7084 }
7085 
7086 /* Creates a new induction variable corresponding to CAND.  */
7087 
7088 static void
7089 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7090 {
7091   gimple_stmt_iterator incr_pos;
7092   tree base;
7093   struct iv_use *use;
7094   struct iv_group *group;
7095   bool after = false;
7096 
7097   gcc_assert (cand->iv != NULL);
7098 
7099   switch (cand->pos)
7100     {
7101     case IP_NORMAL:
7102       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7103       break;
7104 
7105     case IP_END:
7106       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7107       after = true;
7108       break;
7109 
7110     case IP_AFTER_USE:
7111       after = true;
7112       /* fall through */
7113     case IP_BEFORE_USE:
7114       incr_pos = gsi_for_stmt (cand->incremented_at);
7115       break;
7116 
7117     case IP_ORIGINAL:
7118       /* Mark that the iv is preserved.  */
7119       name_info (data, cand->var_before)->preserve_biv = true;
7120       name_info (data, cand->var_after)->preserve_biv = true;
7121 
7122       /* Rewrite the increment so that it uses var_before directly.  */
7123       use = find_interesting_uses_op (data, cand->var_after);
7124       group = data->vgroups[use->group_id];
7125       group->selected = cand;
7126       return;
7127     }
7128 
7129   gimple_add_tmp_var (cand->var_before);
7130 
7131   base = unshare_expr (cand->iv->base);
7132 
7133   create_iv (base, unshare_expr (cand->iv->step),
7134 	     cand->var_before, data->current_loop,
7135 	     &incr_pos, after, &cand->var_before, &cand->var_after);
7136 }
7137 
7138 /* Creates new induction variables described in SET.  */
7139 
7140 static void
7141 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7142 {
7143   unsigned i;
7144   struct iv_cand *cand;
7145   bitmap_iterator bi;
7146 
7147   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7148     {
7149       cand = data->vcands[i];
7150       create_new_iv (data, cand);
7151     }
7152 
7153   if (dump_file && (dump_flags & TDF_DETAILS))
7154     {
7155       fprintf (dump_file, "Selected IV set for loop %d",
7156 	       data->current_loop->num);
7157       if (data->loop_loc != UNKNOWN_LOCATION)
7158 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7159 		 LOCATION_LINE (data->loop_loc));
7160       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7161 	       avg_loop_niter (data->current_loop));
7162       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7163       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7164 	{
7165 	  cand = data->vcands[i];
7166 	  dump_cand (dump_file, cand);
7167 	}
7168       fprintf (dump_file, "\n");
7169     }
7170 }
7171 
7172 /* Rewrites USE (definition of iv used in a nonlinear expression)
7173    using candidate CAND.  */
7174 
7175 static void
7176 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7177 			    struct iv_use *use, struct iv_cand *cand)
7178 {
7179   gassign *ass;
7180   gimple_stmt_iterator bsi;
7181   tree comp, type = get_use_type (use), tgt;
7182 
7183   /* An important special case -- if we are asked to express value of
7184      the original iv by itself, just exit; there is no need to
7185      introduce a new computation (that might also need casting the
7186      variable to unsigned and back).  */
7187   if (cand->pos == IP_ORIGINAL
7188       && cand->incremented_at == use->stmt)
7189     {
7190       tree op = NULL_TREE;
7191       enum tree_code stmt_code;
7192 
7193       gcc_assert (is_gimple_assign (use->stmt));
7194       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7195 
7196       /* Check whether we may leave the computation unchanged.
7197 	 This is the case only if it does not rely on other
7198 	 computations in the loop -- otherwise, the computation
7199 	 we rely upon may be removed in remove_unused_ivs,
7200 	 thus leading to ICE.  */
7201       stmt_code = gimple_assign_rhs_code (use->stmt);
7202       if (stmt_code == PLUS_EXPR
7203 	  || stmt_code == MINUS_EXPR
7204 	  || stmt_code == POINTER_PLUS_EXPR)
7205 	{
7206 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7207 	    op = gimple_assign_rhs2 (use->stmt);
7208 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7209 	    op = gimple_assign_rhs1 (use->stmt);
7210 	}
7211 
7212       if (op != NULL_TREE)
7213 	{
7214 	  if (expr_invariant_in_loop_p (data->current_loop, op))
7215 	    return;
7216 	  if (TREE_CODE (op) == SSA_NAME)
7217 	    {
7218 	      struct iv *iv = get_iv (data, op);
7219 	      if (iv != NULL && integer_zerop (iv->step))
7220 		return;
7221 	    }
7222 	}
7223     }
7224 
7225   switch (gimple_code (use->stmt))
7226     {
7227     case GIMPLE_PHI:
7228       tgt = PHI_RESULT (use->stmt);
7229 
7230       /* If we should keep the biv, do not replace it.  */
7231       if (name_info (data, tgt)->preserve_biv)
7232 	return;
7233 
7234       bsi = gsi_after_labels (gimple_bb (use->stmt));
7235       break;
7236 
7237     case GIMPLE_ASSIGN:
7238       tgt = gimple_assign_lhs (use->stmt);
7239       bsi = gsi_for_stmt (use->stmt);
7240       break;
7241 
7242     default:
7243       gcc_unreachable ();
7244     }
7245 
7246   aff_tree aff_inv, aff_var;
7247   if (!get_computation_aff_1 (data->current_loop, use->stmt,
7248 			      use, cand, &aff_inv, &aff_var))
7249     gcc_unreachable ();
7250 
7251   unshare_aff_combination (&aff_inv);
7252   unshare_aff_combination (&aff_var);
7253   /* Prefer a CSE opportunity over a loop invariant by adding the offset
7254      last, so that iv_uses that differ only in constant offset can be CSEed.  */
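  /* Illustration (hypothetical): for two uses a[i + 1] and a[i + 2] rewritten
     against the same candidate, stripping the constant parts (+4 and +8 with
     4-byte elements) from aff_inv first lets both uses share the computation
     of &a[0] + i * 4; the differing offsets are re-added below, so the common
     part can be CSEed.  */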
7255   poly_widest_int offset = aff_inv.offset;
7256   aff_inv.offset = 0;
7257 
7258   gimple_seq stmt_list = NULL, seq = NULL;
7259   tree comp_op1 = aff_combination_to_tree (&aff_inv);
7260   tree comp_op2 = aff_combination_to_tree (&aff_var);
7261   gcc_assert (comp_op1 && comp_op2);
7262 
7263   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7264   gimple_seq_add_seq (&stmt_list, seq);
7265   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7266   gimple_seq_add_seq (&stmt_list, seq);
7267 
7268   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7269     std::swap (comp_op1, comp_op2);
7270 
7271   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7272     {
7273       comp = fold_build_pointer_plus (comp_op1,
7274 				      fold_convert (sizetype, comp_op2));
7275       comp = fold_build_pointer_plus (comp,
7276 				      wide_int_to_tree (sizetype, offset));
7277     }
7278   else
7279     {
7280       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7281 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
7282       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7283 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7284     }
7285 
7286   comp = fold_convert (type, comp);
7287   if (!valid_gimple_rhs_p (comp)
7288       || (gimple_code (use->stmt) != GIMPLE_PHI
7289 	  /* We can't allow re-allocating the stmt as it might be pointed
7290 	     to still.  */
7291 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7292 	      >= gimple_num_ops (gsi_stmt (bsi)))))
7293     {
7294       comp = force_gimple_operand (comp, &seq, true, NULL);
7295       gimple_seq_add_seq (&stmt_list, seq);
7296       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7297 	{
7298 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7299 	  /* As this isn't a plain copy we have to reset alignment
7300 	     information.  */
7301 	  if (SSA_NAME_PTR_INFO (comp))
7302 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7303 	}
7304     }
7305 
7306   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7307   if (gimple_code (use->stmt) == GIMPLE_PHI)
7308     {
7309       ass = gimple_build_assign (tgt, comp);
7310       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7311 
7312       bsi = gsi_for_stmt (use->stmt);
7313       remove_phi_node (&bsi, false);
7314     }
7315   else
7316     {
7317       gimple_assign_set_rhs_from_tree (&bsi, comp);
7318       use->stmt = gsi_stmt (bsi);
7319     }
7320 }
7321 
7322 /* Performs a peephole optimization to reorder the iv update statement with
7323    a mem ref to enable instruction combining in later phases. The mem ref uses
7324    the iv value before the update, so the reordering transformation requires
7325    adjustment of the offset. CAND is the selected IV_CAND.
7326 
7327    Example:
7328 
7329    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7330    iv2 = iv1 + 1;
7331 
7332    if (t < val)      (1)
7333      goto L;
7334    goto Head;
7335 
7336 
7337    Directly propagating t over to (1) would introduce an overlapping live range
7338    and thus increase register pressure.  This peephole transforms it into:
7339 
7340 
7341    iv2 = iv1 + 1;
7342    t = MEM_REF (base, iv2, 8, 8);
7343    if (t < val)
7344      goto L;
7345    goto Head;
7346 */
7347 
7348 static void
7349 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7350 {
7351   tree var_after;
7352   gimple *iv_update, *stmt;
7353   basic_block bb;
7354   gimple_stmt_iterator gsi, gsi_iv;
7355 
7356   if (cand->pos != IP_NORMAL)
7357     return;
7358 
7359   var_after = cand->var_after;
7360   iv_update = SSA_NAME_DEF_STMT (var_after);
7361 
7362   bb = gimple_bb (iv_update);
7363   gsi = gsi_last_nondebug_bb (bb);
7364   stmt = gsi_stmt (gsi);
7365 
7366   /* Only handle conditional statements for now.  */
7367   if (gimple_code (stmt) != GIMPLE_COND)
7368     return;
7369 
7370   gsi_prev_nondebug (&gsi);
7371   stmt = gsi_stmt (gsi);
7372   if (stmt != iv_update)
7373     return;
7374 
7375   gsi_prev_nondebug (&gsi);
7376   if (gsi_end_p (gsi))
7377     return;
7378 
7379   stmt = gsi_stmt (gsi);
7380   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7381     return;
7382 
7383   if (stmt != use->stmt)
7384     return;
7385 
7386   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7387     return;
7388 
7389   if (dump_file && (dump_flags & TDF_DETAILS))
7390     {
7391       fprintf (dump_file, "Reordering \n");
7392       print_gimple_stmt (dump_file, iv_update, 0);
7393       print_gimple_stmt (dump_file, use->stmt, 0);
7394       fprintf (dump_file, "\n");
7395     }
7396 
7397   gsi = gsi_for_stmt (use->stmt);
7398   gsi_iv = gsi_for_stmt (iv_update);
7399   gsi_move_before (&gsi_iv, &gsi);
7400 
7401   cand->pos = IP_BEFORE_USE;
7402   cand->incremented_at = use->stmt;
7403 }
7404 
7405 /* Return the alias pointer type that should be used for a MEM_REF
7406    associated with USE, which has type USE_PTR_ADDRESS.  */
7407 
7408 static tree
7409 get_alias_ptr_type_for_ptr_address (iv_use *use)
7410 {
7411   gcall *call = as_a <gcall *> (use->stmt);
7412   switch (gimple_call_internal_fn (call))
7413     {
7414     case IFN_MASK_LOAD:
7415     case IFN_MASK_STORE:
7416     case IFN_MASK_LOAD_LANES:
7417     case IFN_MASK_STORE_LANES:
7418       /* The second argument contains the correct alias type.  */
7419       gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7420       return TREE_TYPE (gimple_call_arg (call, 1));
7421 
7422     default:
7423       gcc_unreachable ();
7424     }
7425 }
7426 
7427 
7428 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7429 
7430 static void
7431 rewrite_use_address (struct ivopts_data *data,
7432 		     struct iv_use *use, struct iv_cand *cand)
7433 {
7434   aff_tree aff;
7435   bool ok;
7436 
7437   adjust_iv_update_pos (cand, use);
7438   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7439   gcc_assert (ok);
7440   unshare_aff_combination (&aff);
7441 
7442   /* To avoid undefined overflow problems, all IV candidates use unsigned
7443      integer types.  The drawback is that this makes it impossible for
7444      create_mem_ref to distinguish an IV that is based on a memory object
7445      from one that represents simply an offset.
7446 
7447      To work around this problem, we pass a hint to create_mem_ref that
7448      indicates which variable (if any) in aff is an IV based on a memory
7449      object.  Note that we only consider the candidate.  If this is not
7450      based on an object, the base of the reference is in some subexpression
7451      of the use -- but these will use pointer types, so they are recognized
7452      by the create_mem_ref heuristics anyway.  */
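
  /* Illustration (hypothetical): if the selected candidate was derived from a
     pointer such as &a[0] and therefore has a base object, its value at
     USE->stmt is passed as the hint below, so create_mem_ref can use it as
     the base of the memory reference; a candidate without a base object gets
     no hint and is handled as a plain offset.  */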
7453   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7454   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7455   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7456   tree type = use->mem_type;
7457   tree alias_ptr_type;
7458   if (use->type == USE_PTR_ADDRESS)
7459     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7460   else
7461     {
7462       gcc_assert (type == TREE_TYPE (*use->op_p));
7463       unsigned int align = get_object_alignment (*use->op_p);
7464       if (align != TYPE_ALIGN (type))
7465 	type = build_aligned_type (type, align);
7466       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7467     }
7468   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7469 			     iv, base_hint, data->speed);
7470 
7471   if (use->type == USE_PTR_ADDRESS)
7472     {
7473       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7474       ref = fold_convert (get_use_type (use), ref);
7475       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7476 				      true, GSI_SAME_STMT);
7477     }
7478   else
7479     copy_ref_info (ref, *use->op_p);
7480 
7481   *use->op_p = ref;
7482 }
7483 
7484 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7485    candidate CAND.  */
7486 
7487 static void
7488 rewrite_use_compare (struct ivopts_data *data,
7489 		     struct iv_use *use, struct iv_cand *cand)
7490 {
7491   tree comp, op, bound;
7492   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7493   enum tree_code compare;
7494   struct iv_group *group = data->vgroups[use->group_id];
7495   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7496 
7497   bound = cp->value;
7498   if (bound)
7499     {
7500       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7501       tree var_type = TREE_TYPE (var);
7502       gimple_seq stmts;
7503 
7504       if (dump_file && (dump_flags & TDF_DETAILS))
7505 	{
7506 	  fprintf (dump_file, "Replacing exit test: ");
7507 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7508 	}
7509       compare = cp->comp;
7510       bound = unshare_expr (fold_convert (var_type, bound));
7511       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7512       if (stmts)
7513 	gsi_insert_seq_on_edge_immediate (
7514 		loop_preheader_edge (data->current_loop),
7515 		stmts);
7516 
7517       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7518       gimple_cond_set_lhs (cond_stmt, var);
7519       gimple_cond_set_code (cond_stmt, compare);
7520       gimple_cond_set_rhs (cond_stmt, op);
7521       return;
7522     }
7523 
7524   /* The induction variable elimination failed; just express the original
7525      giv.  */
7526   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7527   gcc_assert (comp != NULL_TREE);
7528   gcc_assert (use->op_p != NULL);
7529   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7530 					 SSA_NAME_VAR (*use->op_p),
7531 					 true, GSI_SAME_STMT);
7532 }
7533 
7534 /* Rewrite the groups using the selected induction variables.  */
7535 
7536 static void
7537 rewrite_groups (struct ivopts_data *data)
7538 {
7539   unsigned i, j;
7540 
7541   for (i = 0; i < data->vgroups.length (); i++)
7542     {
7543       struct iv_group *group = data->vgroups[i];
7544       struct iv_cand *cand = group->selected;
7545 
7546       gcc_assert (cand);
7547 
7548       if (group->type == USE_NONLINEAR_EXPR)
7549 	{
7550 	  for (j = 0; j < group->vuses.length (); j++)
7551 	    {
7552 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7553 	      update_stmt (group->vuses[j]->stmt);
7554 	    }
7555 	}
7556       else if (address_p (group->type))
7557 	{
7558 	  for (j = 0; j < group->vuses.length (); j++)
7559 	    {
7560 	      rewrite_use_address (data, group->vuses[j], cand);
7561 	      update_stmt (group->vuses[j]->stmt);
7562 	    }
7563 	}
7564       else
7565 	{
7566 	  gcc_assert (group->type == USE_COMPARE);
7567 
7568 	  for (j = 0; j < group->vuses.length (); j++)
7569 	    {
7570 	      rewrite_use_compare (data, group->vuses[j], cand);
7571 	      update_stmt (group->vuses[j]->stmt);
7572 	    }
7573 	}
7574     }
7575 }
7576 
7577 /* Removes the ivs that are not used after rewriting.  */
7578 
7579 static void
7580 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7581 {
7582   unsigned j;
7583   bitmap_iterator bi;
7584 
7585   /* Figure out an order in which to release SSA DEFs so that we don't
7586      release something that we'd have to propagate into a debug stmt
7587      afterwards.  */
7588   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7589     {
7590       struct version_info *info;
7591 
7592       info = ver_info (data, j);
7593       if (info->iv
7594 	  && !integer_zerop (info->iv->step)
7595 	  && !info->inv_id
7596 	  && !info->iv->nonlin_use
7597 	  && !info->preserve_biv)
7598 	{
7599 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7600 
7601 	  tree def = info->iv->ssa_name;
7602 
7603 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7604 	    {
7605 	      imm_use_iterator imm_iter;
7606 	      use_operand_p use_p;
7607 	      gimple *stmt;
7608 	      int count = 0;
7609 
7610 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7611 		{
7612 		  if (!gimple_debug_bind_p (stmt))
7613 		    continue;
7614 
7615 		  /* We just want to determine whether to do nothing
7616 		     (count == 0), to substitute the computed
7617 		     expression into a single use of the SSA DEF by
7618 		     itself (count == 1), or to use a debug temp
7619 		     because the SSA DEF is used multiple times or as
7620 		     part of a larger expression (count > 1). */
7621 		  count++;
7622 		  if (gimple_debug_bind_get_value (stmt) != def)
7623 		    count++;
7624 
7625 		  if (count > 1)
7626 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7627 		}
7628 
7629 	      if (!count)
7630 		continue;
7631 
7632 	      struct iv_use dummy_use;
7633 	      struct iv_cand *best_cand = NULL, *cand;
7634 	      unsigned i, best_pref = 0, cand_pref;
7635 	      tree comp = NULL_TREE;
7636 
7637 	      memset (&dummy_use, 0, sizeof (dummy_use));
7638 	      dummy_use.iv = info->iv;
7639 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7640 		{
7641 		  cand = data->vgroups[i]->selected;
7642 		  if (cand == best_cand)
7643 		    continue;
7644 		  cand_pref = operand_equal_p (cand->iv->step,
7645 					       info->iv->step, 0)
7646 		    ? 4 : 0;
7647 		  cand_pref
7648 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7649 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7650 		    ? 2 : 0;
7651 		  cand_pref
7652 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7653 		    ? 1 : 0;
7654 		  if (best_cand == NULL || best_pref < cand_pref)
7655 		    {
7656 		      tree this_comp
7657 			= get_debug_computation_at (data->current_loop,
7658 						    SSA_NAME_DEF_STMT (def),
7659 						    &dummy_use, cand);
7660 		      if (this_comp)
7661 			{
7662 			  best_cand = cand;
7663 			  best_pref = cand_pref;
7664 			  comp = this_comp;
7665 			}
7666 		    }
7667 		}
7668 
7669 	      if (!best_cand)
7670 		continue;
7671 
7672 	      comp = unshare_expr (comp);
7673 	      if (count > 1)
7674 		{
7675 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7676 		  DECL_ARTIFICIAL (vexpr) = 1;
7677 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7678 		  if (SSA_NAME_VAR (def))
7679 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7680 		  else
7681 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7682 		  gdebug *def_temp
7683 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7684 		  gimple_stmt_iterator gsi;
7685 
7686 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7687 		    gsi = gsi_after_labels (gimple_bb
7688 					    (SSA_NAME_DEF_STMT (def)));
7689 		  else
7690 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7691 
7692 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7693 		  comp = vexpr;
7694 		}
7695 
7696 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7697 		{
7698 		  if (!gimple_debug_bind_p (stmt))
7699 		    continue;
7700 
7701 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7702 		    SET_USE (use_p, comp);
7703 
7704 		  update_stmt (stmt);
7705 		}
7706 	    }
7707 	}
7708     }
7709 }
7710 
7711 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7712    for hash_map::traverse.  */
7713 
7714 bool
7715 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7716 {
7717   free (value);
7718   return true;
7719 }
7720 
7721 /* Frees data allocated by the optimization of a single loop.  */
7722 
7723 static void
7724 free_loop_data (struct ivopts_data *data)
7725 {
7726   unsigned i, j;
7727   bitmap_iterator bi;
7728   tree obj;
7729 
7730   if (data->niters)
7731     {
7732       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7733       delete data->niters;
7734       data->niters = NULL;
7735     }
7736 
7737   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7738     {
7739       struct version_info *info;
7740 
7741       info = ver_info (data, i);
7742       info->iv = NULL;
7743       info->has_nonlin_use = false;
7744       info->preserve_biv = false;
7745       info->inv_id = 0;
7746     }
7747   bitmap_clear (data->relevant);
7748   bitmap_clear (data->important_candidates);
7749 
7750   for (i = 0; i < data->vgroups.length (); i++)
7751     {
7752       struct iv_group *group = data->vgroups[i];
7753 
7754       for (j = 0; j < group->vuses.length (); j++)
7755 	free (group->vuses[j]);
7756       group->vuses.release ();
7757 
7758       BITMAP_FREE (group->related_cands);
7759       for (j = 0; j < group->n_map_members; j++)
7760 	{
7761 	  if (group->cost_map[j].inv_vars)
7762 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7763 	  if (group->cost_map[j].inv_exprs)
7764 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7765 	}
7766 
7767       free (group->cost_map);
7768       free (group);
7769     }
7770   data->vgroups.truncate (0);
7771 
7772   for (i = 0; i < data->vcands.length (); i++)
7773     {
7774       struct iv_cand *cand = data->vcands[i];
7775 
7776       if (cand->inv_vars)
7777 	BITMAP_FREE (cand->inv_vars);
7778       if (cand->inv_exprs)
7779 	BITMAP_FREE (cand->inv_exprs);
7780       free (cand);
7781     }
7782   data->vcands.truncate (0);
7783 
7784   if (data->version_info_size < num_ssa_names)
7785     {
7786       data->version_info_size = 2 * num_ssa_names;
7787       free (data->version_info);
7788       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7789     }
7790 
7791   data->max_inv_var_id = 0;
7792   data->max_inv_expr_id = 0;
7793 
7794   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7795     SET_DECL_RTL (obj, NULL_RTX);
7796 
7797   decl_rtl_to_reset.truncate (0);
7798 
7799   data->inv_expr_tab->empty ();
7800 
7801   data->iv_common_cand_tab->empty ();
7802   data->iv_common_cands.truncate (0);
7803 }
7804 
7805 /* Finalizes the data structures used by the iv optimization pass.  */
7807 
7808 static void
7809 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7810 {
7811   free_loop_data (data);
7812   free (data->version_info);
7813   BITMAP_FREE (data->relevant);
7814   BITMAP_FREE (data->important_candidates);
7815 
7816   decl_rtl_to_reset.release ();
7817   data->vgroups.release ();
7818   data->vcands.release ();
7819   delete data->inv_expr_tab;
7820   data->inv_expr_tab = NULL;
7821   free_affine_expand_cache (&data->name_expansion_cache);
7822   if (data->base_object_map)
7823     delete data->base_object_map;
7824   delete data->iv_common_cand_tab;
7825   data->iv_common_cand_tab = NULL;
7826   data->iv_common_cands.release ();
7827   obstack_free (&data->iv_obstack, NULL);
7828 }
7829 
7830 /* Returns true if the loop body BODY includes any function calls.  */
7831 
7832 static bool
7833 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7834 {
7835   gimple_stmt_iterator gsi;
7836   unsigned i;
7837 
7838   for (i = 0; i < num_nodes; i++)
7839     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7840       {
7841 	gimple *stmt = gsi_stmt (gsi);
7842 	if (is_gimple_call (stmt)
7843 	    && !gimple_call_internal_p (stmt)
7844 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7845 	  return true;
7846       }
7847   return false;
7848 }
7849 
7850 /* Determine cost scaling factor for basic blocks in loop.  */
7851 #define COST_SCALING_FACTOR_BOUND (20)
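
/* For example (illustrative numbers only): with a loop header frequency of 50
   and an inner block frequency of 200, statement costs in that block are
   scaled by 200 / 50 = 4.  If the largest such ratio exceeds
   COST_SCALING_FACTOR_BOUND, the per-block factor is instead computed as
   COST_SCALING_FACTOR_BOUND * bfreq / max_freq, so it never exceeds the bound
   and the scaled costs cannot overflow.  */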
7852 
7853 static void
7854 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7855 {
7856   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7857   if (!data->speed || lfreq <= 0)
7858     return;
7859 
7860   int max_freq = lfreq;
7861   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7862     {
7863       body[i]->aux = (void *)(intptr_t) 1;
7864       if (max_freq < body[i]->count.to_frequency (cfun))
7865 	max_freq = body[i]->count.to_frequency (cfun);
7866     }
7867   if (max_freq > lfreq)
7868     {
7869       int divisor, factor;
7870       /* Check if scaling factor itself needs to be scaled by the bound.  This
7871 	 is to avoid overflow when scaling cost according to profile info.  */
7872       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7873 	{
7874 	  divisor = max_freq;
7875 	  factor = COST_SCALING_FACTOR_BOUND;
7876 	}
7877       else
7878 	{
7879 	  divisor = lfreq;
7880 	  factor = 1;
7881 	}
7882       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7883 	{
7884 	  int bfreq = body[i]->count.to_frequency (cfun);
7885 	  if (bfreq <= lfreq)
7886 	    continue;
7887 
7888 	  body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7889 	}
7890     }
7891 }
7892 
7893 /* Find the doloop comparison use and set its doloop_p flag if found.  */
7894 
7895 static bool
7896 find_doloop_use (struct ivopts_data *data)
7897 {
7898   struct loop *loop = data->current_loop;
7899 
7900   for (unsigned i = 0; i < data->vgroups.length (); i++)
7901     {
7902       struct iv_group *group = data->vgroups[i];
7903       if (group->type == USE_COMPARE)
7904 	{
7905 	  gcc_assert (group->vuses.length () == 1);
7906 	  struct iv_use *use = group->vuses[0];
7907 	  gimple *stmt = use->stmt;
7908 	  if (gimple_code (stmt) == GIMPLE_COND)
7909 	    {
7910 	      basic_block bb = gimple_bb (stmt);
7911 	      edge true_edge, false_edge;
7912 	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7913 	      /* This comparison is used for the loop latch.  For now, require
7914 		 the latch to be empty.  */
7915 	      if ((loop->latch == true_edge->dest
7916 		   || loop->latch == false_edge->dest)
7917 		  && empty_block_p (loop->latch))
7918 		{
7919 		  group->doloop_p = true;
7920 		  if (dump_file && (dump_flags & TDF_DETAILS))
7921 		    {
7922 		      fprintf (dump_file, "Doloop cmp iv use: ");
7923 		      print_gimple_stmt (dump_file, stmt, 0, TDF_DETAILS);
7924 		    }
7925 		  return true;
7926 		}
7927 	    }
7928 	}
7929     }
7930 
7931   return false;
7932 }
7933 
7934 /* For targets that support doloop, predict whether the RTL doloop
7935    transformation will later be performed on this loop; if so, detect
7936    the doloop use and set the flag doloop_use_p.  */
7937 
7938 void
7939 analyze_and_mark_doloop_use (struct ivopts_data *data)
7940 {
7941   data->doloop_use_p = false;
7942 
7943   if (!flag_branch_on_count_reg)
7944     return;
7945 
7946   if (!generic_predict_doloop_p (data))
7947     return;
7948 
7949   if (find_doloop_use (data))
7950     {
7951       data->doloop_use_p = true;
7952       if (dump_file && (dump_flags & TDF_DETAILS))
7953 	{
7954 	  struct loop *loop = data->current_loop;
7955 	  fprintf (dump_file,
7956 		   "Predict loop %d can perform"
7957 		   " doloop optimization later.\n",
7958 		   loop->num);
7959 	  flow_loop_dump (loop, dump_file, NULL, 1);
7960 	}
7961     }
7962 }
7963 
7964 /* Optimizes the LOOP.  Returns true if anything changed.  */
7965 
7966 static bool
7967 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7968 			   bitmap toremove)
7969 {
7970   bool changed = false;
7971   class iv_ca *iv_ca;
7972   edge exit = single_dom_exit (loop);
7973   basic_block *body;
7974 
7975   gcc_assert (!data->niters);
7976   data->current_loop = loop;
7977   data->loop_loc = find_loop_location (loop).get_location_t ();
7978   data->speed = optimize_loop_for_speed_p (loop);
7979 
7980   if (dump_file && (dump_flags & TDF_DETAILS))
7981     {
7982       fprintf (dump_file, "Processing loop %d", loop->num);
7983       if (data->loop_loc != UNKNOWN_LOCATION)
7984 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7985 		 LOCATION_LINE (data->loop_loc));
7986       fprintf (dump_file, "\n");
7987 
7988       if (exit)
7989 	{
7990 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7991 		   exit->src->index, exit->dest->index);
7992 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7993 	  fprintf (dump_file, "\n");
7994 	}
7995 
7996       fprintf (dump_file, "\n");
7997     }
7998 
7999   body = get_loop_body (loop);
8000   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8001   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8002 
8003   data->loop_single_exit_p
8004     = exit != NULL && loop_only_exit_p (loop, body, exit);
8005 
8006   /* For each ssa name determines whether it behaves as an induction variable
8007      in some loop.  */
8008   if (!find_induction_variables (data))
8009     goto finish;
8010 
8011   /* Finds interesting uses (item 1).  */
8012   find_interesting_uses (data);
8013   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8014     goto finish;
8015 
8016   /* Determine cost scaling factor for basic blocks in loop.  */
8017   determine_scaling_factor (data, body);
8018 
8019   /* Analyze doloop possibility and mark the doloop use if predicted.  */
8020   analyze_and_mark_doloop_use (data);
8021 
8022   /* Finds candidates for the induction variables (item 2).  */
8023   find_iv_candidates (data);
8024 
8025   /* Calculates the costs (item 3, part 1).  */
8026   determine_iv_costs (data);
8027   determine_group_iv_costs (data);
8028   determine_set_costs (data);
8029 
8030   /* Find the optimal set of induction variables (item 3, part 2).  */
8031   iv_ca = find_optimal_iv_set (data);
8032   /* Cleanup basic block aux field.  */
8033   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8034     body[i]->aux = NULL;
8035   if (!iv_ca)
8036     goto finish;
8037   changed = true;
8038 
8039   /* Create the new induction variables (item 4, part 1).  */
8040   create_new_ivs (data, iv_ca);
8041   iv_ca_free (&iv_ca);
8042 
8043   /* Rewrite the uses (item 4, part 2).  */
8044   rewrite_groups (data);
8045 
8046   /* Remove the ivs that are unused after rewriting.  */
8047   remove_unused_ivs (data, toremove);
8048 
8049 finish:
8050   free (body);
8051   free_loop_data (data);
8052 
8053   return changed;
8054 }
8055 
8056 /* Main entry point.  Optimizes induction variables in loops.  */
8057 
8058 void
8059 tree_ssa_iv_optimize (void)
8060 {
8061   class loop *loop;
8062   struct ivopts_data data;
8063   auto_bitmap toremove;
8064 
8065   tree_ssa_iv_optimize_init (&data);
8066 
8067   /* Optimize the loops starting with the innermost ones.  */
8068   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8069     {
8070       if (!dbg_cnt (ivopts_loop))
8071 	continue;
8072 
8073       if (dump_file && (dump_flags & TDF_DETAILS))
8074 	flow_loop_dump (loop, dump_file, NULL, 1);
8075 
8076       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8077     }
8078 
8079   /* Remove eliminated IV defs.  */
8080   release_defs_bitset (toremove);
8081 
8082   /* We have changed the structure of induction variables; it might happen
8083      that definitions in the scev database refer to some of them that were
8084      eliminated.  */
8085   scev_reset_htab ();
8086   /* Likewise niter and control-IV information.  */
8087   free_numbers_of_iterations_estimates (cfun);
8088 
8089   tree_ssa_iv_optimize_finalize (&data);
8090 }
8091 
8092 #include "gt-tree-ssa-loop-ivopts.h"
8093