1 /* Induction variable optimizations.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does it in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note that the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only in constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variable's
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.
68 
69    For targets that support low-overhead loops, IVOPTs has to take care of
70    the loops which will probably be transformed by the RTL doloop optimization,
71    to try to make the selected IV candidate set optimal.  The process of doloop
72    support includes:
73 
74    1) Analyze whether the current loop will be transformed to a doloop or not;
75       if so, mark its compare type IV use as a doloop use (iv_group field
76       doloop_p) and set flag doloop_use_p of ivopts_data to notify subsequent
77       processing on doloop.  See analyze_and_mark_doloop_use and its callees for
78       the details.  The target hook predict_doloop_p does target specific checks.
79 
80    2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81       set flag doloop_p of the iv_cand, set its step cost to zero and add no
82       extra cost as for a biv.  For cost determination between the doloop IV
83       cand and an IV use, the target hooks doloop_cost_for_generic and
84       doloop_cost_for_address are provided to add extra costs for generic type
85       and address type IV uses.  Zero cost is assigned to the pair of doloop IV
86       cand and doloop IV use, and bound zero is set for IV elimination.
87 
88    3) With the cost settings in step 2), the current cost model based IV
89       selection algorithm will proceed as usual, picking up the doloop
90       dedicated IV if profitable.  */
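
/* For illustration only (a simplified sketch, not taken from any testcase):
   strength-reducing the address computation in

     for (i = 0; i < n; i++)
       sum += a[i];

   conceptually replaces the per-iteration "a + i * 4" address computation
   (assuming 4-byte elements) with a single pointer induction variable

     for (p = a; p < a + n; p++)
       sum += *p;

   after which the original counter i can be eliminated if it has no other
   uses.  */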
91 
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 
135 /* For lang_hooks.types.type_for_mode.  */
136 #include "langhooks.h"
137 
138 /* FIXME: Expressions are expanded to RTL in this pass to determine the
139    cost of different addressing modes.  This should be moved to a TBD
140    interface between the GIMPLE and RTL worlds.  */
141 
142 /* The infinite cost.  */
143 #define INFTY 1000000000
144 
145 /* Returns the expected number of loop iterations for LOOP.
146    The average trip count is computed from profile data if it
147    exists. */
148 
149 static inline HOST_WIDE_INT
150 avg_loop_niter (class loop *loop)
151 {
152   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
153   if (niter == -1)
154     {
155       niter = likely_max_stmt_executions_int (loop);
156 
157       if (niter == -1 || niter > param_avg_loop_niter)
158 	return param_avg_loop_niter;
159     }
160 
161   return niter;
162 }
163 
164 struct iv_use;
165 
166 /* Representation of the induction variable.  */
167 struct iv
168 {
169   tree base;		/* Initial value of the iv.  */
170   tree base_object;	/* The memory object to which the induction variable points.  */
171   tree step;		/* Step of the iv (constant only).  */
172   tree ssa_name;	/* The ssa name with the value.  */
173   struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if there is one.  */
174   bool biv_p;		/* Is it a biv?  */
175   bool no_overflow;	/* True if the iv doesn't overflow.  */
176   bool have_address_use;/* For biv, indicate if it's used in any address
177 			   type use.  */
178 };
179 
180 /* Per-ssa version information (induction variable descriptions, etc.).  */
181 struct version_info
182 {
183   tree name;		/* The ssa name.  */
184   struct iv *iv;	/* Induction variable description.  */
185   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
186 			   an expression that is not an induction variable.  */
187   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
188   unsigned inv_id;	/* Id of an invariant.  */
189 };
190 
191 /* Types of uses.  */
192 enum use_type
193 {
194   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
195   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
196 			   reference.  */
197   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
198 			   cases where the expansion of the function
199 			   will turn the argument into a normal address.  */
200   USE_COMPARE		/* Use is a compare.  */
201 };
202 
203 /* Cost of a computation.  */
204 class comp_cost
205 {
206 public:
207   comp_cost (): cost (0), complexity (0), scratch (0)
208   {}
209 
210   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
211     : cost (cost), complexity (complexity), scratch (scratch)
212   {}
213 
214   /* Returns true if COST is infinite.  */
215   bool infinite_cost_p ();
216 
217   /* Adds costs COST1 and COST2.  */
218   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
219 
220   /* Adds COST to the comp_cost.  */
221   comp_cost operator+= (comp_cost cost);
222 
223   /* Adds constant C to this comp_cost.  */
224   comp_cost operator+= (HOST_WIDE_INT c);
225 
226   /* Subtracts constant C from this comp_cost.  */
227   comp_cost operator-= (HOST_WIDE_INT c);
228 
229   /* Divide the comp_cost by constant C.  */
230   comp_cost operator/= (HOST_WIDE_INT c);
231 
232   /* Multiply the comp_cost by constant C.  */
233   comp_cost operator*= (HOST_WIDE_INT c);
234 
235   /* Subtracts cost COST2 from COST1.  */
236   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
237 
238   /* Subtracts COST from this comp_cost.  */
239   comp_cost operator-= (comp_cost cost);
240 
241   /* Returns true if COST1 is smaller than COST2.  */
242   friend bool operator< (comp_cost cost1, comp_cost cost2);
243 
244   /* Returns true if COST1 and COST2 are equal.  */
245   friend bool operator== (comp_cost cost1, comp_cost cost2);
246 
247   /* Returns true if COST1 is smaller than or equal to COST2.  */
248   friend bool operator<= (comp_cost cost1, comp_cost cost2);
249 
250   int64_t cost;		/* The runtime cost.  */
251   unsigned complexity;  /* The estimate of the complexity of the code for
252 			   the computation (in no concrete units --
253 			   complexity field should be larger for more
254 			   complex expressions and addressing modes).  */
255   int64_t scratch;	/* Scratch used during cost computation.  */
256 };
257 
258 static const comp_cost no_cost;
259 static const comp_cost infinite_cost (INFTY, 0, INFTY);
260 
261 bool
262 comp_cost::infinite_cost_p ()
263 {
264   return cost == INFTY;
265 }
266 
267 comp_cost
268 operator+ (comp_cost cost1, comp_cost cost2)
269 {
270   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
271     return infinite_cost;
272 
273   gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
274   cost1.cost += cost2.cost;
275   cost1.complexity += cost2.complexity;
276 
277   return cost1;
278 }
279 
280 comp_cost
281 operator- (comp_cost cost1, comp_cost cost2)
282 {
283   if (cost1.infinite_cost_p ())
284     return infinite_cost;
285 
286   gcc_assert (!cost2.infinite_cost_p ());
287   gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
288 
289   cost1.cost -= cost2.cost;
290   cost1.complexity -= cost2.complexity;
291 
292   return cost1;
293 }
294 
295 comp_cost
296 comp_cost::operator+= (comp_cost cost)
297 {
298   *this = *this + cost;
299   return *this;
300 }
301 
302 comp_cost
303 comp_cost::operator+= (HOST_WIDE_INT c)
304 {
305   if (c >= INFTY)
306     this->cost = INFTY;
307 
308   if (infinite_cost_p ())
309     return *this;
310 
311   gcc_assert (this->cost + c < infinite_cost.cost);
312   this->cost += c;
313 
314   return *this;
315 }
316 
317 comp_cost
318 comp_cost::operator-= (HOST_WIDE_INT c)
319 {
320   if (infinite_cost_p ())
321     return *this;
322 
323   gcc_assert (this->cost - c < infinite_cost.cost);
324   this->cost -= c;
325 
326   return *this;
327 }
328 
329 comp_cost
330 comp_cost::operator/= (HOST_WIDE_INT c)
331 {
332   gcc_assert (c != 0);
333   if (infinite_cost_p ())
334     return *this;
335 
336   this->cost /= c;
337 
338   return *this;
339 }
340 
341 comp_cost
342 comp_cost::operator*= (HOST_WIDE_INT c)
343 {
344   if (infinite_cost_p ())
345     return *this;
346 
347   gcc_assert (this->cost * c < infinite_cost.cost);
348   this->cost *= c;
349 
350   return *this;
351 }
352 
353 comp_cost
354 comp_cost::operator-= (comp_cost cost)
355 {
356   *this = *this - cost;
357   return *this;
358 }
359 
360 bool
361 operator< (comp_cost cost1, comp_cost cost2)
362 {
363   if (cost1.cost == cost2.cost)
364     return cost1.complexity < cost2.complexity;
365 
366   return cost1.cost < cost2.cost;
367 }
368 
369 bool
370 operator== (comp_cost cost1, comp_cost cost2)
371 {
372   return cost1.cost == cost2.cost
373     && cost1.complexity == cost2.complexity;
374 }
375 
376 bool
377 operator<= (comp_cost cost1, comp_cost cost2)
378 {
379   return cost1 < cost2 || cost1 == cost2;
380 }
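
/* A small worked example of the arithmetic above (illustrative only):
   comp_cost (4, 1) + comp_cost (2, 0) yields comp_cost (6, 1); comparison
   is lexicographic, first on cost and then on complexity, so
   comp_cost (4, 1) < comp_cost (4, 2) and comp_cost (4, 2) < comp_cost (5, 0).
   Operations involving infinite_cost saturate and stay infinite.  */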
381 
382 struct iv_inv_expr_ent;
383 
384 /* The candidate - cost pair.  */
385 class cost_pair
386 {
387 public:
388   struct iv_cand *cand;	/* The candidate.  */
389   comp_cost cost;	/* The cost.  */
390   enum tree_code comp;	/* For iv elimination, the comparison.  */
391   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
392 			   preserved when representing iv_use with iv_cand.  */
393   bitmap inv_exprs;	/* The list of newly created invariant expressions
394 			   when representing iv_use with iv_cand.  */
395   tree value;		/* For final value elimination, the expression for
396 			   the final value of the iv.  For iv elimination,
397 			   the new bound to compare with.  */
398 };
399 
400 /* Use.  */
401 struct iv_use
402 {
403   unsigned id;		/* The id of the use.  */
404   unsigned group_id;	/* The group id the use belongs to.  */
405   enum use_type type;	/* Type of the use.  */
406   tree mem_type;	/* The memory type to use when testing whether an
407 			   address is legitimate, and what the address's
408 			   cost is.  */
409   struct iv *iv;	/* The induction variable it is based on.  */
410   gimple *stmt;		/* Statement in that it occurs.  */
411   tree *op_p;		/* The place where it occurs.  */
412 
413   tree addr_base;	/* Base address with const offset stripped.  */
414   poly_uint64_pod addr_offset;
415 			/* Const offset stripped from base address.  */
416 };
417 
418 /* Group of uses.  */
419 struct iv_group
420 {
421   /* The id of the group.  */
422   unsigned id;
423   /* Uses of the group are of the same type.  */
424   enum use_type type;
425   /* The set of "related" IV candidates, plus the important ones.  */
426   bitmap related_cands;
427   /* Number of IV candidates in the cost_map.  */
428   unsigned n_map_members;
429   /* The costs with respect to the iv candidates.  */
430   class cost_pair *cost_map;
431   /* The selected candidate for the group.  */
432   struct iv_cand *selected;
433   /* To indicate this is a doloop use group.  */
434   bool doloop_p;
435   /* Uses in the group.  */
436   vec<struct iv_use *> vuses;
437 };
438 
439 /* The position where the iv is computed.  */
440 enum iv_position
441 {
442   IP_NORMAL,		/* At the end, just before the exit condition.  */
443   IP_END,		/* At the end of the latch block.  */
444   IP_BEFORE_USE,	/* Immediately before a specific use.  */
445   IP_AFTER_USE,		/* Immediately after a specific use.  */
446   IP_ORIGINAL		/* The original biv.  */
447 };
448 
449 /* The induction variable candidate.  */
450 struct iv_cand
451 {
452   unsigned id;		/* The number of the candidate.  */
453   bool important;	/* Whether this is an "important" candidate, i.e. such
454 			   that it should be considered by all uses.  */
455   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
456   gimple *incremented_at;/* For original biv, the statement where it is
457 			   incremented.  */
458   tree var_before;	/* The variable used for it before increment.  */
459   tree var_after;	/* The variable used for it after increment.  */
460   struct iv *iv;	/* The value of the candidate.  NULL for
461 			   "pseudocandidate" used to indicate the possibility
462 			   to replace the final value of an iv by direct
463 			   computation of the value.  */
464   unsigned cost;	/* Cost of the candidate.  */
465   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
466   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
467 			      where it is incremented.  */
468   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
469 			   iv_cand.  */
470   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
471 			   handle it as a new invariant expression which will
472 			   be hoisted out of loop.  */
473   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
474 			   smaller type.  */
475   bool doloop_p;	/* Whether this is a doloop candidate.  */
476 };
477 
478 /* Hashtable entry for common candidate derived from iv uses.  */
479 class iv_common_cand
480 {
481 public:
482   tree base;
483   tree step;
484   /* IV uses from which this common candidate is derived.  */
485   auto_vec<struct iv_use *> uses;
486   hashval_t hash;
487 };
488 
489 /* Hashtable helpers.  */
490 
491 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
492 {
493   static inline hashval_t hash (const iv_common_cand *);
494   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
495 };
496 
497 /* Hash function for possible common candidates.  */
498 
499 inline hashval_t
500 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
501 {
502   return ccand->hash;
503 }
504 
505 /* Hash table equality function for common candidates.  */
506 
507 inline bool
508 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
509 			      const iv_common_cand *ccand2)
510 {
511   return (ccand1->hash == ccand2->hash
512 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
513 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
514 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
515 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
516 }
517 
518 /* Loop invariant expression hashtable entry.  */
519 
520 struct iv_inv_expr_ent
521 {
522   /* Tree expression of the entry.  */
523   tree expr;
524   /* Unique identifier.  */
525   int id;
526   /* Hash value.  */
527   hashval_t hash;
528 };
529 
530 /* Sort iv_inv_expr_ent pair A and B by id field.  */
531 
532 static int
533 sort_iv_inv_expr_ent (const void *a, const void *b)
534 {
535   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
536   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
537 
538   unsigned id1 = (*e1)->id;
539   unsigned id2 = (*e2)->id;
540 
541   if (id1 < id2)
542     return -1;
543   else if (id1 > id2)
544     return 1;
545   else
546     return 0;
547 }
548 
549 /* Hashtable helpers.  */
550 
551 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
552 {
553   static inline hashval_t hash (const iv_inv_expr_ent *);
554   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
555 };
556 
557 /* Return true if uses of type TYPE represent some form of address.  */
558 
559 inline bool
560 address_p (use_type type)
561 {
562   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
563 }
564 
565 /* Hash function for loop invariant expressions.  */
566 
567 inline hashval_t
568 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
569 {
570   return expr->hash;
571 }
572 
573 /* Hash table equality function for expressions.  */
574 
575 inline bool
576 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
577 			   const iv_inv_expr_ent *expr2)
578 {
579   return expr1->hash == expr2->hash
580 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
581 }
582 
583 struct ivopts_data
584 {
585   /* The currently optimized loop.  */
586   class loop *current_loop;
587   location_t loop_loc;
588 
589   /* Numbers of iterations for all exits of the current loop.  */
590   hash_map<edge, tree_niter_desc *> *niters;
591 
592   /* Number of registers used in it.  */
593   unsigned regs_used;
594 
595   /* The size of version_info array allocated.  */
596   unsigned version_info_size;
597 
598   /* The array of information for the ssa names.  */
599   struct version_info *version_info;
600 
601   /* The hashtable of loop invariant expressions created
602      by ivopt.  */
603   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
604 
605   /* The bitmap of indices in version_info whose value was changed.  */
606   bitmap relevant;
607 
608   /* The uses of induction variables.  */
609   vec<iv_group *> vgroups;
610 
611   /* The candidates.  */
612   vec<iv_cand *> vcands;
613 
614   /* A bitmap of important candidates.  */
615   bitmap important_candidates;
616 
617   /* Cache used by tree_to_aff_combination_expand.  */
618   hash_map<tree, name_expansion *> *name_expansion_cache;
619 
620   /* The hashtable of common candidates derived from iv uses.  */
621   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
622 
623   /* The common candidates.  */
624   vec<iv_common_cand *> iv_common_cands;
625 
626   /* Hash map recording base object information of tree exp.  */
627   hash_map<tree, tree> *base_object_map;
628 
629   /* The maximum invariant variable id.  */
630   unsigned max_inv_var_id;
631 
632   /* The maximum invariant expression id.  */
633   unsigned max_inv_expr_id;
634 
635   /* Number of no_overflow BIVs which are not used in memory address.  */
636   unsigned bivs_not_used_in_addr;
637 
638   /* Obstack for iv structure.  */
639   struct obstack iv_obstack;
640 
641   /* Whether to consider just related and important candidates when replacing a
642      use.  */
643   bool consider_all_candidates;
644 
645   /* Are we optimizing for speed?  */
646   bool speed;
647 
648   /* Whether the loop body includes any function calls.  */
649   bool body_includes_call;
650 
651   /* Whether the loop body can only be exited via single exit.  */
652   bool loop_single_exit_p;
653 
654   /* Whether the loop has doloop comparison use.  */
655   bool doloop_use_p;
656 };
657 
658 /* An assignment of iv candidates to uses.  */
659 
660 class iv_ca
661 {
662 public:
663   /* The number of uses covered by the assignment.  */
664   unsigned upto;
665 
666   /* Number of uses that cannot be expressed by the candidates in the set.  */
667   unsigned bad_groups;
668 
669   /* Candidate assigned to a use, together with the related costs.  */
670   class cost_pair **cand_for_group;
671 
672   /* Number of times each candidate is used.  */
673   unsigned *n_cand_uses;
674 
675   /* The candidates used.  */
676   bitmap cands;
677 
678   /* The number of candidates in the set.  */
679   unsigned n_cands;
680 
681   /* The number of invariants needed, including both invariant variables and
682      invariant expressions.  */
683   unsigned n_invs;
684 
685   /* Total cost of expressing uses.  */
686   comp_cost cand_use_cost;
687 
688   /* Total cost of candidates.  */
689   int64_t cand_cost;
690 
691   /* Number of times each invariant variable is used.  */
692   unsigned *n_inv_var_uses;
693 
694   /* Number of times each invariant expression is used.  */
695   unsigned *n_inv_expr_uses;
696 
697   /* Total cost of the assignment.  */
698   comp_cost cost;
699 };
700 
701 /* Difference of two iv candidate assignments.  */
702 
703 struct iv_ca_delta
704 {
705   /* Changed group.  */
706   struct iv_group *group;
707 
708   /* An old assignment (for rollback purposes).  */
709   class cost_pair *old_cp;
710 
711   /* A new assignment.  */
712   class cost_pair *new_cp;
713 
714   /* Next change in the list.  */
715   struct iv_ca_delta *next;
716 };
717 
718 /* Bound on number of candidates below which all candidates are considered.  */
719 
720 #define CONSIDER_ALL_CANDIDATES_BOUND \
721   ((unsigned) param_iv_consider_all_candidates_bound)
722 
723 /* If there are more iv occurrences, we just give up (it is quite unlikely that
724    optimizing such a loop would help, and it would take ages).  */
725 
726 #define MAX_CONSIDERED_GROUPS \
727   ((unsigned) param_iv_max_considered_uses)
728 
729 /* If there are at most this number of ivs in the set, always try removing
730    unnecessary ivs from the set.  */
731 
732 #define ALWAYS_PRUNE_CAND_SET_BOUND \
733   ((unsigned) param_iv_always_prune_cand_set_bound)
734 
735 /* The list of trees for which the decl_rtl field must be reset is stored
736    here.  */
737 
738 static vec<tree> decl_rtl_to_reset;
739 
740 static comp_cost force_expr_to_var_cost (tree, bool);
741 
742 /* The single loop exit if it dominates the latch, NULL otherwise.  */
743 
744 edge
745 single_dom_exit (class loop *loop)
746 {
747   edge exit = single_exit (loop);
748 
749   if (!exit)
750     return NULL;
751 
752   if (!just_once_each_iteration_p (loop, exit->src))
753     return NULL;
754 
755   return exit;
756 }
757 
758 /* Dumps information about the induction variable IV to FILE.  Don't dump
759    the variable's name if DUMP_NAME is FALSE.  The information is dumped with
760    preceding spaces indicated by INDENT_LEVEL.  */
761 
762 void
763 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
764 {
765   const char *p;
766   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
767 
768   if (indent_level > 4)
769     indent_level = 4;
770   p = spaces + 8 - (indent_level << 1);
771 
772   fprintf (file, "%sIV struct:\n", p);
773   if (iv->ssa_name && dump_name)
774     {
775       fprintf (file, "%s  SSA_NAME:\t", p);
776       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
777       fprintf (file, "\n");
778     }
779 
780   fprintf (file, "%s  Type:\t", p);
781   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
782   fprintf (file, "\n");
783 
784   fprintf (file, "%s  Base:\t", p);
785   print_generic_expr (file, iv->base, TDF_SLIM);
786   fprintf (file, "\n");
787 
788   fprintf (file, "%s  Step:\t", p);
789   print_generic_expr (file, iv->step, TDF_SLIM);
790   fprintf (file, "\n");
791 
792   if (iv->base_object)
793     {
794       fprintf (file, "%s  Object:\t", p);
795       print_generic_expr (file, iv->base_object, TDF_SLIM);
796       fprintf (file, "\n");
797     }
798 
799   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
800 
801   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
802 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
803 }
804 
805 /* Dumps information about the USE to FILE.  */
806 
807 void
808 dump_use (FILE *file, struct iv_use *use)
809 {
810   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
811   fprintf (file, "    At stmt:\t");
812   print_gimple_stmt (file, use->stmt, 0);
813   fprintf (file, "    At pos:\t");
814   if (use->op_p)
815     print_generic_expr (file, *use->op_p, TDF_SLIM);
816   fprintf (file, "\n");
817   dump_iv (file, use->iv, false, 2);
818 }
819 
820 /* Dumps information about the uses to FILE.  */
821 
822 void
823 dump_groups (FILE *file, struct ivopts_data *data)
824 {
825   unsigned i, j;
826   struct iv_group *group;
827 
828   for (i = 0; i < data->vgroups.length (); i++)
829     {
830       group = data->vgroups[i];
831       fprintf (file, "Group %d:\n", group->id);
832       if (group->type == USE_NONLINEAR_EXPR)
833 	fprintf (file, "  Type:\tGENERIC\n");
834       else if (group->type == USE_REF_ADDRESS)
835 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
836       else if (group->type == USE_PTR_ADDRESS)
837 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
838       else
839 	{
840 	  gcc_assert (group->type == USE_COMPARE);
841 	  fprintf (file, "  Type:\tCOMPARE\n");
842 	}
843       for (j = 0; j < group->vuses.length (); j++)
844 	dump_use (file, group->vuses[j]);
845     }
846 }
847 
848 /* Dumps information about induction variable candidate CAND to FILE.  */
849 
850 void
851 dump_cand (FILE *file, struct iv_cand *cand)
852 {
853   struct iv *iv = cand->iv;
854 
855   fprintf (file, "Candidate %d:\n", cand->id);
856   if (cand->inv_vars)
857     {
858       fprintf (file, "  Depend on inv.vars: ");
859       dump_bitmap (file, cand->inv_vars);
860     }
861   if (cand->inv_exprs)
862     {
863       fprintf (file, "  Depend on inv.exprs: ");
864       dump_bitmap (file, cand->inv_exprs);
865     }
866 
867   if (cand->var_before)
868     {
869       fprintf (file, "  Var befor: ");
870       print_generic_expr (file, cand->var_before, TDF_SLIM);
871       fprintf (file, "\n");
872     }
873   if (cand->var_after)
874     {
875       fprintf (file, "  Var after: ");
876       print_generic_expr (file, cand->var_after, TDF_SLIM);
877       fprintf (file, "\n");
878     }
879 
880   switch (cand->pos)
881     {
882     case IP_NORMAL:
883       fprintf (file, "  Incr POS: before exit test\n");
884       break;
885 
886     case IP_BEFORE_USE:
887       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
888       break;
889 
890     case IP_AFTER_USE:
891       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
892       break;
893 
894     case IP_END:
895       fprintf (file, "  Incr POS: at end\n");
896       break;
897 
898     case IP_ORIGINAL:
899       fprintf (file, "  Incr POS: orig biv\n");
900       break;
901     }
902 
903   dump_iv (file, iv, false, 1);
904 }
905 
906 /* Returns the info for ssa version VER.  */
907 
908 static inline struct version_info *
909 ver_info (struct ivopts_data *data, unsigned ver)
910 {
911   return data->version_info + ver;
912 }
913 
914 /* Returns the info for ssa name NAME.  */
915 
916 static inline struct version_info *
917 name_info (struct ivopts_data *data, tree name)
918 {
919   return ver_info (data, SSA_NAME_VERSION (name));
920 }
921 
922 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
923    emitted in LOOP.  */
924 
925 static bool
926 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
927 {
928   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
929 
930   gcc_assert (bb);
931 
932   if (sbb == loop->latch)
933     return true;
934 
935   if (sbb != bb)
936     return false;
937 
938   return stmt == last_stmt (bb);
939 }
940 
941 /* Returns true if STMT is after the place where the original induction
942    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
943    if the positions are identical.  */
944 
945 static bool
946 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
947 {
948   basic_block cand_bb = gimple_bb (cand->incremented_at);
949   basic_block stmt_bb = gimple_bb (stmt);
950 
951   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
952     return false;
953 
954   if (stmt_bb != cand_bb)
955     return true;
956 
957   if (true_if_equal
958       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
959     return true;
960   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
961 }
962 
963 /* Returns true if STMT is after the place where the induction variable
964    CAND is incremented in LOOP.  */
965 
966 static bool
967 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
968 {
969   switch (cand->pos)
970     {
971     case IP_END:
972       return false;
973 
974     case IP_NORMAL:
975       return stmt_after_ip_normal_pos (loop, stmt);
976 
977     case IP_ORIGINAL:
978     case IP_AFTER_USE:
979       return stmt_after_inc_pos (cand, stmt, false);
980 
981     case IP_BEFORE_USE:
982       return stmt_after_inc_pos (cand, stmt, true);
983 
984     default:
985       gcc_unreachable ();
986     }
987 }
988 
989 /* walk_tree callback for contains_abnormal_ssa_name_p.  */
990 
991 static tree
992 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
993 {
994   if (TREE_CODE (*tp) == SSA_NAME
995       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
996     return *tp;
997 
998   if (!EXPR_P (*tp))
999     *walk_subtrees = 0;
1000 
1001   return NULL_TREE;
1002 }
1003 
1004 /* Returns true if EXPR contains a ssa name that occurs in an
1005    abnormal phi node.  */
1006 
1007 bool
1008 contains_abnormal_ssa_name_p (tree expr)
1009 {
1010   return walk_tree_without_duplicates
1011 	   (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1012 }
1013 
1014 /*  Returns the structure describing number of iterations determined from
1015     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1016 
1017 static class tree_niter_desc *
1018 niter_for_exit (struct ivopts_data *data, edge exit)
1019 {
1020   class tree_niter_desc *desc;
1021   tree_niter_desc **slot;
1022 
1023   if (!data->niters)
1024     {
1025       data->niters = new hash_map<edge, tree_niter_desc *>;
1026       slot = NULL;
1027     }
1028   else
1029     slot = data->niters->get (exit);
1030 
1031   if (!slot)
1032     {
1033       /* Try to determine number of iterations.  We cannot safely work with ssa
1034 	 names that appear in phi nodes on abnormal edges, so that we do not
1035 	 create overlapping life ranges for them (PR 27283).  */
1036       desc = XNEW (class tree_niter_desc);
1037       if (!number_of_iterations_exit (data->current_loop,
1038 				      exit, desc, true)
1039      	  || contains_abnormal_ssa_name_p (desc->niter))
1040 	{
1041 	  XDELETE (desc);
1042 	  desc = NULL;
1043 	}
1044       data->niters->put (exit, desc);
1045     }
1046   else
1047     desc = *slot;
1048 
1049   return desc;
1050 }
1051 
1052 /* Returns the structure describing number of iterations determined from
1053    single dominating exit of DATA->current_loop, or NULL if something
1054    goes wrong.  */
1055 
1056 static class tree_niter_desc *
1057 niter_for_single_dom_exit (struct ivopts_data *data)
1058 {
1059   edge exit = single_dom_exit (data->current_loop);
1060 
1061   if (!exit)
1062     return NULL;
1063 
1064   return niter_for_exit (data, exit);
1065 }
1066 
1067 /* Initializes data structures used by the iv optimization pass, stored
1068    in DATA.  */
1069 
1070 static void
1071 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1072 {
1073   data->version_info_size = 2 * num_ssa_names;
1074   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1075   data->relevant = BITMAP_ALLOC (NULL);
1076   data->important_candidates = BITMAP_ALLOC (NULL);
1077   data->max_inv_var_id = 0;
1078   data->max_inv_expr_id = 0;
1079   data->niters = NULL;
1080   data->vgroups.create (20);
1081   data->vcands.create (20);
1082   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1083   data->name_expansion_cache = NULL;
1084   data->base_object_map = NULL;
1085   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1086   data->iv_common_cands.create (20);
1087   decl_rtl_to_reset.create (20);
1088   gcc_obstack_init (&data->iv_obstack);
1089 }
1090 
1091 /* walk_tree callback for determine_base_object.  */
1092 
1093 static tree
1094 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1095 {
1096   tree_code code = TREE_CODE (*tp);
1097   tree obj = NULL_TREE;
1098   if (code == ADDR_EXPR)
1099     {
1100       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1101       if (!base)
1102 	obj = *tp;
1103       else if (TREE_CODE (base) != MEM_REF)
1104 	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1105     }
1106   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1107 	obj = fold_convert (ptr_type_node, *tp);
1108 
1109   if (!obj)
1110     {
1111       if (!EXPR_P (*tp))
1112 	*walk_subtrees = 0;
1113 
1114       return NULL_TREE;
1115     }
1116   /* Record special node for multiple base objects and stop.  */
1117   if (*static_cast<tree *> (wdata))
1118     {
1119       *static_cast<tree *> (wdata) = integer_zero_node;
1120       return integer_zero_node;
1121     }
1122   /* Record the base object and continue looking.  */
1123   *static_cast<tree *> (wdata) = obj;
1124   return NULL_TREE;
1125 }
1126 
1127 /* Returns the memory object to which EXPR points, with caching.  Return NULL
1128    if we are able to determine that it does not point to any such object;
1129    specially return integer_zero_node if EXPR contains multiple base objects.  */
1130 
1131 static tree
1132 determine_base_object (struct ivopts_data *data, tree expr)
1133 {
1134   tree *slot, obj = NULL_TREE;
1135   if (data->base_object_map)
1136     {
1137       if ((slot = data->base_object_map->get(expr)) != NULL)
1138 	return *slot;
1139     }
1140   else
1141     data->base_object_map = new hash_map<tree, tree>;
1142 
1143   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1144   data->base_object_map->put (expr, obj);
1145   return obj;
1146 }
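
/* As an illustration (hypothetical inputs): for EXPR being &a[i] the function
   above returns &a; for a pointer SSA name p_1 it returns p_1 converted to
   ptr_type_node; for an expression that mixes two different base objects it
   returns integer_zero_node; and for an expression with no base object at
   all, e.g. a plain integer, it returns NULL_TREE.  */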
1147 
1148 /* Return true if address expression with non-DECL_P operand appears
1149    in EXPR.  */
1150 
1151 static bool
1152 contain_complex_addr_expr (tree expr)
1153 {
1154   bool res = false;
1155 
1156   STRIP_NOPS (expr);
1157   switch (TREE_CODE (expr))
1158     {
1159     case POINTER_PLUS_EXPR:
1160     case PLUS_EXPR:
1161     case MINUS_EXPR:
1162       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1163       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1164       break;
1165 
1166     case ADDR_EXPR:
1167       return (!DECL_P (TREE_OPERAND (expr, 0)));
1168 
1169     default:
1170       return false;
1171     }
1172 
1173   return res;
1174 }
1175 
1176 /* Allocates an induction variable with given initial value BASE and step STEP
1177    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1178 
1179 static struct iv *
1180 alloc_iv (struct ivopts_data *data, tree base, tree step,
1181 	  bool no_overflow = false)
1182 {
1183   tree expr = base;
1184   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1185 					      sizeof (struct iv));
1186   gcc_assert (step != NULL_TREE);
1187 
1188   /* Lower address expression in base except ones with DECL_P as operand.
1189      By doing this:
1190        1) More accurate cost can be computed for address expressions;
1191        2) Duplicate candidates won't be created for bases in different
1192 	  forms, like &a[0] and &a.  */
1193   STRIP_NOPS (expr);
1194   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1195       || contain_complex_addr_expr (expr))
1196     {
1197       aff_tree comb;
1198       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1199       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1200     }
1201 
1202   iv->base = base;
1203   iv->base_object = determine_base_object (data, base);
1204   iv->step = step;
1205   iv->biv_p = false;
1206   iv->nonlin_use = NULL;
1207   iv->ssa_name = NULL_TREE;
1208   if (!no_overflow
1209        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1210 			      base, step))
1211     no_overflow = true;
1212   iv->no_overflow = no_overflow;
1213   iv->have_address_use = false;
1214 
1215   return iv;
1216 }
1217 
1218 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1219    doesn't overflow.  */
1220 
1221 static void
1222 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1223 	bool no_overflow)
1224 {
1225   struct version_info *info = name_info (data, iv);
1226 
1227   gcc_assert (!info->iv);
1228 
1229   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1230   info->iv = alloc_iv (data, base, step, no_overflow);
1231   info->iv->ssa_name = iv;
1232 }
1233 
1234 /* Finds induction variable declaration for VAR.  */
1235 
1236 static struct iv *
1237 get_iv (struct ivopts_data *data, tree var)
1238 {
1239   basic_block bb;
1240   tree type = TREE_TYPE (var);
1241 
1242   if (!POINTER_TYPE_P (type)
1243       && !INTEGRAL_TYPE_P (type))
1244     return NULL;
1245 
1246   if (!name_info (data, var)->iv)
1247     {
1248       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1249 
1250       if (!bb
1251 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1252 	{
1253 	  if (POINTER_TYPE_P (type))
1254 	    type = sizetype;
1255 	  set_iv (data, var, var, build_int_cst (type, 0), true);
1256 	}
1257     }
1258 
1259   return name_info (data, var)->iv;
1260 }
1261 
1262 /* Return the first non-invariant ssa var found in EXPR.  */
1263 
1264 static tree
1265 extract_single_var_from_expr (tree expr)
1266 {
1267   int i, n;
1268   tree tmp;
1269   enum tree_code code;
1270 
1271   if (!expr || is_gimple_min_invariant (expr))
1272     return NULL;
1273 
1274   code = TREE_CODE (expr);
1275   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1276     {
1277       n = TREE_OPERAND_LENGTH (expr);
1278       for (i = 0; i < n; i++)
1279 	{
1280 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1281 
1282 	  if (tmp)
1283 	    return tmp;
1284 	}
1285     }
1286   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1287 }
1288 
1289 /* Finds basic ivs.  */
1290 
1291 static bool
1292 find_bivs (struct ivopts_data *data)
1293 {
1294   gphi *phi;
1295   affine_iv iv;
1296   tree step, type, base, stop;
1297   bool found = false;
1298   class loop *loop = data->current_loop;
1299   gphi_iterator psi;
1300 
1301   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1302     {
1303       phi = psi.phi ();
1304 
1305       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1306 	continue;
1307 
1308       if (virtual_operand_p (PHI_RESULT (phi)))
1309 	continue;
1310 
1311       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1312 	continue;
1313 
1314       if (integer_zerop (iv.step))
1315 	continue;
1316 
1317       step = iv.step;
1318       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1319       /* Stop expanding the iv base at the first ssa var referred to by the
1320 	 iv step.  Ideally we should stop at any such ssa var, but since that
1321 	 is expensive and the case is unusual, we only handle the first one.
1322 
1323 	 See PR64705 for the rationale.  */
1324       stop = extract_single_var_from_expr (step);
1325       base = expand_simple_operations (base, stop);
1326       if (contains_abnormal_ssa_name_p (base)
1327 	  || contains_abnormal_ssa_name_p (step))
1328 	continue;
1329 
1330       type = TREE_TYPE (PHI_RESULT (phi));
1331       base = fold_convert (type, base);
1332       if (step)
1333 	{
1334 	  if (POINTER_TYPE_P (type))
1335 	    step = convert_to_ptrofftype (step);
1336 	  else
1337 	    step = fold_convert (type, step);
1338 	}
1339 
1340       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1341       found = true;
1342     }
1343 
1344   return found;
1345 }
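
/* As an example (not from any particular testcase), a loop header PHI such as

     i_1 = PHI <0 (preheader), i_5 (latch)>
     ...
     i_5 = i_1 + 1;

   is recorded by find_bivs as an iv with base 0 and step 1; mark_bivs below
   then sets biv_p on the ivs of both i_1 and i_5.  */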
1346 
1347 /* Marks basic ivs.  */
1348 
1349 static void
1350 mark_bivs (struct ivopts_data *data)
1351 {
1352   gphi *phi;
1353   gimple *def;
1354   tree var;
1355   struct iv *iv, *incr_iv;
1356   class loop *loop = data->current_loop;
1357   basic_block incr_bb;
1358   gphi_iterator psi;
1359 
1360   data->bivs_not_used_in_addr = 0;
1361   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1362     {
1363       phi = psi.phi ();
1364 
1365       iv = get_iv (data, PHI_RESULT (phi));
1366       if (!iv)
1367 	continue;
1368 
1369       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1370       def = SSA_NAME_DEF_STMT (var);
1371       /* Don't mark iv peeled from other one as biv.  */
1372       if (def
1373 	  && gimple_code (def) == GIMPLE_PHI
1374 	  && gimple_bb (def) == loop->header)
1375 	continue;
1376 
1377       incr_iv = get_iv (data, var);
1378       if (!incr_iv)
1379 	continue;
1380 
1381       /* If the increment is in the subloop, ignore it.  */
1382       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1383       if (incr_bb->loop_father != data->current_loop
1384 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1385 	continue;
1386 
1387       iv->biv_p = true;
1388       incr_iv->biv_p = true;
1389       if (iv->no_overflow)
1390 	data->bivs_not_used_in_addr++;
1391       if (incr_iv->no_overflow)
1392 	data->bivs_not_used_in_addr++;
1393     }
1394 }
1395 
1396 /* Checks whether STMT defines a linear induction variable and stores its
1397    parameters to IV.  */
1398 
1399 static bool
1400 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1401 {
1402   tree lhs, stop;
1403   class loop *loop = data->current_loop;
1404 
1405   iv->base = NULL_TREE;
1406   iv->step = NULL_TREE;
1407 
1408   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1409     return false;
1410 
1411   lhs = gimple_assign_lhs (stmt);
1412   if (TREE_CODE (lhs) != SSA_NAME)
1413     return false;
1414 
1415   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1416     return false;
1417 
1418   /* Stop expanding the iv base at the first ssa var referred to by the
1419      iv step.  Ideally we should stop at any such ssa var, but since that
1420      is expensive and the case is unusual, we only handle the first one.
1421 
1422      See PR64705 for the rationale.  */
1423   stop = extract_single_var_from_expr (iv->step);
1424   iv->base = expand_simple_operations (iv->base, stop);
1425   if (contains_abnormal_ssa_name_p (iv->base)
1426       || contains_abnormal_ssa_name_p (iv->step))
1427     return false;
1428 
1429   /* If STMT could throw, then do not consider STMT as defining a GIV.
1430      While this will suppress optimizations, we cannot safely delete this
1431      GIV and associated statements, even if it appears it is not used.  */
1432   if (stmt_could_throw_p (cfun, stmt))
1433     return false;
1434 
1435   return true;
1436 }
1437 
1438 /* Finds general ivs in statement STMT.  */
1439 
1440 static void
1441 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1442 {
1443   affine_iv iv;
1444 
1445   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1446     return;
1447 
1448   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1449 }
1450 
1451 /* Finds general ivs in basic block BB.  */
1452 
1453 static void
1454 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1455 {
1456   gimple_stmt_iterator bsi;
1457 
1458   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1459     find_givs_in_stmt (data, gsi_stmt (bsi));
1460 }
1461 
1462 /* Finds general ivs.  */
1463 
1464 static void
1465 find_givs (struct ivopts_data *data)
1466 {
1467   class loop *loop = data->current_loop;
1468   basic_block *body = get_loop_body_in_dom_order (loop);
1469   unsigned i;
1470 
1471   for (i = 0; i < loop->num_nodes; i++)
1472     find_givs_in_bb (data, body[i]);
1473   free (body);
1474 }
1475 
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477    variable and if so, its initial value and step.  */
1478 
1479 static bool
1480 find_induction_variables (struct ivopts_data *data)
1481 {
1482   unsigned i;
1483   bitmap_iterator bi;
1484 
1485   if (!find_bivs (data))
1486     return false;
1487 
1488   find_givs (data);
1489   mark_bivs (data);
1490 
1491   if (dump_file && (dump_flags & TDF_DETAILS))
1492     {
1493       class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1494 
1495       if (niter)
1496 	{
1497 	  fprintf (dump_file, "  number of iterations ");
1498 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 	  if (!integer_zerop (niter->may_be_zero))
1500 	    {
1501 	      fprintf (dump_file, "; zero if ");
1502 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1503 	    }
1504 	  fprintf (dump_file, "\n");
1505 	};
1506 
1507       fprintf (dump_file, "\n<Induction Vars>:\n");
1508       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1509 	{
1510 	  struct version_info *info = ver_info (data, i);
1511 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1513 	}
1514     }
1515 
1516   return true;
1517 }
1518 
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521    is the const offset stripped from IV base and MEM_TYPE is the type
1522    of the memory being addressed.  For uses of other types, ADDR_BASE
1523    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1524 
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 	    gimple *stmt, enum use_type type, tree mem_type,
1528 	    tree addr_base, poly_uint64 addr_offset)
1529 {
1530   struct iv_use *use = XCNEW (struct iv_use);
1531 
1532   use->id = group->vuses.length ();
1533   use->group_id = group->id;
1534   use->type = type;
1535   use->mem_type = mem_type;
1536   use->iv = iv;
1537   use->stmt = stmt;
1538   use->op_p = use_p;
1539   use->addr_base = addr_base;
1540   use->addr_offset = addr_offset;
1541 
1542   group->vuses.safe_push (use);
1543   return use;
1544 }
1545 
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547    NONLINEAR_USE is true if the invariant is used in a way we do not
1548    handle specially.  */
1549 
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1552 {
1553   basic_block bb;
1554   struct version_info *info;
1555 
1556   if (TREE_CODE (op) != SSA_NAME
1557       || virtual_operand_p (op))
1558     return;
1559 
1560   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561   if (bb
1562       && flow_bb_inside_loop_p (data->current_loop, bb))
1563     return;
1564 
1565   info = name_info (data, op);
1566   info->name = op;
1567   info->has_nonlin_use |= nonlinear_use;
1568   if (!info->inv_id)
1569     info->inv_id = ++data->max_inv_var_id;
1570   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1571 }
1572 
1573 /* Record a group of TYPE.  */
1574 
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1577 {
1578   struct iv_group *group = XCNEW (struct iv_group);
1579 
1580   group->id = data->vgroups.length ();
1581   group->type = type;
1582   group->related_cands = BITMAP_ALLOC (NULL);
1583   group->vuses.create (1);
1584   group->doloop_p = false;
1585 
1586   data->vgroups.safe_push (group);
1587   return group;
1588 }
1589 
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591    A new group will be created if there is no existing group for the use.
1592    MEM_TYPE is the type of memory being addressed, or NULL if this
1593    isn't an address reference.  */
1594 
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 		  struct iv *iv, gimple *stmt, enum use_type type,
1598 		  tree mem_type)
1599 {
1600   tree addr_base = NULL;
1601   struct iv_group *group = NULL;
1602   poly_uint64 addr_offset = 0;
1603 
1604   /* Record non address type use in a new group.  */
1605   if (address_p (type))
1606     {
1607       unsigned int i;
1608 
1609       addr_base = strip_offset (iv->base, &addr_offset);
1610       for (i = 0; i < data->vgroups.length (); i++)
1611 	{
1612 	  struct iv_use *use;
1613 
1614 	  group = data->vgroups[i];
1615 	  use = group->vuses[0];
1616 	  if (!address_p (use->type))
1617 	    continue;
1618 
1619 	  /* Check if it has the same stripped base and step.  */
1620 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621 	      && operand_equal_p (iv->step, use->iv->step, 0)
1622 	      && operand_equal_p (addr_base, use->addr_base, 0))
1623 	    break;
1624 	}
1625       if (i == data->vgroups.length ())
1626 	group = NULL;
1627     }
1628 
1629   if (!group)
1630     group = record_group (data, type);
1631 
1632   return record_use (group, use_p, iv, stmt, type, mem_type,
1633 		     addr_base, addr_offset);
1634 }
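
/* For instance (a hypothetical example), address uses &a[i] and &a[i + 1]
   in the same loop share the stripped base &a, the same base object and the
   same step, so they end up in one group with constant offsets 0 and 4
   (assuming 4-byte elements), whereas a use based on an unrelated array b
   starts a new group.  */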
1635 
1636 /* Checks whether the use OP is interesting and if so, records it.  */
1637 
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641   struct iv *iv;
1642   gimple *stmt;
1643   struct iv_use *use;
1644 
1645   if (TREE_CODE (op) != SSA_NAME)
1646     return NULL;
1647 
1648   iv = get_iv (data, op);
1649   if (!iv)
1650     return NULL;
1651 
1652   if (iv->nonlin_use)
1653     {
1654       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655       return iv->nonlin_use;
1656     }
1657 
1658   if (integer_zerop (iv->step))
1659     {
1660       record_invariant (data, op, true);
1661       return NULL;
1662     }
1663 
1664   stmt = SSA_NAME_DEF_STMT (op);
1665   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666 
1667   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668   iv->nonlin_use = use;
1669   return use;
1670 }
1671 
1672 /* Indicate how compare type iv_use can be handled.  */
1673 enum comp_iv_rewrite
1674 {
1675   COMP_IV_NA,
1676   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677   COMP_IV_EXPR,
1678   /* We may rewrite compare type iv_uses on both sides of comparison by
1679      expressing value of each iv_use.  */
1680   COMP_IV_EXPR_2,
1681   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682      or by eliminating it with other iv_cand.  */
1683   COMP_IV_ELIM
1684 };
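
/* For illustration (assuming i_1 and j_2 are ivs and n_3 is loop invariant):
   a condition such as "i_1 < j_2" can be handled as COMP_IV_EXPR_2,
   "i_1 < n_3" as COMP_IV_ELIM, and "i_1 < x_4", where x_4 is defined inside
   the loop but is not an iv, as COMP_IV_EXPR.  */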
1685 
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687    of an induction variable and an invariant.  If this is the case,
1688    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1689    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690    induction variable descriptions, and the way the compare can be
1691    rewritten is returned.  If this is not the case, CONTROL_VAR and BOUND
1692    are set to the arguments of the condition and COMP_IV_NA is returned.  */
1693 
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696 		       tree **control_var, tree **bound,
1697 		       struct iv **iv_var, struct iv **iv_bound)
1698 {
1699   /* The objects returned when COND has constant operands.  */
1700   static struct iv const_iv;
1701   static tree zero;
1702   tree *op0 = &zero, *op1 = &zero;
1703   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705 
1706   if (gimple_code (stmt) == GIMPLE_COND)
1707     {
1708       gcond *cond_stmt = as_a <gcond *> (stmt);
1709       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711     }
1712   else
1713     {
1714       op0 = gimple_assign_rhs1_ptr (stmt);
1715       op1 = gimple_assign_rhs2_ptr (stmt);
1716     }
1717 
1718   zero = integer_zero_node;
1719   const_iv.step = integer_zero_node;
1720 
1721   if (TREE_CODE (*op0) == SSA_NAME)
1722     iv0 = get_iv (data, *op0);
1723   if (TREE_CODE (*op1) == SSA_NAME)
1724     iv1 = get_iv (data, *op1);
1725 
1726   /* If both sides of the comparison are IVs, we can express the ivs on both ends.  */
1727   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728     {
1729       rewrite_type = COMP_IV_EXPR_2;
1730       goto end;
1731     }
1732 
1733   /* If neither side of the comparison is an IV.  */
1734   if ((!iv0 || integer_zerop (iv0->step))
1735       && (!iv1 || integer_zerop (iv1->step)))
1736     goto end;
1737 
1738   /* Control variable may be on the other side.  */
1739   if (!iv0 || integer_zerop (iv0->step))
1740     {
1741       std::swap (op0, op1);
1742       std::swap (iv0, iv1);
1743     }
1744   /* If one side is an IV and the other side isn't loop invariant.  */
1745   if (!iv1)
1746     rewrite_type = COMP_IV_EXPR;
1747   /* If one side is an IV and the other side is loop invariant.  */
1748   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749     rewrite_type = COMP_IV_ELIM;
1750 
1751 end:
1752   if (control_var)
1753     *control_var = op0;
1754   if (iv_var)
1755     *iv_var = iv0;
1756   if (bound)
1757     *bound = op1;
1758   if (iv_bound)
1759     *iv_bound = iv1;
1760 
1761   return rewrite_type;
1762 }
1763 
1764 /* Checks whether the condition in STMT is interesting and if so,
1765    records it.  */
1766 
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770   tree *var_p, *bound_p;
1771   struct iv *var_iv, *bound_iv;
1772   enum comp_iv_rewrite ret;
1773 
1774   ret = extract_cond_operands (data, stmt,
1775 			       &var_p, &bound_p, &var_iv, &bound_iv);
1776   if (ret == COMP_IV_NA)
1777     {
1778       find_interesting_uses_op (data, *var_p);
1779       find_interesting_uses_op (data, *bound_p);
1780       return;
1781     }
1782 
1783   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784   /* Record compare type iv_use for iv on the other side of comparison.  */
1785   if (ret == COMP_IV_EXPR_2)
1786     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788 
1789 /* Returns the outermost loop, relative to the loop LOOP, that EXPR is
1790    obviously invariant in, i.e. such that all its operands are defined
1791    outside of the returned loop.  Returns NULL if EXPR is not
1792    even obviously invariant in LOOP.  */
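/* For instance, if LOOP is at depth 3 and EXPR is an SSA name defined in
   LOOP's grandparent loop (depth 1), the superloop of LOOP at depth 2 is
   returned -- the outermost loop in which EXPR still does not vary.  A
   gimple minimal invariant is invariant everywhere, so the tree root of
   the loop hierarchy is returned for it.  */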
1793 
1794 class loop *
1795 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1796 {
1797   basic_block def_bb;
1798   unsigned i, len;
1799 
1800   if (is_gimple_min_invariant (expr))
1801     return current_loops->tree_root;
1802 
1803   if (TREE_CODE (expr) == SSA_NAME)
1804     {
1805       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806       if (def_bb)
1807 	{
1808 	  if (flow_bb_inside_loop_p (loop, def_bb))
1809 	    return NULL;
1810 	  return superloop_at_depth (loop,
1811 				     loop_depth (def_bb->loop_father) + 1);
1812 	}
1813 
1814       return current_loops->tree_root;
1815     }
1816 
1817   if (!EXPR_P (expr))
1818     return NULL;
1819 
1820   unsigned maxdepth = 0;
1821   len = TREE_OPERAND_LENGTH (expr);
1822   for (i = 0; i < len; i++)
1823     {
1824       class loop *ivloop;
1825       if (!TREE_OPERAND (expr, i))
1826 	continue;
1827 
1828       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829       if (!ivloop)
1830 	return NULL;
1831       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832     }
1833 
1834   return superloop_at_depth (loop, maxdepth);
1835 }
1836 
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838    i.e. if all its operands are defined outside of the LOOP.  LOOP
1839    should not be the function body.  */
1840 
1841 bool
1842 expr_invariant_in_loop_p (class loop *loop, tree expr)
1843 {
1844   basic_block def_bb;
1845   unsigned i, len;
1846 
1847   gcc_assert (loop_depth (loop) > 0);
1848 
1849   if (is_gimple_min_invariant (expr))
1850     return true;
1851 
1852   if (TREE_CODE (expr) == SSA_NAME)
1853     {
1854       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855       if (def_bb
1856 	  && flow_bb_inside_loop_p (loop, def_bb))
1857 	return false;
1858 
1859       return true;
1860     }
1861 
1862   if (!EXPR_P (expr))
1863     return false;
1864 
1865   len = TREE_OPERAND_LENGTH (expr);
1866   for (i = 0; i < len; i++)
1867     if (TREE_OPERAND (expr, i)
1868 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869       return false;
1870 
1871   return true;
1872 }
1873 
1874 /* Given expression EXPR which computes inductive values with respect
1875    to the loop recorded in DATA, this function returns the biv from which
1876    EXPR is derived, by tracing definition chains of SSA variables in EXPR.  */
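/* As a rough example, for a derived iv p_3 whose definition chain is
   "p_3 = q_4 + 4" and "q_4 = i_2 * 2", where i_2 is a biv of the current
   loop, tracing the right-hand sides eventually reaches i_2, which is
   returned.  NULL is returned when no biv can be found this way, e.g. for
   loop invariants.  */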
1877 
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881   struct iv *iv;
1882   unsigned i, n;
1883   tree e2, e1;
1884   enum tree_code code;
1885   gimple *stmt;
1886 
1887   if (expr == NULL_TREE)
1888     return NULL;
1889 
1890   if (is_gimple_min_invariant (expr))
1891     return NULL;
1892 
1893   code = TREE_CODE (expr);
1894   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895     {
1896       n = TREE_OPERAND_LENGTH (expr);
1897       for (i = 0; i < n; i++)
1898 	{
1899 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900 	  if (iv)
1901 	    return iv;
1902 	}
1903     }
1904 
1905   /* Stop if it's not an SSA name.  */
1906   if (code != SSA_NAME)
1907     return NULL;
1908 
1909   iv = get_iv (data, expr);
1910   if (!iv || integer_zerop (iv->step))
1911     return NULL;
1912   else if (iv->biv_p)
1913     return iv;
1914 
1915   stmt = SSA_NAME_DEF_STMT (expr);
1916   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917     {
1918       ssa_op_iter iter;
1919       use_operand_p use_p;
1920       basic_block phi_bb = gimple_bb (phi);
1921 
1922       /* Skip loop header PHI that doesn't define biv.  */
1923       if (phi_bb->loop_father == data->current_loop)
1924 	return NULL;
1925 
1926       if (virtual_operand_p (gimple_phi_result (phi)))
1927 	return NULL;
1928 
1929       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930 	{
1931 	  tree use = USE_FROM_PTR (use_p);
1932 	  iv = find_deriving_biv_for_expr (data, use);
1933 	  if (iv)
1934 	    return iv;
1935 	}
1936       return NULL;
1937     }
1938   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939     return NULL;
1940 
1941   e1 = gimple_assign_rhs1 (stmt);
1942   code = gimple_assign_rhs_code (stmt);
1943   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944     return find_deriving_biv_for_expr (data, e1);
1945 
1946   switch (code)
1947     {
1948     case MULT_EXPR:
1949     case PLUS_EXPR:
1950     case MINUS_EXPR:
1951     case POINTER_PLUS_EXPR:
1952       /* Increments, decrements and multiplications by a constant
1953 	 are simple.  */
1954       e2 = gimple_assign_rhs2 (stmt);
1955       iv = find_deriving_biv_for_expr (data, e2);
1956       if (iv)
1957 	return iv;
1958       gcc_fallthrough ();
1959 
1960     CASE_CONVERT:
1961       /* Casts are simple.  */
1962       return find_deriving_biv_for_expr (data, e1);
1963 
1964     default:
1965       break;
1966     }
1967 
1968   return NULL;
1969 }
1970 
1971 /* Record that BIV and the bivs immediately preceding or following it are
1972    used in address type uses.  */
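/* For instance, if BIV has base b and step 4, another biv with the same
   step and base b + 4 is its immediate successor; both are then marked as
   having address type uses, which can later allow candidates of sizetype
   precision to be added for them (see add_iv_candidate_for_biv).  */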
1973 
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977   unsigned i;
1978   tree type, base_1, base_2;
1979   bitmap_iterator bi;
1980 
1981   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982       || biv->have_address_use || !biv->no_overflow)
1983     return;
1984 
1985   type = TREE_TYPE (biv->base);
1986   if (!INTEGRAL_TYPE_P (type))
1987     return;
1988 
1989   biv->have_address_use = true;
1990   data->bivs_not_used_in_addr--;
1991   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993     {
1994       struct iv *iv = ver_info (data, i)->iv;
1995 
1996       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997 	  || iv->have_address_use || !iv->no_overflow)
1998 	continue;
1999 
2000       if (type != TREE_TYPE (iv->base)
2001 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002 	continue;
2003 
2004       if (!operand_equal_p (biv->step, iv->step, 0))
2005 	continue;
2006 
2007       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008       if (operand_equal_p (base_1, iv->base, 0)
2009 	  || operand_equal_p (base_2, biv->base, 0))
2010 	{
2011 	  iv->have_address_use = true;
2012 	  data->bivs_not_used_in_addr--;
2013 	}
2014     }
2015 }
2016 
2017 /* Accumulates the steps of indices into DATA and replaces their values with the
2018    initial ones.  Returns false when the value of the index cannot be determined.
2019    Callback for for_each_index.  */
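/* For instance (assuming a 4-byte element type), for a reference a[i_1]
   where i_1 is an iv with base 0 and step 1, the index is replaced by its
   initial value 0 and 1 * 4 bytes are added to the accumulated step, so
   the address of the reference is later described roughly as <&a[0], +, 4>.  */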
2020 
2021 struct ifs_ivopts_data
2022 {
2023   struct ivopts_data *ivopts_data;
2024   gimple *stmt;
2025   tree step;
2026 };
2027 
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032   struct iv *iv;
2033   bool use_overflow_semantics = false;
2034   tree step, iv_base, iv_step, lbound, off;
2035   class loop *loop = dta->ivopts_data->current_loop;
2036 
2037   /* If base is a component ref, require that the offset of the reference
2038      be invariant.  */
2039   if (TREE_CODE (base) == COMPONENT_REF)
2040     {
2041       off = component_ref_field_offset (base);
2042       return expr_invariant_in_loop_p (loop, off);
2043     }
2044 
2045   /* If base is array, first check whether we will be able to move the
2046      reference out of the loop (in order to take its address in strength
2047      reduction).  In order for this to work we need both lower bound
2048      and step to be loop invariants.  */
2049   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050     {
2051       /* Moreover, for a range, the size needs to be invariant as well.  */
2052       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054 	return false;
2055 
2056       step = array_ref_element_size (base);
2057       lbound = array_ref_low_bound (base);
2058 
2059       if (!expr_invariant_in_loop_p (loop, step)
2060 	  || !expr_invariant_in_loop_p (loop, lbound))
2061 	return false;
2062     }
2063 
2064   if (TREE_CODE (*idx) != SSA_NAME)
2065     return true;
2066 
2067   iv = get_iv (dta->ivopts_data, *idx);
2068   if (!iv)
2069     return false;
2070 
2071   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2072 	  *&x[0], which is not folded and does not trigger the
2073 	  ARRAY_REF path below.  */
2074   *idx = iv->base;
2075 
2076   if (integer_zerop (iv->step))
2077     return true;
2078 
2079   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080     {
2081       step = array_ref_element_size (base);
2082 
2083       /* We only handle addresses whose step is an integer constant.  */
2084       if (TREE_CODE (step) != INTEGER_CST)
2085 	return false;
2086     }
2087   else
2088     /* The step for pointer arithmetic is already 1 byte.  */
2089     step = size_one_node;
2090 
2091   iv_base = iv->base;
2092   iv_step = iv->step;
2093   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094     use_overflow_semantics = true;
2095 
2096   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097 			    sizetype, &iv_base, &iv_step, dta->stmt,
2098 			    use_overflow_semantics))
2099     {
2100       /* The index might wrap.  */
2101       return false;
2102     }
2103 
2104   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106 
2107   if (dta->ivopts_data->bivs_not_used_in_addr)
2108     {
2109       if (!iv->biv_p)
2110 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111 
2112       record_biv_for_address_use (dta->ivopts_data, iv);
2113     }
2114   return true;
2115 }
2116 
2117 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2118    object is passed to it in DATA.  */
2119 
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122 		void *vdata)
2123 {
2124   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125   find_interesting_uses_op (data, *idx);
2126   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127     {
2128       find_interesting_uses_op (data, array_ref_element_size (base));
2129       find_interesting_uses_op (data, array_ref_low_bound (base));
2130     }
2131   return true;
2132 }
2133 
2134 /* If we can prove that TOP = cst * BOT for some constant cst,
2135    store cst to MUL and return true.  Otherwise return false.
2136    The returned value is always sign-extended, regardless of the
2137    signedness of TOP and BOT.  */
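/* E.g. for TOP = i_1 * 4 and BOT = i_1 the result is 4, and for TOP = 12
   and BOT = 4 it is 3.  TOP = 13 with BOT = 4 fails because the division
   leaves a remainder, as does any pair whose ratio is not a compile-time
   constant.  */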
2138 
2139 static bool
2140 constant_multiple_of (tree top, tree bot, widest_int *mul)
2141 {
2142   tree mby;
2143   enum tree_code code;
2144   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2145   widest_int res, p0, p1;
2146 
2147   STRIP_NOPS (top);
2148   STRIP_NOPS (bot);
2149 
2150   if (operand_equal_p (top, bot, 0))
2151     {
2152       *mul = 1;
2153       return true;
2154     }
2155 
2156   code = TREE_CODE (top);
2157   switch (code)
2158     {
2159     case MULT_EXPR:
2160       mby = TREE_OPERAND (top, 1);
2161       if (TREE_CODE (mby) != INTEGER_CST)
2162 	return false;
2163 
2164       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2165 	return false;
2166 
2167       *mul = wi::sext (res * wi::to_widest (mby), precision);
2168       return true;
2169 
2170     case PLUS_EXPR:
2171     case MINUS_EXPR:
2172       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2173 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2174 	return false;
2175 
2176       if (code == MINUS_EXPR)
2177 	p1 = -p1;
2178       *mul = wi::sext (p0 + p1, precision);
2179       return true;
2180 
2181     case INTEGER_CST:
2182       if (TREE_CODE (bot) != INTEGER_CST)
2183 	return false;
2184 
2185       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2186       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2187       if (p1 == 0)
2188 	return false;
2189       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2190       return res == 0;
2191 
2192     default:
2193       if (POLY_INT_CST_P (top)
2194 	  && POLY_INT_CST_P (bot)
2195 	  && constant_multiple_p (wi::to_poly_widest (top),
2196 				  wi::to_poly_widest (bot), mul))
2197 	return true;
2198 
2199       return false;
2200     }
2201 }
2202 
2203 /* Return true if memory reference REF with step STEP may be unaligned.  */
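/* For instance, on a strict-alignment target, a reference to a type that
   requires 8-byte alignment may become misaligned if STEP is only 4:
   after one iteration the address is no longer a multiple of the required
   alignment, which the trailing-zero check on STEP below detects.  */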
2204 
2205 static bool
2206 may_be_unaligned_p (tree ref, tree step)
2207 {
2208   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209      thus they are not misaligned.  */
2210   if (TREE_CODE (ref) == TARGET_MEM_REF)
2211     return false;
2212 
2213   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216 
2217   unsigned HOST_WIDE_INT bitpos;
2218   unsigned int ref_align;
2219   get_object_alignment_1 (ref, &ref_align, &bitpos);
2220   if (ref_align < align
2221       || (bitpos % align) != 0
2222       || (bitpos % BITS_PER_UNIT) != 0)
2223     return true;
2224 
2225   unsigned int trailing_zeros = tree_ctz (step);
2226   if (trailing_zeros < HOST_BITS_PER_INT
2227       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228     return true;
2229 
2230   return false;
2231 }
2232 
2233 /* Return true if EXPR may be non-addressable.   */
2234 
2235 bool
2236 may_be_nonaddressable_p (tree expr)
2237 {
2238   switch (TREE_CODE (expr))
2239     {
2240     case VAR_DECL:
2241       /* Check if it's a register variable.  */
2242       return DECL_HARD_REGISTER (expr);
2243 
2244     case TARGET_MEM_REF:
2245       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2246 	 target, thus they are always addressable.  */
2247       return false;
2248 
2249     case MEM_REF:
2250       /* Likewise for MEM_REFs, modulo the storage order.  */
2251       return REF_REVERSE_STORAGE_ORDER (expr);
2252 
2253     case BIT_FIELD_REF:
2254       if (REF_REVERSE_STORAGE_ORDER (expr))
2255 	return true;
2256       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2257 
2258     case COMPONENT_REF:
2259       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2260 	return true;
2261       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2262 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263 
2264     case ARRAY_REF:
2265     case ARRAY_RANGE_REF:
2266       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2267 	return true;
2268       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269 
2270     case VIEW_CONVERT_EXPR:
2271       /* This kind of view-conversions may wrap non-addressable objects
2272 	 and make them look addressable.  After some processing the
2273 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2274 	 of inappropriate objects to be built.  */
2275       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2276 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2277 	return true;
2278       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279 
2280     CASE_CONVERT:
2281       return true;
2282 
2283     default:
2284       break;
2285     }
2286 
2287   return false;
2288 }
2289 
2290 /* Finds addresses in *OP_P inside STMT.  */
2291 
2292 static void
2293 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2294 			       tree *op_p)
2295 {
2296   tree base = *op_p, step = size_zero_node;
2297   struct iv *civ;
2298   struct ifs_ivopts_data ifs_ivopts_data;
2299 
2300   /* Do not play with volatile memory references.  A bit too conservative,
2301      perhaps, but safe.  */
2302   if (gimple_has_volatile_ops (stmt))
2303     goto fail;
2304 
2305   /* Ignore bitfields for now.  Not really something terribly complicated
2306      to handle.  TODO.  */
2307   if (TREE_CODE (base) == BIT_FIELD_REF)
2308     goto fail;
2309 
2310   base = unshare_expr (base);
2311 
2312   if (TREE_CODE (base) == TARGET_MEM_REF)
2313     {
2314       tree type = build_pointer_type (TREE_TYPE (base));
2315       tree astep;
2316 
2317       if (TMR_BASE (base)
2318 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2319 	{
2320 	  civ = get_iv (data, TMR_BASE (base));
2321 	  if (!civ)
2322 	    goto fail;
2323 
2324 	  TMR_BASE (base) = civ->base;
2325 	  step = civ->step;
2326 	}
2327       if (TMR_INDEX2 (base)
2328 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2329 	{
2330 	  civ = get_iv (data, TMR_INDEX2 (base));
2331 	  if (!civ)
2332 	    goto fail;
2333 
2334 	  TMR_INDEX2 (base) = civ->base;
2335 	  step = civ->step;
2336 	}
2337       if (TMR_INDEX (base)
2338 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2339 	{
2340 	  civ = get_iv (data, TMR_INDEX (base));
2341 	  if (!civ)
2342 	    goto fail;
2343 
2344 	  TMR_INDEX (base) = civ->base;
2345 	  astep = civ->step;
2346 
2347 	  if (astep)
2348 	    {
2349 	      if (TMR_STEP (base))
2350 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2351 
2352 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2353 	    }
2354 	}
2355 
2356       if (integer_zerop (step))
2357 	goto fail;
2358       base = tree_mem_ref_addr (type, base);
2359     }
2360   else
2361     {
2362       ifs_ivopts_data.ivopts_data = data;
2363       ifs_ivopts_data.stmt = stmt;
2364       ifs_ivopts_data.step = size_zero_node;
2365       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2366 	  || integer_zerop (ifs_ivopts_data.step))
2367 	goto fail;
2368       step = ifs_ivopts_data.step;
2369 
2370       /* Check that the base expression is addressable.  This needs
2371 	 to be done after substituting bases of IVs into it.  */
2372       if (may_be_nonaddressable_p (base))
2373 	goto fail;
2374 
2375       /* Moreover, on strict alignment platforms, check that it is
2376 	 sufficiently aligned.  */
2377       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2378 	goto fail;
2379 
2380       base = build_fold_addr_expr (base);
2381 
2382       /* Substituting bases of IVs into the base expression might
2383 	 have caused folding opportunities.  */
2384       if (TREE_CODE (base) == ADDR_EXPR)
2385 	{
2386 	  tree *ref = &TREE_OPERAND (base, 0);
2387 	  while (handled_component_p (*ref))
2388 	    ref = &TREE_OPERAND (*ref, 0);
2389 	  if (TREE_CODE (*ref) == MEM_REF)
2390 	    {
2391 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2392 				      TREE_OPERAND (*ref, 0),
2393 				      TREE_OPERAND (*ref, 1));
2394 	      if (tem)
2395 		*ref = tem;
2396 	    }
2397 	}
2398     }
2399 
2400   civ = alloc_iv (data, base, step);
2401   /* Fail if base object of this memory reference is unknown.  */
2402   if (civ->base_object == NULL_TREE)
2403     goto fail;
2404 
2405   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2406   return;
2407 
2408 fail:
2409   for_each_index (op_p, idx_record_use, data);
2410 }
2411 
2412 /* Finds and records invariants used in STMT.  */
2413 
2414 static void
2415 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2416 {
2417   ssa_op_iter iter;
2418   use_operand_p use_p;
2419   tree op;
2420 
2421   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2422     {
2423       op = USE_FROM_PTR (use_p);
2424       record_invariant (data, op, false);
2425     }
2426 }
2427 
2428 /* CALL calls an internal function.  If operand *OP_P will become an
2429    address when the call is expanded, return the type of the memory
2430    being addressed, otherwise return null.  */
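/* For instance, for a masked load "lhs_1 = .MASK_LOAD (ptr_2, align, mask)"
   the first argument becomes the address of the access when the call is
   expanded, and the memory type is the type of lhs_1; for .MASK_STORE the
   memory type is the type of the stored value argument.  */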
2431 
2432 static tree
2433 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2434 {
2435   switch (gimple_call_internal_fn (call))
2436     {
2437     case IFN_MASK_LOAD:
2438     case IFN_MASK_LOAD_LANES:
2439     case IFN_LEN_LOAD:
2440       if (op_p == gimple_call_arg_ptr (call, 0))
2441 	return TREE_TYPE (gimple_call_lhs (call));
2442       return NULL_TREE;
2443 
2444     case IFN_MASK_STORE:
2445     case IFN_MASK_STORE_LANES:
2446     case IFN_LEN_STORE:
2447       if (op_p == gimple_call_arg_ptr (call, 0))
2448 	return TREE_TYPE (gimple_call_arg (call, 3));
2449       return NULL_TREE;
2450 
2451     default:
2452       return NULL_TREE;
2453     }
2454 }
2455 
2456 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2457    Return true if the operand will become an address when STMT
2458    is expanded and record the associated address use if so.  */
2459 
2460 static bool
2461 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2462 		       struct iv *iv)
2463 {
2464   /* Fail if base object of this memory reference is unknown.  */
2465   if (iv->base_object == NULL_TREE)
2466     return false;
2467 
2468   tree mem_type = NULL_TREE;
2469   if (gcall *call = dyn_cast <gcall *> (stmt))
2470     if (gimple_call_internal_p (call))
2471       mem_type = get_mem_type_for_internal_fn (call, op_p);
2472   if (mem_type)
2473     {
2474       iv = alloc_iv (data, iv->base, iv->step);
2475       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2476       return true;
2477     }
2478   return false;
2479 }
2480 
2481 /* Finds interesting uses of induction variables in the statement STMT.  */
2482 
2483 static void
2484 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2485 {
2486   struct iv *iv;
2487   tree op, *lhs, *rhs;
2488   ssa_op_iter iter;
2489   use_operand_p use_p;
2490   enum tree_code code;
2491 
2492   find_invariants_stmt (data, stmt);
2493 
2494   if (gimple_code (stmt) == GIMPLE_COND)
2495     {
2496       find_interesting_uses_cond (data, stmt);
2497       return;
2498     }
2499 
2500   if (is_gimple_assign (stmt))
2501     {
2502       lhs = gimple_assign_lhs_ptr (stmt);
2503       rhs = gimple_assign_rhs1_ptr (stmt);
2504 
2505       if (TREE_CODE (*lhs) == SSA_NAME)
2506 	{
2507 	  /* If the statement defines an induction variable, the uses are not
2508 	     interesting by themselves.  */
2509 
2510 	  iv = get_iv (data, *lhs);
2511 
2512 	  if (iv && !integer_zerop (iv->step))
2513 	    return;
2514 	}
2515 
2516       code = gimple_assign_rhs_code (stmt);
2517       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2518 	  && (REFERENCE_CLASS_P (*rhs)
2519 	      || is_gimple_val (*rhs)))
2520 	{
2521 	  if (REFERENCE_CLASS_P (*rhs))
2522 	    find_interesting_uses_address (data, stmt, rhs);
2523 	  else
2524 	    find_interesting_uses_op (data, *rhs);
2525 
2526 	  if (REFERENCE_CLASS_P (*lhs))
2527 	    find_interesting_uses_address (data, stmt, lhs);
2528 	  return;
2529 	}
2530       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2531 	{
2532 	  find_interesting_uses_cond (data, stmt);
2533 	  return;
2534 	}
2535 
2536       /* TODO -- we should also handle address uses of type
2537 
2538 	 memory = call (whatever);
2539 
2540 	 and
2541 
2542 	 call (memory).  */
2543     }
2544 
2545   if (gimple_code (stmt) == GIMPLE_PHI
2546       && gimple_bb (stmt) == data->current_loop->header)
2547     {
2548       iv = get_iv (data, PHI_RESULT (stmt));
2549 
2550       if (iv && !integer_zerop (iv->step))
2551 	return;
2552     }
2553 
2554   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2555     {
2556       op = USE_FROM_PTR (use_p);
2557 
2558       if (TREE_CODE (op) != SSA_NAME)
2559 	continue;
2560 
2561       iv = get_iv (data, op);
2562       if (!iv)
2563 	continue;
2564 
2565       if (!find_address_like_use (data, stmt, use_p->use, iv))
2566 	find_interesting_uses_op (data, op);
2567     }
2568 }
2569 
2570 /* Finds interesting uses of induction variables outside of loops
2571    on loop exit edge EXIT.  */
2572 
2573 static void
2574 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2575 {
2576   gphi *phi;
2577   gphi_iterator psi;
2578   tree def;
2579 
2580   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2581     {
2582       phi = psi.phi ();
2583       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2584       if (!virtual_operand_p (def))
2585 	find_interesting_uses_op (data, def);
2586     }
2587 }
2588 
2589 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2590    mode for memory reference represented by USE.  */
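/* For instance, on a target whose [base + offset] addressing accepts, say,
   signed 12-bit displacements, an OFFSET of 100 would be valid while 5000
   would not.  The check is performed by building a
   (plus (reg) (const_int OFFSET)) address with a scratch virtual register
   and asking memory_address_addr_space_p whether it is legitimate for the
   access mode, caching one such address per mode and address space.  */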
2591 
2592 static GTY (()) vec<rtx, va_gc> *addr_list;
2593 
2594 static bool
2595 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2596 {
2597   rtx reg, addr;
2598   unsigned list_index;
2599   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2600   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2601 
2602   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2603   if (list_index >= vec_safe_length (addr_list))
2604     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2605 
2606   addr = (*addr_list)[list_index];
2607   if (!addr)
2608     {
2609       addr_mode = targetm.addr_space.address_mode (as);
2610       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2611       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2612       (*addr_list)[list_index] = addr;
2613     }
2614   else
2615     addr_mode = GET_MODE (addr);
2616 
2617   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2618   return (memory_address_addr_space_p (mem_mode, addr, as));
2619 }
2620 
2621 /* Comparison function to sort group in ascending order of addr_offset.  */
2622 
2623 static int
2624 group_compare_offset (const void *a, const void *b)
2625 {
2626   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2627   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2628 
2629   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2630 }
2631 
2632 /* Check if small groups should be split.  Return true if no group
2633    contains more than two uses with distinct addr_offsets.  Return
2634    false otherwise.  We want to split such groups because:
2635 
2636      1) Small groups don't have much benefit and may interfere with
2637 	general candidate selection.
2638      2) Size for problem with only small groups is usually small and
2639 	general algorithm can handle it well.
2640 
2641    TODO -- The above claim may not hold when we want to merge memory
2642    accesses with consecutive addresses.  */
2643 
2644 static bool
2645 split_small_address_groups_p (struct ivopts_data *data)
2646 {
2647   unsigned int i, j, distinct = 1;
2648   struct iv_use *pre;
2649   struct iv_group *group;
2650 
2651   for (i = 0; i < data->vgroups.length (); i++)
2652     {
2653       group = data->vgroups[i];
2654       if (group->vuses.length () == 1)
2655 	continue;
2656 
2657       gcc_assert (address_p (group->type));
2658       if (group->vuses.length () == 2)
2659 	{
2660 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2661 				      group->vuses[1]->addr_offset) > 0)
2662 	    std::swap (group->vuses[0], group->vuses[1]);
2663 	}
2664       else
2665 	group->vuses.qsort (group_compare_offset);
2666 
2667       if (distinct > 2)
2668 	continue;
2669 
2670       distinct = 1;
2671       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2672 	{
2673 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2674 	    {
2675 	      pre = group->vuses[j];
2676 	      distinct++;
2677 	    }
2678 
2679 	  if (distinct > 2)
2680 	    break;
2681 	}
2682     }
2683 
2684   return (distinct <= 2);
2685 }
2686 
2687 /* For each group of address type uses, this function further groups
2688    these uses according to the maximum offset supported by target's
2689    [base + offset] addressing mode.  */
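/* For instance, with uses &a[i], &a[i + 1] and &a[i + 100000] over a
   4-byte element type, the first two uses typically stay in one group
   (their offsets differ by only 4 bytes), while the distant use is moved
   into a new group if its offset does not fit the addressing mode.  */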
2690 
2691 static void
2692 split_address_groups (struct ivopts_data *data)
2693 {
2694   unsigned int i, j;
2695   /* Whether to split every address group regardless of offsets.  */
2696   bool split_p = split_small_address_groups_p (data);
2697 
2698   for (i = 0; i < data->vgroups.length (); i++)
2699     {
2700       struct iv_group *new_group = NULL;
2701       struct iv_group *group = data->vgroups[i];
2702       struct iv_use *use = group->vuses[0];
2703 
2704       use->id = 0;
2705       use->group_id = group->id;
2706       if (group->vuses.length () == 1)
2707 	continue;
2708 
2709       gcc_assert (address_p (use->type));
2710 
2711       for (j = 1; j < group->vuses.length ();)
2712 	{
2713 	  struct iv_use *next = group->vuses[j];
2714 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2715 
2716 	  /* Split the group if asked to, or the offset against the first
2717 	     use can't fit in offset part of addressing mode.  IV uses
2718 	     having the same offset are still kept in one group.  */
2719 	  if (maybe_ne (offset, 0)
2720 	      && (split_p || !addr_offset_valid_p (use, offset)))
2721 	    {
2722 	      if (!new_group)
2723 		new_group = record_group (data, group->type);
2724 	      group->vuses.ordered_remove (j);
2725 	      new_group->vuses.safe_push (next);
2726 	      continue;
2727 	    }
2728 
2729 	  next->id = j;
2730 	  next->group_id = group->id;
2731 	  j++;
2732 	}
2733     }
2734 }
2735 
2736 /* Finds uses of the induction variables that are interesting.  */
2737 
2738 static void
2739 find_interesting_uses (struct ivopts_data *data)
2740 {
2741   basic_block bb;
2742   gimple_stmt_iterator bsi;
2743   basic_block *body = get_loop_body (data->current_loop);
2744   unsigned i;
2745   edge e;
2746 
2747   for (i = 0; i < data->current_loop->num_nodes; i++)
2748     {
2749       edge_iterator ei;
2750       bb = body[i];
2751 
2752       FOR_EACH_EDGE (e, ei, bb->succs)
2753 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2754 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2755 	  find_interesting_uses_outside (data, e);
2756 
2757       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2758 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2759       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2760 	if (!is_gimple_debug (gsi_stmt (bsi)))
2761 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2762     }
2763   free (body);
2764 
2765   split_address_groups (data);
2766 
2767   if (dump_file && (dump_flags & TDF_DETAILS))
2768     {
2769       fprintf (dump_file, "\n<IV Groups>:\n");
2770       dump_groups (dump_file, data);
2771       fprintf (dump_file, "\n");
2772     }
2773 }
2774 
2775 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2776    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2777    we are at the top-level of the processed address.  */
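/* For instance, stripping "ptr_1 + 16" yields ptr_1 with *OFFSET = 16, and
   inside an address like &a[4] over a 4-byte element type the constant
   index contributes 16 to *OFFSET while the array reference itself is
   stripped away.  */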
2778 
2779 static tree
2780 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2781 		poly_int64 *offset)
2782 {
2783   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2784   enum tree_code code;
2785   tree type, orig_type = TREE_TYPE (expr);
2786   poly_int64 off0, off1;
2787   HOST_WIDE_INT st;
2788   tree orig_expr = expr;
2789 
2790   STRIP_NOPS (expr);
2791 
2792   type = TREE_TYPE (expr);
2793   code = TREE_CODE (expr);
2794   *offset = 0;
2795 
2796   switch (code)
2797     {
2798     case POINTER_PLUS_EXPR:
2799     case PLUS_EXPR:
2800     case MINUS_EXPR:
2801       op0 = TREE_OPERAND (expr, 0);
2802       op1 = TREE_OPERAND (expr, 1);
2803 
2804       op0 = strip_offset_1 (op0, false, false, &off0);
2805       op1 = strip_offset_1 (op1, false, false, &off1);
2806 
2807       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2808       if (op0 == TREE_OPERAND (expr, 0)
2809 	  && op1 == TREE_OPERAND (expr, 1))
2810 	return orig_expr;
2811 
2812       if (integer_zerop (op1))
2813 	expr = op0;
2814       else if (integer_zerop (op0))
2815 	{
2816 	  if (code == MINUS_EXPR)
2817 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2818 	  else
2819 	    expr = op1;
2820 	}
2821       else
2822 	expr = fold_build2 (code, type, op0, op1);
2823 
2824       return fold_convert (orig_type, expr);
2825 
2826     case MULT_EXPR:
2827       op1 = TREE_OPERAND (expr, 1);
2828       if (!cst_and_fits_in_hwi (op1))
2829 	return orig_expr;
2830 
2831       op0 = TREE_OPERAND (expr, 0);
2832       op0 = strip_offset_1 (op0, false, false, &off0);
2833       if (op0 == TREE_OPERAND (expr, 0))
2834 	return orig_expr;
2835 
2836       *offset = off0 * int_cst_value (op1);
2837       if (integer_zerop (op0))
2838 	expr = op0;
2839       else
2840 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2841 
2842       return fold_convert (orig_type, expr);
2843 
2844     case ARRAY_REF:
2845     case ARRAY_RANGE_REF:
2846       if (!inside_addr)
2847 	return orig_expr;
2848 
2849       step = array_ref_element_size (expr);
2850       if (!cst_and_fits_in_hwi (step))
2851 	break;
2852 
2853       st = int_cst_value (step);
2854       op1 = TREE_OPERAND (expr, 1);
2855       op1 = strip_offset_1 (op1, false, false, &off1);
2856       *offset = off1 * st;
2857 
2858       if (top_compref
2859 	  && integer_zerop (op1))
2860 	{
2861 	  /* Strip the component reference completely.  */
2862 	  op0 = TREE_OPERAND (expr, 0);
2863 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2864 	  *offset += off0;
2865 	  return op0;
2866 	}
2867       break;
2868 
2869     case COMPONENT_REF:
2870       {
2871 	tree field;
2872 
2873 	if (!inside_addr)
2874 	  return orig_expr;
2875 
2876 	tmp = component_ref_field_offset (expr);
2877 	field = TREE_OPERAND (expr, 1);
2878 	if (top_compref
2879 	    && cst_and_fits_in_hwi (tmp)
2880 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2881 	  {
2882 	    HOST_WIDE_INT boffset, abs_off;
2883 
2884 	    /* Strip the component reference completely.  */
2885 	    op0 = TREE_OPERAND (expr, 0);
2886 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2887 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2888 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2889 	    if (boffset < 0)
2890 	      abs_off = -abs_off;
2891 
2892 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2893 	    return op0;
2894 	  }
2895       }
2896       break;
2897 
2898     case ADDR_EXPR:
2899       op0 = TREE_OPERAND (expr, 0);
2900       op0 = strip_offset_1 (op0, true, true, &off0);
2901       *offset += off0;
2902 
2903       if (op0 == TREE_OPERAND (expr, 0))
2904 	return orig_expr;
2905 
2906       expr = build_fold_addr_expr (op0);
2907       return fold_convert (orig_type, expr);
2908 
2909     case MEM_REF:
2910       /* ???  Offset operand?  */
2911       inside_addr = false;
2912       break;
2913 
2914     default:
2915       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2916 	return build_int_cst (orig_type, 0);
2917       return orig_expr;
2918     }
2919 
2920   /* Default handling of expressions for which we want to recurse into
2921      the first operand.  */
2922   op0 = TREE_OPERAND (expr, 0);
2923   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2924   *offset += off0;
2925 
2926   if (op0 == TREE_OPERAND (expr, 0)
2927       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2928     return orig_expr;
2929 
2930   expr = copy_node (expr);
2931   TREE_OPERAND (expr, 0) = op0;
2932   if (op1)
2933     TREE_OPERAND (expr, 1) = op1;
2934 
2935   /* Inside address, we might strip the top level component references,
2936      thus changing type of the expression.  Handling of ADDR_EXPR
2937      will fix that.  */
2938   expr = fold_convert (orig_type, expr);
2939 
2940   return expr;
2941 }
2942 
2943 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2944 
2945 tree
2946 strip_offset (tree expr, poly_uint64_pod *offset)
2947 {
2948   poly_int64 off;
2949   tree core = strip_offset_1 (expr, false, false, &off);
2950   *offset = off;
2951   return core;
2952 }
2953 
2954 /* Returns a variant of TYPE that can be used as a base for different uses.
2955    We return an unsigned type with the same precision, which avoids problems
2956    with overflows.  */
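/* E.g. a 32-bit "int" becomes "unsigned int", and a pointer type becomes an
   unsigned integer type of the same precision, so that candidate arithmetic
   wraps instead of invoking undefined behavior on overflow.  */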
2957 
2958 static tree
2959 generic_type_for (tree type)
2960 {
2961   if (POINTER_TYPE_P (type))
2962     return unsigned_type_for (type);
2963 
2964   if (TYPE_UNSIGNED (type))
2965     return type;
2966 
2967   return unsigned_type_for (type);
2968 }
2969 
2970 /* Private data for walk_tree.  */
2971 
2972 struct walk_tree_data
2973 {
2974   bitmap *inv_vars;
2975   struct ivopts_data *idata;
2976 };
2977 
2978 /* Callback function for walk_tree, it records invariants and symbol
2979    reference in *EXPR_P.  DATA is the structure storing result info.  */
2980 
2981 static tree
2982 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2983 {
2984   tree op = *expr_p;
2985   struct version_info *info;
2986   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2987 
2988   if (TREE_CODE (op) != SSA_NAME)
2989     return NULL_TREE;
2990 
2991   info = name_info (wdata->idata, op);
2992   /* Because we expand simple operations when finding IVs, a loop invariant
2993      variable that isn't referred to by the original loop could be used now.
2994      Record such invariant variables here.  */
2995   if (!info->iv)
2996     {
2997       struct ivopts_data *idata = wdata->idata;
2998       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2999 
3000       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3001 	{
3002 	  tree steptype = TREE_TYPE (op);
3003 	  if (POINTER_TYPE_P (steptype))
3004 	    steptype = sizetype;
3005 	  set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3006 	  record_invariant (idata, op, false);
3007 	}
3008     }
3009   if (!info->inv_id || info->has_nonlin_use)
3010     return NULL_TREE;
3011 
3012   if (!*wdata->inv_vars)
3013     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3014   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3015 
3016   return NULL_TREE;
3017 }
3018 
3019 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
3020    should store them.  */
3021 
3022 static inline void
3023 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3024 {
3025   struct walk_tree_data wdata;
3026 
3027   if (!inv_vars)
3028     return;
3029 
3030   wdata.idata = data;
3031   wdata.inv_vars = inv_vars;
3032   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3033 }
3034 
3035 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3036    will be recorded if it doesn't exist yet.  Given below two exprs:
3037      inv_expr + cst1, inv_expr + cst2
3038    It's hard to make decision whether constant part should be stripped
3039    or not.  We choose to not strip based on below facts:
3040      1) We need to count ADD cost for constant part if it's stripped,
3041 	which isn't always trivial where this functions is called.
3042      2) Stripping constant away may be conflict with following loop
3043 	invariant hoisting pass.
3044      3) Not stripping constant away results in more invariant exprs,
3045 	which usually leads to decision preferring lower reg pressure.  */
3046 
3047 static iv_inv_expr_ent *
3048 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3049 {
3050   STRIP_NOPS (inv_expr);
3051 
3052   if (poly_int_tree_p (inv_expr)
3053       || TREE_CODE (inv_expr) == SSA_NAME)
3054     return NULL;
3055 
3056   /* Don't strip constant part away as we used to.  */
3057 
3058   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3059   struct iv_inv_expr_ent ent;
3060   ent.expr = inv_expr;
3061   ent.hash = iterative_hash_expr (inv_expr, 0);
3062   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3063 
3064   if (!*slot)
3065     {
3066       *slot = XNEW (struct iv_inv_expr_ent);
3067       (*slot)->expr = inv_expr;
3068       (*slot)->hash = ent.hash;
3069       (*slot)->id = ++data->max_inv_expr_id;
3070     }
3071 
3072   return *slot;
3073 }
3074 
3075 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3076    position to POS.  If USE is not NULL, the candidate is set as related to
3077    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3078    replacement of the final value of the iv by a direct computation.  */
3079 
3080 static struct iv_cand *
3081 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3082 		 enum iv_position pos, struct iv_use *use,
3083 		 gimple *incremented_at, struct iv *orig_iv = NULL,
3084 		 bool doloop = false)
3085 {
3086   unsigned i;
3087   struct iv_cand *cand = NULL;
3088   tree type, orig_type;
3089 
3090   gcc_assert (base && step);
3091 
3092   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3093      live, but the ivopts code may replace a real pointer with one
3094      pointing before or after the memory block that is then adjusted
3095      into the memory block during the loop.  FIXME: It would likely be
3096      better to actually force the pointer live and still use ivopts;
3097      for example, it would be enough to write the pointer into memory
3098      and keep it there until after the loop.  */
3099   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3100     return NULL;
3101 
3102   /* For non-original variables, make sure their values are computed in a type
3103      that does not invoke undefined behavior on overflows (since in general,
3104      we cannot prove that these induction variables are non-wrapping).  */
3105   if (pos != IP_ORIGINAL)
3106     {
3107       orig_type = TREE_TYPE (base);
3108       type = generic_type_for (orig_type);
3109       if (type != orig_type)
3110 	{
3111 	  base = fold_convert (type, base);
3112 	  step = fold_convert (type, step);
3113 	}
3114     }
3115 
3116   for (i = 0; i < data->vcands.length (); i++)
3117     {
3118       cand = data->vcands[i];
3119 
3120       if (cand->pos != pos)
3121 	continue;
3122 
3123       if (cand->incremented_at != incremented_at
3124 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3125 	      && cand->ainc_use != use))
3126 	continue;
3127 
3128       if (operand_equal_p (base, cand->iv->base, 0)
3129 	  && operand_equal_p (step, cand->iv->step, 0)
3130 	  && (TYPE_PRECISION (TREE_TYPE (base))
3131 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3132 	break;
3133     }
3134 
3135   if (i == data->vcands.length ())
3136     {
3137       cand = XCNEW (struct iv_cand);
3138       cand->id = i;
3139       cand->iv = alloc_iv (data, base, step);
3140       cand->pos = pos;
3141       if (pos != IP_ORIGINAL)
3142 	{
3143 	  if (doloop)
3144 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3145 	  else
3146 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3147 	  cand->var_after = cand->var_before;
3148 	}
3149       cand->important = important;
3150       cand->incremented_at = incremented_at;
3151       cand->doloop_p = doloop;
3152       data->vcands.safe_push (cand);
3153 
3154       if (!poly_int_tree_p (step))
3155 	{
3156 	  find_inv_vars (data, &step, &cand->inv_vars);
3157 
3158 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3159 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3160 	  if (inv_expr != NULL)
3161 	    {
3162 	      cand->inv_exprs = cand->inv_vars;
3163 	      cand->inv_vars = NULL;
3164 	      if (cand->inv_exprs)
3165 		bitmap_clear (cand->inv_exprs);
3166 	      else
3167 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3168 
3169 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3170 	    }
3171 	}
3172 
3173       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3174 	cand->ainc_use = use;
3175       else
3176 	cand->ainc_use = NULL;
3177 
3178       cand->orig_iv = orig_iv;
3179       if (dump_file && (dump_flags & TDF_DETAILS))
3180 	dump_cand (dump_file, cand);
3181     }
3182 
3183   cand->important |= important;
3184   cand->doloop_p |= doloop;
3185 
3186   /* Relate candidate to the group for which it is added.  */
3187   if (use)
3188     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3189 
3190   return cand;
3191 }
3192 
3193 /* Returns true if incrementing the induction variable at the end of the LOOP
3194    is allowed.
3195 
3196    The purpose is to avoid splitting latch edge with a biv increment, thus
3197    creating a jump, possibly confusing other optimization passes and leaving
3198    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3199    available (so we do not have a better alternative), or if the latch edge
3200    is already nonempty.  */
3201 
3202 static bool
3203 allow_ip_end_pos_p (class loop *loop)
3204 {
3205   if (!ip_normal_pos (loop))
3206     return true;
3207 
3208   if (!empty_block_p (ip_end_pos (loop)))
3209     return true;
3210 
3211   return false;
3212 }
3213 
3214 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3215    Important field is set to IMPORTANT.  */
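/* For instance, on a target with post-increment addressing, a memory use
   whose iv step equals the size of the accessed mode can use a candidate
   incremented right after the use (IP_AFTER_USE); the increment can then
   be combined with the memory access into a single auto-increment
   instruction later on.  */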
3216 
3217 static void
3218 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3219 			bool important, struct iv_use *use)
3220 {
3221   basic_block use_bb = gimple_bb (use->stmt);
3222   machine_mode mem_mode;
3223   unsigned HOST_WIDE_INT cstepi;
3224 
3225   /* If we insert the increment in any position other than the standard
3226      ones, we must ensure that it is incremented once per iteration.
3227      It must not be in an inner nested loop, or one side of an if
3228      statement.  */
3229   if (use_bb->loop_father != data->current_loop
3230       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3231       || stmt_can_throw_internal (cfun, use->stmt)
3232       || !cst_and_fits_in_hwi (step))
3233     return;
3234 
3235   cstepi = int_cst_value (step);
3236 
3237   mem_mode = TYPE_MODE (use->mem_type);
3238   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3239 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3240        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3241       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3242 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3243 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3244     {
3245       enum tree_code code = MINUS_EXPR;
3246       tree new_base;
3247       tree new_step = step;
3248 
3249       if (POINTER_TYPE_P (TREE_TYPE (base)))
3250 	{
3251 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3252 	  code = POINTER_PLUS_EXPR;
3253 	}
3254       else
3255 	new_step = fold_convert (TREE_TYPE (base), new_step);
3256       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3257       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3258 		       use->stmt);
3259     }
3260   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3261 	|| USE_STORE_POST_INCREMENT (mem_mode))
3262        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3263       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3264 	   || USE_STORE_POST_DECREMENT (mem_mode))
3265 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3266     {
3267       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3268 		       use->stmt);
3269     }
3270 }
3271 
3272 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3273    position to POS.  If USE is not NULL, the candidate is set as related to
3274    it.  The candidate computation is scheduled before exit condition and at
3275    the end of loop.  */
3276 
3277 static void
3278 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3279 	       struct iv_use *use, struct iv *orig_iv = NULL,
3280 	       bool doloop = false)
3281 {
3282   if (ip_normal_pos (data->current_loop))
3283     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3284 		     doloop);
3285   /* Exclude doloop candidate here since it requires decrement then comparison
3286      and jump, the IP_END position doesn't match.  */
3287   if (!doloop && ip_end_pos (data->current_loop)
3288       && allow_ip_end_pos_p (data->current_loop))
3289     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3290 }
3291 
3292 /* Adds standard iv candidates.  */
3293 
3294 static void
3295 add_standard_iv_candidates (struct ivopts_data *data)
3296 {
3297   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3298 
3299   /* The same for a double-integer type if it is still fast enough.  */
3300   if (TYPE_PRECISION
3301 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3302       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3303     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3304 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3305 
3306   /* Likewise for an even wider integer type if it is still fast enough.  */
3307   if (TYPE_PRECISION
3308 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3309       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3310     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3311 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3312 }
3313 
3314 
3315 /* Adds candidates based on the old induction variable IV.  */
3316 
3317 static void
3318 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3319 {
3320   gimple *phi;
3321   tree def;
3322   struct iv_cand *cand;
3323 
3324   /* Check if this biv is used in address type use.  */
3325   if (iv->no_overflow  && iv->have_address_use
3326       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3327       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3328     {
3329       tree base = fold_convert (sizetype, iv->base);
3330       tree step = fold_convert (sizetype, iv->step);
3331 
3332       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3333       add_candidate (data, base, step, true, NULL, iv);
3334       /* Add iv cand of the original type only if it has nonlinear use.  */
3335       if (iv->nonlin_use)
3336 	add_candidate (data, iv->base, iv->step, true, NULL);
3337     }
3338   else
3339     add_candidate (data, iv->base, iv->step, true, NULL);
3340 
3341   /* The same, but with initial value zero.  */
3342   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3343     add_candidate (data, size_int (0), iv->step, true, NULL);
3344   else
3345     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3346 		   iv->step, true, NULL);
3347 
3348   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3349   if (gimple_code (phi) == GIMPLE_PHI)
3350     {
3351       /* Additionally record the possibility of leaving the original iv
3352 	 untouched.  */
3353       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3354       /* Don't add candidate if it's from another PHI node because
3355 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3356       phi = SSA_NAME_DEF_STMT (def);
3357       if (gimple_code (phi) != GIMPLE_PHI)
3358 	{
3359 	  cand = add_candidate_1 (data,
3360 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3361 				  SSA_NAME_DEF_STMT (def));
3362 	  if (cand)
3363 	    {
3364 	      cand->var_before = iv->ssa_name;
3365 	      cand->var_after = def;
3366 	    }
3367 	}
3368       else
3369 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3370     }
3371 }
3372 
3373 /* Adds candidates based on the old induction variables.  */
3374 
3375 static void
3376 add_iv_candidate_for_bivs (struct ivopts_data *data)
3377 {
3378   unsigned i;
3379   struct iv *iv;
3380   bitmap_iterator bi;
3381 
3382   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3383     {
3384       iv = ver_info (data, i)->iv;
3385       if (iv && iv->biv_p && !integer_zerop (iv->step))
3386 	add_iv_candidate_for_biv (data, iv);
3387     }
3388 }
3389 
3390 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3391 
3392 static void
3393 record_common_cand (struct ivopts_data *data, tree base,
3394 		    tree step, struct iv_use *use)
3395 {
3396   class iv_common_cand ent;
3397   class iv_common_cand **slot;
3398 
3399   ent.base = base;
3400   ent.step = step;
3401   ent.hash = iterative_hash_expr (base, 0);
3402   ent.hash = iterative_hash_expr (step, ent.hash);
3403 
3404   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3405   if (*slot == NULL)
3406     {
3407       *slot = new iv_common_cand ();
3408       (*slot)->base = base;
3409       (*slot)->step = step;
3410       (*slot)->uses.create (8);
3411       (*slot)->hash = ent.hash;
3412       data->iv_common_cands.safe_push ((*slot));
3413     }
3414 
3415   gcc_assert (use != NULL);
3416   (*slot)->uses.safe_push (use);
3417   return;
3418 }
3419 
3420 /* Comparison function used to sort common candidates.  */
3421 
3422 static int
3423 common_cand_cmp (const void *p1, const void *p2)
3424 {
3425   unsigned n1, n2;
3426   const class iv_common_cand *const *const ccand1
3427     = (const class iv_common_cand *const *)p1;
3428   const class iv_common_cand *const *const ccand2
3429     = (const class iv_common_cand *const *)p2;
3430 
3431   n1 = (*ccand1)->uses.length ();
3432   n2 = (*ccand2)->uses.length ();
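
  /* Sort in descending order of the number of uses so that candidates
     shared by more uses come first.  */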
3433   return n2 - n1;
3434 }
3435 
3436 /* Adds IV candidates based on the common candidates recorded.  */
3437 
3438 static void
3439 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3440 {
3441   unsigned i, j;
3442   struct iv_cand *cand_1, *cand_2;
3443 
3444   data->iv_common_cands.qsort (common_cand_cmp);
3445   for (i = 0; i < data->iv_common_cands.length (); i++)
3446     {
3447       class iv_common_cand *ptr = data->iv_common_cands[i];
3448 
3449       /* Only add an IV candidate if it's derived from multiple uses.  Since
	 the vector is sorted by decreasing number of uses, the remaining
	 entries have at most one use, so we can stop here.  */
3450       if (ptr->uses.length () <= 1)
3451 	break;
3452 
3453       cand_1 = NULL;
3454       cand_2 = NULL;
3455       if (ip_normal_pos (data->current_loop))
3456 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3457 				  false, IP_NORMAL, NULL, NULL);
3458 
3459       if (ip_end_pos (data->current_loop)
3460 	  && allow_ip_end_pos_p (data->current_loop))
3461 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3462 				  false, IP_END, NULL, NULL);
3463 
3464       /* Bind deriving uses and the new candidates.  */
3465       for (j = 0; j < ptr->uses.length (); j++)
3466 	{
3467 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3468 	  if (cand_1)
3469 	    bitmap_set_bit (group->related_cands, cand_1->id);
3470 	  if (cand_2)
3471 	    bitmap_set_bit (group->related_cands, cand_2->id);
3472 	}
3473     }
3474 
3475   /* Release data since it is useless from this point.  */
3476   data->iv_common_cand_tab->empty ();
3477   data->iv_common_cands.truncate (0);
3478 }
3479 
3480 /* Adds candidates based on the value of USE's iv.  */
3481 
3482 static void
3483 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3484 {
3485   poly_uint64 offset;
3486   tree base;
3487   struct iv *iv = use->iv;
3488   tree basetype = TREE_TYPE (iv->base);
3489 
3490   /* Don't add a candidate for an iv_use with a non-integer, non-pointer or
3491      non-mode-precision type; instead, add a candidate for the corresponding
3492      scev in an unsigned type of the same precision.  See PR93674.  */
3493   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3494       || !type_has_mode_precision_p (basetype))
3495     {
3496       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3497 						 TYPE_UNSIGNED (basetype));
3498       add_candidate (data, fold_convert (basetype, iv->base),
3499 		     fold_convert (basetype, iv->step), false, NULL);
3500       return;
3501     }
3502 
3503   add_candidate (data, iv->base, iv->step, false, use);
3504 
3505   /* Record common candidate for use in case it can be shared by others.  */
3506   record_common_cand (data, iv->base, iv->step, use);
3507 
3508   /* Record common candidate with initial value zero.  */
3509   basetype = TREE_TYPE (iv->base);
3510   if (POINTER_TYPE_P (basetype))
3511     basetype = sizetype;
3512   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3513 
3514   /* Compare the cost of an address with an unscaled index with the cost of
3515     an address with a scaled index and add candidate if useful.  */
3516   poly_int64 step;
3517   if (use != NULL
3518       && poly_int_tree_p (iv->step, &step)
3519       && address_p (use->type))
3520     {
3521       poly_int64 new_step;
3522       unsigned int fact = preferred_mem_scale_factor
3523 	(use->iv->base,
3524 	 TYPE_MODE (use->mem_type),
3525 	 optimize_loop_for_speed_p (data->current_loop));
3526 
3527       if (fact != 1
3528 	  && multiple_p (step, fact, &new_step))
3529 	add_candidate (data, size_int (0),
3530 		       wide_int_to_tree (sizetype, new_step),
3531 		       true, NULL);
3532     }
3533 
3534   /* Record common candidate with constant offset stripped in base.
3535      Like the use itself, we also add candidate directly for it.  */
3536   base = strip_offset (iv->base, &offset);
3537   if (maybe_ne (offset, 0U) || base != iv->base)
3538     {
3539       record_common_cand (data, base, iv->step, use);
3540       add_candidate (data, base, iv->step, false, use);
3541     }
3542 
3543   /* Record common candidate with base_object removed in base.  */
3544   base = iv->base;
3545   STRIP_NOPS (base);
3546   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3547     {
3548       tree step = iv->step;
3549 
3550       STRIP_NOPS (step);
3551       base = TREE_OPERAND (base, 1);
3552       step = fold_convert (sizetype, step);
3553       record_common_cand (data, base, step, use);
3554       /* Also record common candidate with offset stripped.  */
3555       base = strip_offset (base, &offset);
3556       if (maybe_ne (offset, 0U))
3557 	record_common_cand (data, base, step, use);
3558     }
3559 
3560   /* Finally, add auto-increment candidates.  Make such variables
3561      important since other iv uses with the same base object may be
3562      based on them.  */
3563   if (use != NULL && address_p (use->type))
3564     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3565 }
3566 
3567 /* Adds candidates based on the uses.  */
3568 
3569 static void
3570 add_iv_candidate_for_groups (struct ivopts_data *data)
3571 {
3572   unsigned i;
3573 
3574   /* Only add candidate for the first use in group.  */
3575   for (i = 0; i < data->vgroups.length (); i++)
3576     {
3577       struct iv_group *group = data->vgroups[i];
3578 
3579       gcc_assert (group->vuses[0] != NULL);
3580       add_iv_candidate_for_use (data, group->vuses[0]);
3581     }
3582   add_iv_candidate_derived_from_uses (data);
3583 }
3584 
3585 /* Record important candidates and add them to related_cands bitmaps.  */
3586 
3587 static void
3588 record_important_candidates (struct ivopts_data *data)
3589 {
3590   unsigned i;
3591   struct iv_group *group;
3592 
3593   for (i = 0; i < data->vcands.length (); i++)
3594     {
3595       struct iv_cand *cand = data->vcands[i];
3596 
3597       if (cand->important)
3598 	bitmap_set_bit (data->important_candidates, i);
3599     }
3600 
3601   data->consider_all_candidates = (data->vcands.length ()
3602 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3603 
3604   /* Add important candidates to groups' related_cands bitmaps.  */
3605   for (i = 0; i < data->vgroups.length (); i++)
3606     {
3607       group = data->vgroups[i];
3608       bitmap_ior_into (group->related_cands, data->important_candidates);
3609     }
3610 }
3611 
3612 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3613    If consider_all_candidates is true, each group gets an array indexed by
3614    candidate id; otherwise it gets a small open-addressed hash table.  */
3615 
3616 static void
3617 alloc_use_cost_map (struct ivopts_data *data)
3618 {
3619   unsigned i, size, s;
3620 
3621   for (i = 0; i < data->vgroups.length (); i++)
3622     {
3623       struct iv_group *group = data->vgroups[i];
3624 
3625       if (data->consider_all_candidates)
3626 	size = data->vcands.length ();
3627       else
3628 	{
3629 	  s = bitmap_count_bits (group->related_cands);
3630 
3631 	  /* Round up to a power of two, so that computing the modulus is fast.  */
3632 	  size = s ? (1 << ceil_log2 (s)) : 1;
3633 	}
3634 
3635       group->n_map_members = size;
3636       group->cost_map = XCNEWVEC (class cost_pair, size);
3637     }
3638 }
3639 
3640 /* Sets cost of (GROUP, CAND) pair to COST, records that it depends on
3641    invariants INV_VARS and INV_EXPRS, that the value used in expressing it
3642    is VALUE, and, for iv elimination, that the comparison operator is COMP.  */
3643 
3644 static void
3645 set_group_iv_cost (struct ivopts_data *data,
3646 		   struct iv_group *group, struct iv_cand *cand,
3647 		   comp_cost cost, bitmap inv_vars, tree value,
3648 		   enum tree_code comp, bitmap inv_exprs)
3649 {
3650   unsigned i, s;
3651 
3652   if (cost.infinite_cost_p ())
3653     {
3654       BITMAP_FREE (inv_vars);
3655       BITMAP_FREE (inv_exprs);
3656       return;
3657     }
3658 
3659   if (data->consider_all_candidates)
3660     {
3661       group->cost_map[cand->id].cand = cand;
3662       group->cost_map[cand->id].cost = cost;
3663       group->cost_map[cand->id].inv_vars = inv_vars;
3664       group->cost_map[cand->id].inv_exprs = inv_exprs;
3665       group->cost_map[cand->id].value = value;
3666       group->cost_map[cand->id].comp = comp;
3667       return;
3668     }
3669 
3670   /* n_map_members is a power of two, so this computes modulo.  */
3671   s = cand->id & (group->n_map_members - 1);
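  /* Open addressing with linear probing: scan from the hashed slot to the
     end of the table, then wrap around to the beginning.  */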
3672   for (i = s; i < group->n_map_members; i++)
3673     if (!group->cost_map[i].cand)
3674       goto found;
3675   for (i = 0; i < s; i++)
3676     if (!group->cost_map[i].cand)
3677       goto found;
3678 
3679   gcc_unreachable ();
3680 
3681 found:
3682   group->cost_map[i].cand = cand;
3683   group->cost_map[i].cost = cost;
3684   group->cost_map[i].inv_vars = inv_vars;
3685   group->cost_map[i].inv_exprs = inv_exprs;
3686   group->cost_map[i].value = value;
3687   group->cost_map[i].comp = comp;
3688 }
3689 
3690 /* Gets cost of (GROUP, CAND) pair.  */
3691 
3692 static class cost_pair *
3693 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3694 		   struct iv_cand *cand)
3695 {
3696   unsigned i, s;
3697   class cost_pair *ret;
3698 
3699   if (!cand)
3700     return NULL;
3701 
3702   if (data->consider_all_candidates)
3703     {
3704       ret = group->cost_map + cand->id;
3705       if (!ret->cand)
3706 	return NULL;
3707 
3708       return ret;
3709     }
3710 
3711   /* n_map_members is a power of two, so this computes modulo.  */
3712   s = cand->id & (group->n_map_members - 1);
3713   for (i = s; i < group->n_map_members; i++)
3714     if (group->cost_map[i].cand == cand)
3715       return group->cost_map + i;
3716     else if (group->cost_map[i].cand == NULL)
3717       return NULL;
3718   for (i = 0; i < s; i++)
3719     if (group->cost_map[i].cand == cand)
3720       return group->cost_map + i;
3721     else if (group->cost_map[i].cand == NULL)
3722       return NULL;
3723 
3724   return NULL;
3725 }
3726 
3727 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3728 static rtx
3729 produce_memory_decl_rtl (tree obj, int *regno)
3730 {
3731   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3732   machine_mode address_mode = targetm.addr_space.address_mode (as);
3733   rtx x;
3734 
3735   gcc_assert (obj);
3736   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3737     {
3738       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3739       x = gen_rtx_SYMBOL_REF (address_mode, name);
3740       SET_SYMBOL_REF_DECL (x, obj);
3741       x = gen_rtx_MEM (DECL_MODE (obj), x);
3742       set_mem_addr_space (x, as);
3743       targetm.encode_section_info (obj, x, true);
3744     }
3745   else
3746     {
3747       x = gen_raw_REG (address_mode, (*regno)++);
3748       x = gen_rtx_MEM (DECL_MODE (obj), x);
3749       set_mem_addr_space (x, as);
3750     }
3751 
3752   return x;
3753 }
3754 
3755 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3756    walk_tree.  DATA contains the actual fake register number.  */
3757 
3758 static tree
3759 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3760 {
3761   tree obj = NULL_TREE;
3762   rtx x = NULL_RTX;
3763   int *regno = (int *) data;
3764 
3765   switch (TREE_CODE (*expr_p))
3766     {
3767     case ADDR_EXPR:
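      /* Strip the handled components to reach the base object whose
	 address is taken.  */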
3768       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3769 	   handled_component_p (*expr_p);
3770 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3771 	continue;
3772       obj = *expr_p;
3773       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3774 	x = produce_memory_decl_rtl (obj, regno);
3775       break;
3776 
3777     case SSA_NAME:
3778       *ws = 0;
3779       obj = SSA_NAME_VAR (*expr_p);
3780       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3781       if (!obj)
3782 	return NULL_TREE;
3783       if (!DECL_RTL_SET_P (obj))
3784 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3785       break;
3786 
3787     case VAR_DECL:
3788     case PARM_DECL:
3789     case RESULT_DECL:
3790       *ws = 0;
3791       obj = *expr_p;
3792 
3793       if (DECL_RTL_SET_P (obj))
3794 	break;
3795 
3796       if (DECL_MODE (obj) == BLKmode)
3797 	x = produce_memory_decl_rtl (obj, regno);
3798       else
3799 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3800 
3801       break;
3802 
3803     default:
3804       break;
3805     }
3806 
3807   if (x)
3808     {
3809       decl_rtl_to_reset.safe_push (obj);
3810       SET_DECL_RTL (obj, x);
3811     }
3812 
3813   return NULL_TREE;
3814 }
3815 
3816 /* Predict whether the given loop will be transformed in the RTL
3817    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3818    This is only for target independent checks, see targetm.predict_doloop_p
3819    for the target dependent ones.
3820 
3821    Note that according to some initial investigation, checks such as the
3822    costly-niter check and invalid-stmt scanning don't gain much in general
3823    cases, so keep this as simple as possible for now.
3824 
3825    Some RTL-specific checks seem impossible to perform on gimple; if any new
3826    or easy checks are missing here, please add them.  */
3827 
3828 static bool
3829 generic_predict_doloop_p (struct ivopts_data *data)
3830 {
3831   class loop *loop = data->current_loop;
3832 
3833   /* Call target hook for target dependent checks.  */
3834   if (!targetm.predict_doloop_p (loop))
3835     {
3836       if (dump_file && (dump_flags & TDF_DETAILS))
3837 	fprintf (dump_file, "Predict doloop failure due to"
3838 			    " target specific checks.\n");
3839       return false;
3840     }
3841 
3842   /* Similar to doloop_optimize, check the iteration description to see
3843      whether it's suitable.  Keep it as simple as possible; feel free to
3844      extend it if you find cases with multiple exits that matter.  */
3845   edge exit = single_dom_exit (loop);
3846   class tree_niter_desc *niter_desc;
3847   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3848     {
3849       if (dump_file && (dump_flags & TDF_DETAILS))
3850 	fprintf (dump_file, "Predict doloop failure due to"
3851 			    " unexpected niters.\n");
3852       return false;
3853     }
3854 
3855   /* Similar to doloop_optimize, check whether the iteration count is too
3856      small for the transformation to be profitable.  */
3857   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3858   if (est_niter == -1)
3859     est_niter = get_likely_max_loop_iterations_int (loop);
3860   if (est_niter >= 0 && est_niter < 3)
3861     {
3862       if (dump_file && (dump_flags & TDF_DETAILS))
3863 	fprintf (dump_file,
3864 		 "Predict doloop failure due to"
3865 		 " too few iterations (%u).\n",
3866 		 (unsigned int) est_niter);
3867       return false;
3868     }
3869 
3870   return true;
3871 }
3872 
3873 /* Determines cost of the computation of EXPR.  */
3874 
3875 static unsigned
3876 computation_cost (tree expr, bool speed)
3877 {
3878   rtx_insn *seq;
3879   rtx rslt;
3880   tree type = TREE_TYPE (expr);
3881   unsigned cost;
3882   /* Avoid using hard regs in ways which may be unsupported.  */
3883   int regno = LAST_VIRTUAL_REGISTER + 1;
3884   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3885   enum node_frequency real_frequency = node->frequency;
3886 
3887   node->frequency = NODE_FREQUENCY_NORMAL;
3888   crtl->maybe_hot_insn_p = speed;
3889   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3890   start_sequence ();
3891   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3892   seq = get_insns ();
3893   end_sequence ();
3894   default_rtl_profile ();
3895   node->frequency = real_frequency;
3896 
3897   cost = seq_cost (seq, speed);
3898   if (MEM_P (rslt))
3899     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3900 			  TYPE_ADDR_SPACE (type), speed);
3901   else if (!REG_P (rslt))
3902     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3903 
3904   return cost;
3905 }
3906 
3907 /* Returns variable containing the value of candidate CAND at statement AT.  */
3908 
3909 static tree
3910 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3911 {
3912   if (stmt_after_increment (loop, cand, stmt))
3913     return cand->var_after;
3914   else
3915     return cand->var_before;
3916 }
3917 
3918 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3919    same precision that is at least as wide as the precision of TYPE, stores
3920    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3921    type of A and B.  */
3922 
3923 static tree
3924 determine_common_wider_type (tree *a, tree *b)
3925 {
3926   tree wider_type = NULL;
3927   tree suba, subb;
3928   tree atype = TREE_TYPE (*a);
3929 
3930   if (CONVERT_EXPR_P (*a))
3931     {
3932       suba = TREE_OPERAND (*a, 0);
3933       wider_type = TREE_TYPE (suba);
3934       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3935 	return atype;
3936     }
3937   else
3938     return atype;
3939 
3940   if (CONVERT_EXPR_P (*b))
3941     {
3942       subb = TREE_OPERAND (*b, 0);
3943       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3944 	return atype;
3945     }
3946   else
3947     return atype;
3948 
3949   *a = suba;
3950   *b = subb;
3951   return wider_type;
3952 }
3953 
3954 /* Determines the expression by which USE is expressed from induction variable
3955    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3956    form: the invariant part in AFF_INV and the variant part in AFF_VAR.  Store
3957    the ratio of USE's step over CAND's step in PRAT if it's non-null.  Returns
3958    false if USE cannot be expressed using CAND.  */
3959 
3960 static bool
3961 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3962 		       struct iv_cand *cand, class aff_tree *aff_inv,
3963 		       class aff_tree *aff_var, widest_int *prat = NULL)
3964 {
3965   tree ubase = use->iv->base, ustep = use->iv->step;
3966   tree cbase = cand->iv->base, cstep = cand->iv->step;
3967   tree common_type, uutype, var, cstep_common;
3968   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3969   aff_tree aff_cbase;
3970   widest_int rat;
3971 
3972   /* We must have a precision to express the values of use.  */
3973   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3974     return false;
3975 
3976   var = var_at_stmt (loop, cand, at);
3977   uutype = unsigned_type_for (utype);
3978 
3979   /* If the conversion is not a no-op, perform it.  */
3980   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3981     {
3982       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3983 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3984 	{
3985 	  tree inner_base, inner_step, inner_type;
3986 	  inner_base = TREE_OPERAND (cbase, 0);
3987 	  if (CONVERT_EXPR_P (cstep))
3988 	    inner_step = TREE_OPERAND (cstep, 0);
3989 	  else
3990 	    inner_step = cstep;
3991 
3992 	  inner_type = TREE_TYPE (inner_base);
3993 	  /* If the candidate is added from a biv whose type is smaller than
3994 	     ctype, we know neither the candidate nor the biv will overflow.
3995 	     In this case, it's safe to skip the conversion in the candidate.
3996 	     As an example, (unsigned short)((unsigned long)A) equals
3997 	     (unsigned short)A, if A has a type no larger than short.  */
3998 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3999 	    {
4000 	      cbase = inner_base;
4001 	      cstep = inner_step;
4002 	    }
4003 	}
4004       cbase = fold_convert (uutype, cbase);
4005       cstep = fold_convert (uutype, cstep);
4006       var = fold_convert (uutype, var);
4007     }
4008 
4009   /* Ratio is 1 when computing the value of biv cand by itself.
4010      We can't rely on constant_multiple_of in this case because the
4011      use is created after the original biv is selected.  The call
4012      could fail because of inconsistent fold behavior.  See PR68021
4013      for more information.  */
4014   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4015     {
4016       gcc_assert (is_gimple_assign (use->stmt));
4017       gcc_assert (use->iv->ssa_name == cand->var_after);
4018       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4019       rat = 1;
4020     }
4021   else if (!constant_multiple_of (ustep, cstep, &rat))
4022     return false;
4023 
4024   if (prat)
4025     *prat = rat;
4026 
4027   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4028      type, we achieve better folding by computing their difference in this
4029      wider type and casting the result to UUTYPE.  We do not need to worry
4030      about overflows, as all the arithmetic will in the end be performed in
4031      UUTYPE anyway.  */
4032   common_type = determine_common_wider_type (&ubase, &cbase);
4033 
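  /* Since use = ubase + i * ustep, var = cbase + i * cstep and
     ustep = rat * cstep, we have i * ustep = rat * (var - cbase), which
     gives the expression below.  */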
4034   /* use = ubase - ratio * cbase + ratio * var.  */
4035   tree_to_aff_combination (ubase, common_type, aff_inv);
4036   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4037   tree_to_aff_combination (var, uutype, aff_var);
4038 
4039   /* We need to shift the value if we are after the increment.  */
4040   if (stmt_after_increment (loop, cand, at))
4041     {
4042       aff_tree cstep_aff;
4043 
4044       if (common_type != uutype)
4045 	cstep_common = fold_convert (common_type, cstep);
4046       else
4047 	cstep_common = cstep;
4048 
4049       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4050       aff_combination_add (&aff_cbase, &cstep_aff);
4051     }
4052 
4053   aff_combination_scale (&aff_cbase, -rat);
4054   aff_combination_add (aff_inv, &aff_cbase);
4055   if (common_type != uutype)
4056     aff_combination_convert (aff_inv, uutype);
4057 
4058   aff_combination_scale (aff_var, rat);
4059   return true;
4060 }
4061 
4062 /* Determines the expression by which USE is expressed from induction variable
4063    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4064    form in AFF.  Returns false if USE cannot be expressed using CAND.  */
4065 
4066 static bool
4067 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4068 		     struct iv_cand *cand, class aff_tree *aff)
4069 {
4070   aff_tree aff_var;
4071 
4072   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4073     return false;
4074 
4075   aff_combination_add (aff, &aff_var);
4076   return true;
4077 }
4078 
4079 /* Return the type of USE.  */
4080 
4081 static tree
4082 get_use_type (struct iv_use *use)
4083 {
4084   tree base_type = TREE_TYPE (use->iv->base);
4085   tree type;
4086 
4087   if (use->type == USE_REF_ADDRESS)
4088     {
4089       /* The base_type may be a void pointer.  Create a pointer type based on
4090 	 the mem_ref instead.  */
4091       type = build_pointer_type (TREE_TYPE (*use->op_p));
4092       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4093 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4094     }
4095   else
4096     type = base_type;
4097 
4098   return type;
4099 }
4100 
4101 /* Determines the expression by which USE is expressed from induction variable
4102    CAND at statement AT in LOOP.  The computation is unshared.  */
4103 
4104 static tree
4105 get_computation_at (class loop *loop, gimple *at,
4106 		    struct iv_use *use, struct iv_cand *cand)
4107 {
4108   aff_tree aff;
4109   tree type = get_use_type (use);
4110 
4111   if (!get_computation_aff (loop, at, use, cand, &aff))
4112     return NULL_TREE;
4113   unshare_aff_combination (&aff);
4114   return fold_convert (type, aff_combination_to_tree (&aff));
4115 }
4116 
4117 /* Like get_computation_at, but try harder, even if the computation
4118    is more expensive.  Intended for debug stmts.  */
4119 
4120 static tree
4121 get_debug_computation_at (class loop *loop, gimple *at,
4122 			  struct iv_use *use, struct iv_cand *cand)
4123 {
4124   if (tree ret = get_computation_at (loop, at, use, cand))
4125     return ret;
4126 
4127   tree ubase = use->iv->base, ustep = use->iv->step;
4128   tree cbase = cand->iv->base, cstep = cand->iv->step;
4129   tree var;
4130   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4131   widest_int rat;
4132 
4133   /* We must have a precision to express the values of use.  */
4134   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4135     return NULL_TREE;
4136 
4137   /* Try to handle the case that get_computation_at doesn't, i.e.
4138      express
4139      use = ubase + (var - cbase) / ratio.  */
4140   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4141 			     &rat))
4142     return NULL_TREE;
4143 
4144   bool neg_p = false;
4145   if (wi::neg_p (rat))
4146     {
4147       if (TYPE_UNSIGNED (ctype))
4148 	return NULL_TREE;
4149       neg_p = true;
4150       rat = wi::neg (rat);
4151     }
4152 
4153   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4154      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4155      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4156      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4157      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4158   if (!use->iv->no_overflow
4159       && !cand->iv->no_overflow
4160       && !integer_pow2p (cstep))
4161     return NULL_TREE;
4162 
4163   int bits = wi::exact_log2 (rat);
4164   if (bits == -1)
4165     bits = wi::floor_log2 (rat) + 1;
4166   if (!cand->iv->no_overflow
4167       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4168     return NULL_TREE;
4169 
4170   var = var_at_stmt (loop, cand, at);
4171 
4172   if (POINTER_TYPE_P (ctype))
4173     {
4174       ctype = unsigned_type_for (ctype);
4175       cbase = fold_convert (ctype, cbase);
4176       cstep = fold_convert (ctype, cstep);
4177       var = fold_convert (ctype, var);
4178     }
4179 
4180   if (stmt_after_increment (loop, cand, at))
4181     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4182 		       unshare_expr (cstep));
4183 
4184   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4185   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4186 		     wide_int_to_tree (TREE_TYPE (var), rat));
4187   if (POINTER_TYPE_P (utype))
4188     {
4189       var = fold_convert (sizetype, var);
4190       if (neg_p)
4191 	var = fold_build1 (NEGATE_EXPR, sizetype, var);
4192       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4193     }
4194   else
4195     {
4196       var = fold_convert (utype, var);
4197       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4198 			 ubase, var);
4199     }
4200   return var;
4201 }
4202 
4203 /* Adjust the cost COST for being in loop setup rather than loop body.
4204    If we're optimizing for space, the loop setup overhead is constant;
4205    if we're optimizing for speed, amortize it over the per-iteration cost.
4206    If ROUND_UP_P is true, the result is rounded up rather than truncated
4207    toward zero when optimizing for speed.  */
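/* For example, if the loop is expected to iterate 10 times on average, a
   setup cost of 25 amortizes to 25/10 = 2 when truncating and to
   (25 + 9)/10 = 3 when rounding up.  */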
4208 static int64_t
4209 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4210 		   bool round_up_p = false)
4211 {
4212   if (cost == INFTY)
4213     return cost;
4214   else if (optimize_loop_for_speed_p (data->current_loop))
4215     {
4216       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4217       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4218     }
4219   else
4220     return cost;
4221 }
4222 
4223 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4224    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4225    calculating the operands of EXPR.  Returns true if successful, and returns
4226    the cost in COST.  */
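/* For example, for EXPR a + b * 8 the multiplication can be done as a shift
   by m = 3, so the resulting cost is the cheaper of a shift-and-add
   instruction and a separate shift followed by an add.  */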
4227 
4228 static bool
4229 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4230 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4231 {
4232   comp_cost res;
4233   tree op1 = TREE_OPERAND (expr, 1);
4234   tree cst = TREE_OPERAND (mult, 1);
4235   tree multop = TREE_OPERAND (mult, 0);
4236   int m = exact_log2 (int_cst_value (cst));
4237   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4238   int as_cost, sa_cost;
4239   bool mult_in_op1;
4240 
4241   if (!(m >= 0 && m < maxm))
4242     return false;
4243 
4244   STRIP_NOPS (op1);
4245   mult_in_op1 = operand_equal_p (op1, mult, 0);
4246 
4247   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4248 
4249   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4250      use that in preference to a shift insn followed by an add insn.  */
4251   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4252 	     ? shiftadd_cost (speed, mode, m)
4253 	     : (mult_in_op1
4254 		? shiftsub1_cost (speed, mode, m)
4255 		: shiftsub0_cost (speed, mode, m)));
4256 
4257   res = comp_cost (MIN (as_cost, sa_cost), 0);
4258   res += (mult_in_op1 ? cost0 : cost1);
4259 
4260   STRIP_NOPS (multop);
4261   if (!is_gimple_val (multop))
4262     res += force_expr_to_var_cost (multop, speed);
4263 
4264   *cost = res;
4265   return true;
4266 }
4267 
4268 /* Estimates cost of forcing expression EXPR into a variable.  */
4269 
4270 static comp_cost
4271 force_expr_to_var_cost (tree expr, bool speed)
4272 {
4273   static bool costs_initialized = false;
4274   static unsigned integer_cost [2];
4275   static unsigned symbol_cost [2];
4276   static unsigned address_cost [2];
4277   tree op0, op1;
4278   comp_cost cost0, cost1, cost;
4279   machine_mode mode;
4280   scalar_int_mode int_mode;
4281 
4282   if (!costs_initialized)
4283     {
4284       tree type = build_pointer_type (integer_type_node);
4285       tree var, addr;
4286       rtx x;
4287       int i;
4288 
4289       var = create_tmp_var_raw (integer_type_node, "test_var");
4290       TREE_STATIC (var) = 1;
4291       x = produce_memory_decl_rtl (var, NULL);
4292       SET_DECL_RTL (var, x);
4293 
4294       addr = build1 (ADDR_EXPR, type, var);
4295 
4296 
4297       for (i = 0; i < 2; i++)
4298 	{
4299 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4300 							     2000), i);
4301 
4302 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4303 
4304 	  address_cost[i]
4305 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4306 	  if (dump_file && (dump_flags & TDF_DETAILS))
4307 	    {
4308 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4309 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4310 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4311 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4312 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4313 	      fprintf (dump_file, "\n");
4314 	    }
4315 	}
4316 
4317       costs_initialized = true;
4318     }
4319 
4320   STRIP_NOPS (expr);
4321 
4322   if (SSA_VAR_P (expr))
4323     return no_cost;
4324 
4325   if (is_gimple_min_invariant (expr))
4326     {
4327       if (poly_int_tree_p (expr))
4328 	return comp_cost (integer_cost [speed], 0);
4329 
4330       if (TREE_CODE (expr) == ADDR_EXPR)
4331 	{
4332 	  tree obj = TREE_OPERAND (expr, 0);
4333 
4334 	  if (VAR_P (obj)
4335 	      || TREE_CODE (obj) == PARM_DECL
4336 	      || TREE_CODE (obj) == RESULT_DECL)
4337 	    return comp_cost (symbol_cost [speed], 0);
4338 	}
4339 
4340       return comp_cost (address_cost [speed], 0);
4341     }
4342 
4343   switch (TREE_CODE (expr))
4344     {
4345     case POINTER_PLUS_EXPR:
4346     case PLUS_EXPR:
4347     case MINUS_EXPR:
4348     case MULT_EXPR:
4349     case TRUNC_DIV_EXPR:
4350     case BIT_AND_EXPR:
4351     case BIT_IOR_EXPR:
4352     case LSHIFT_EXPR:
4353     case RSHIFT_EXPR:
4354       op0 = TREE_OPERAND (expr, 0);
4355       op1 = TREE_OPERAND (expr, 1);
4356       STRIP_NOPS (op0);
4357       STRIP_NOPS (op1);
4358       break;
4359 
4360     CASE_CONVERT:
4361     case NEGATE_EXPR:
4362     case BIT_NOT_EXPR:
4363       op0 = TREE_OPERAND (expr, 0);
4364       STRIP_NOPS (op0);
4365       op1 = NULL_TREE;
4366       break;
4367     /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4368        introduce a COND_EXPR for the IV base, so we need better cost estimation
4369        for this COND_EXPR and for tcc_comparison codes.  */
4370     case COND_EXPR:
4371       op0 = TREE_OPERAND (expr, 1);
4372       STRIP_NOPS (op0);
4373       op1 = TREE_OPERAND (expr, 2);
4374       STRIP_NOPS (op1);
4375       break;
4376     case LT_EXPR:
4377     case LE_EXPR:
4378     case GT_EXPR:
4379     case GE_EXPR:
4380     case EQ_EXPR:
4381     case NE_EXPR:
4382     case UNORDERED_EXPR:
4383     case ORDERED_EXPR:
4384     case UNLT_EXPR:
4385     case UNLE_EXPR:
4386     case UNGT_EXPR:
4387     case UNGE_EXPR:
4388     case UNEQ_EXPR:
4389     case LTGT_EXPR:
4390     case MAX_EXPR:
4391     case MIN_EXPR:
4392       op0 = TREE_OPERAND (expr, 0);
4393       STRIP_NOPS (op0);
4394       op1 = TREE_OPERAND (expr, 1);
4395       STRIP_NOPS (op1);
4396       break;
4397 
4398     default:
4399       /* Just an arbitrary value, FIXME.  */
4400       return comp_cost (target_spill_cost[speed], 0);
4401     }
4402 
4403   if (op0 == NULL_TREE
4404       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4405     cost0 = no_cost;
4406   else
4407     cost0 = force_expr_to_var_cost (op0, speed);
4408 
4409   if (op1 == NULL_TREE
4410       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4411     cost1 = no_cost;
4412   else
4413     cost1 = force_expr_to_var_cost (op1, speed);
4414 
4415   mode = TYPE_MODE (TREE_TYPE (expr));
4416   switch (TREE_CODE (expr))
4417     {
4418     case POINTER_PLUS_EXPR:
4419     case PLUS_EXPR:
4420     case MINUS_EXPR:
4421     case NEGATE_EXPR:
4422       cost = comp_cost (add_cost (speed, mode), 0);
4423       if (TREE_CODE (expr) != NEGATE_EXPR)
4424 	{
4425 	  tree mult = NULL_TREE;
4426 	  comp_cost sa_cost;
4427 	  if (TREE_CODE (op1) == MULT_EXPR)
4428 	    mult = op1;
4429 	  else if (TREE_CODE (op0) == MULT_EXPR)
4430 	    mult = op0;
4431 
4432 	  if (mult != NULL_TREE
4433 	      && is_a <scalar_int_mode> (mode, &int_mode)
4434 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4435 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4436 				    speed, &sa_cost))
4437 	    return sa_cost;
4438 	}
4439       break;
4440 
4441     CASE_CONVERT:
4442       {
4443 	tree inner_mode, outer_mode;
4444 	outer_mode = TREE_TYPE (expr);
4445 	inner_mode = TREE_TYPE (op0);
4446 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4447 				       TYPE_MODE (inner_mode), speed), 0);
4448       }
4449       break;
4450 
4451     case MULT_EXPR:
4452       if (cst_and_fits_in_hwi (op0))
4453 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4454 					     mode, speed), 0);
4455       else if (cst_and_fits_in_hwi (op1))
4456 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4457 					     mode, speed), 0);
4458       else
4459 	return comp_cost (target_spill_cost [speed], 0);
4460       break;
4461 
4462     case TRUNC_DIV_EXPR:
4463       /* Division by power of two is usually cheap, so we allow it.  Forbid
4464 	 anything else.  */
4465       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4466 	cost = comp_cost (add_cost (speed, mode), 0);
4467       else
4468 	cost = comp_cost (target_spill_cost[speed], 0);
4469       break;
4470 
4471     case BIT_AND_EXPR:
4472     case BIT_IOR_EXPR:
4473     case BIT_NOT_EXPR:
4474     case LSHIFT_EXPR:
4475     case RSHIFT_EXPR:
4476       cost = comp_cost (add_cost (speed, mode), 0);
4477       break;
4478     case COND_EXPR:
4479       op0 = TREE_OPERAND (expr, 0);
4480       STRIP_NOPS (op0);
4481       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4482 	  || CONSTANT_CLASS_P (op0))
4483 	cost = no_cost;
4484       else
4485 	cost = force_expr_to_var_cost (op0, speed);
4486       break;
4487     case LT_EXPR:
4488     case LE_EXPR:
4489     case GT_EXPR:
4490     case GE_EXPR:
4491     case EQ_EXPR:
4492     case NE_EXPR:
4493     case UNORDERED_EXPR:
4494     case ORDERED_EXPR:
4495     case UNLT_EXPR:
4496     case UNLE_EXPR:
4497     case UNGT_EXPR:
4498     case UNGE_EXPR:
4499     case UNEQ_EXPR:
4500     case LTGT_EXPR:
4501     case MAX_EXPR:
4502     case MIN_EXPR:
4503       /* Simply use the add cost for now; FIXME if there is a more accurate
4504 	 way to evaluate the cost.  */
4505       cost = comp_cost (add_cost (speed, mode), 0);
4506       break;
4507 
4508     default:
4509       gcc_unreachable ();
4510     }
4511 
4512   cost += cost0;
4513   cost += cost1;
4514   return cost;
4515 }
4516 
4517 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4518    invariants the computation depends on.  */
4519 
4520 static comp_cost
4521 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4522 {
4523   if (!expr)
4524     return no_cost;
4525 
4526   find_inv_vars (data, &expr, inv_vars);
4527   return force_expr_to_var_cost (expr, data->speed);
4528 }
4529 
4530 /* Returns the cost of an auto-modifying address expression of the form
4531    base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
4532    is the offset of the address expression.  The address expression has
4533    ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED
4534    says whether we are optimizing for speed or size.  */
4535 
4536 enum ainc_type
4537 {
4538   AINC_PRE_INC,		/* Pre increment.  */
4539   AINC_PRE_DEC,		/* Pre decrement.  */
4540   AINC_POST_INC,	/* Post increment.  */
4541   AINC_POST_DEC,	/* Post decrement.  */
4542   AINC_NONE		/* Also the number of auto increment types.  */
4543 };
4544 
4545 struct ainc_cost_data
4546 {
4547   int64_t costs[AINC_NONE];
4548 };
4549 
4550 static comp_cost
4551 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4552 		       machine_mode addr_mode, machine_mode mem_mode,
4553 		       addr_space_t as, bool speed)
4554 {
4555   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4556       && !USE_STORE_PRE_DECREMENT (mem_mode)
4557       && !USE_LOAD_POST_DECREMENT (mem_mode)
4558       && !USE_STORE_POST_DECREMENT (mem_mode)
4559       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4560       && !USE_STORE_PRE_INCREMENT (mem_mode)
4561       && !USE_LOAD_POST_INCREMENT (mem_mode)
4562       && !USE_STORE_POST_INCREMENT (mem_mode))
4563     return infinite_cost;
4564 
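  /* Auto-inc/dec address costs are cached per (address space, memory mode)
     pair; IDX below is the linearized index into that cache.  */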
4565   static vec<ainc_cost_data *> ainc_cost_data_list;
4566   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4567   if (idx >= ainc_cost_data_list.length ())
4568     {
4569       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4570 
4571       gcc_assert (nsize > idx);
4572       ainc_cost_data_list.safe_grow_cleared (nsize, true);
4573     }
4574 
4575   ainc_cost_data *data = ainc_cost_data_list[idx];
4576   if (data == NULL)
4577     {
4578       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4579 
4580       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4581       data->costs[AINC_PRE_DEC] = INFTY;
4582       data->costs[AINC_POST_DEC] = INFTY;
4583       data->costs[AINC_PRE_INC] = INFTY;
4584       data->costs[AINC_POST_INC] = INFTY;
4585       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4586 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4587 	{
4588 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4589 
4590 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4591 	    data->costs[AINC_PRE_DEC]
4592 	      = address_cost (addr, mem_mode, as, speed);
4593 	}
4594       if (USE_LOAD_POST_DECREMENT (mem_mode)
4595 	  || USE_STORE_POST_DECREMENT (mem_mode))
4596 	{
4597 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4598 
4599 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4600 	    data->costs[AINC_POST_DEC]
4601 	      = address_cost (addr, mem_mode, as, speed);
4602 	}
4603       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4604 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4605 	{
4606 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4607 
4608 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4609 	    data->costs[AINC_PRE_INC]
4610 	      = address_cost (addr, mem_mode, as, speed);
4611 	}
4612       if (USE_LOAD_POST_INCREMENT (mem_mode)
4613 	  || USE_STORE_POST_INCREMENT (mem_mode))
4614 	{
4615 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4616 
4617 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4618 	    data->costs[AINC_POST_INC]
4619 	      = address_cost (addr, mem_mode, as, speed);
4620 	}
4621       ainc_cost_data_list[idx] = data;
4622     }
4623 
4624   poly_int64 msize = GET_MODE_SIZE (mem_mode);
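  /* Match the offset and step against the four auto-modification forms:
     {POST,PRE}_{INC,DEC}.  */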
4625   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4626     return comp_cost (data->costs[AINC_POST_INC], 0);
4627   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4628     return comp_cost (data->costs[AINC_POST_DEC], 0);
4629   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4630     return comp_cost (data->costs[AINC_PRE_INC], 0);
4631   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4632     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4633 
4634   return infinite_cost;
4635 }
4636 
4637 /* Return cost of computing USE's address expression by using CAND.
4638    AFF_INV and AFF_VAR represent invariant and variant parts of the
4639    address expression, respectively.  If AFF_INV is simple, store
4640    the loop invariant variables which are depended by it in INV_VARS;
4641    if AFF_INV is complicated, handle it as a new invariant expression
4642    and record it in INV_EXPR.  RATIO indicates multiple times between
4643    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4644    value to it indicating if this is an auto-increment address.  */
4645 
4646 static comp_cost
4647 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4648 		  struct iv_cand *cand, aff_tree *aff_inv,
4649 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4650 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4651 		  bool *can_autoinc, bool speed)
4652 {
4653   rtx addr;
4654   bool simple_inv = true;
4655   tree comp_inv = NULL_TREE, type = aff_var->type;
4656   comp_cost var_cost = no_cost, cost = no_cost;
4657   struct mem_address parts = {NULL_TREE, integer_one_node,
4658 			      NULL_TREE, NULL_TREE, NULL_TREE};
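  /* PARTS is {symbol, base, index, step, offset}; integer_one_node stands in
     for the presence of a base register when checking below which addressing
     forms are valid.  */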
4659   machine_mode addr_mode = TYPE_MODE (type);
4660   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4661   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4662   /* Only true if ratio != 1.  */
4663   bool ok_with_ratio_p = false;
4664   bool ok_without_ratio_p = false;
4665 
4666   if (!aff_combination_const_p (aff_inv))
4667     {
4668       parts.index = integer_one_node;
4669       /* Addressing mode "base + index".  */
4670       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4671       if (ratio != 1)
4672 	{
4673 	  parts.step = wide_int_to_tree (type, ratio);
4674 	  /* Addressing mode "base + index << scale".  */
4675 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4676 	  if (!ok_with_ratio_p)
4677 	    parts.step = NULL_TREE;
4678 	}
4679       if (ok_with_ratio_p || ok_without_ratio_p)
4680 	{
4681 	  if (maybe_ne (aff_inv->offset, 0))
4682 	    {
4683 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4684 	      /* Addressing mode "base + index [<< scale] + offset".  */
4685 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4686 		parts.offset = NULL_TREE;
4687 	      else
4688 		aff_inv->offset = 0;
4689 	    }
4690 
4691 	  move_fixed_address_to_symbol (&parts, aff_inv);
4692 	  /* Base is fixed address and is moved to symbol part.  */
4693 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4694 	    parts.base = NULL_TREE;
4695 
4696 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4697 	  if (parts.symbol != NULL_TREE
4698 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4699 	    {
4700 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4701 	      parts.symbol = NULL_TREE;
4702 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4703 		 outside of address expression in this case.  */
4704 	      simple_inv = false;
4705 	      /* Symbol part is moved back to base part, it can't be NULL.  */
4706 	      parts.base = integer_one_node;
4707 	    }
4708 	}
4709       else
4710 	parts.index = NULL_TREE;
4711     }
4712   else
4713     {
4714       poly_int64 ainc_step;
4715       if (can_autoinc
4716 	  && ratio == 1
4717 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4718 	{
4719 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4720 
4721 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4722 	    ainc_offset += ainc_step;
4723 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4724 					addr_mode, mem_mode, as, speed);
4725 	  if (!cost.infinite_cost_p ())
4726 	    {
4727 	      *can_autoinc = true;
4728 	      return cost;
4729 	    }
4730 	  cost = no_cost;
4731 	}
4732       if (!aff_combination_zero_p (aff_inv))
4733 	{
4734 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4735 	  /* Addressing mode "base + offset".  */
4736 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4737 	    parts.offset = NULL_TREE;
4738 	  else
4739 	    aff_inv->offset = 0;
4740 	}
4741     }
4742 
4743   if (simple_inv)
4744     simple_inv = (aff_inv == NULL
4745 		  || aff_combination_const_p (aff_inv)
4746 		  || aff_combination_singleton_var_p (aff_inv));
4747   if (!aff_combination_zero_p (aff_inv))
4748     comp_inv = aff_combination_to_tree (aff_inv);
4749   if (comp_inv != NULL_TREE)
4750     cost = force_var_cost (data, comp_inv, inv_vars);
4751   if (ratio != 1 && parts.step == NULL_TREE)
4752     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4753   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4754     var_cost += add_cost (speed, addr_mode);
4755 
4756   if (comp_inv && inv_expr && !simple_inv)
4757     {
4758       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4759       /* Clear depends on.  */
4760       if (*inv_expr != NULL && inv_vars && *inv_vars)
4761 	bitmap_clear (*inv_vars);
4762 
4763       /* The cost of a small invariant expression adjusted against the loop
4764 	 niters is usually zero, which makes it hard to distinguish from a
4765 	 candidate based on loop invariant variables.  Moreover, the generated
4766 	 invariant expression may not be hoisted out of the loop by a
4767 	 following pass.  We penalize the cost by rounding up in order to
4768 	 neutralize such effects.  */
4769       cost.cost = adjust_setup_cost (data, cost.cost, true);
4770       cost.scratch = cost.cost;
4771     }
4772 
4773   cost += var_cost;
4774   addr = addr_for_mem_ref (&parts, as, false);
4775   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4776   cost += address_cost (addr, mem_mode, as, speed);
4777 
4778   if (parts.symbol != NULL_TREE)
4779     cost.complexity += 1;
4780   /* Don't increase the complexity of adding a scaled index if it's
4781      the only kind of index that the target allows.  */
4782   if (parts.step != NULL_TREE && ok_without_ratio_p)
4783     cost.complexity += 1;
4784   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4785     cost.complexity += 1;
4786   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4787     cost.complexity += 1;
4788 
4789   return cost;
4790 }
4791 
4792 /* Scale (multiply) the computed COST (except the scratch part that should be
4793    hoisted out of the loop) by header->frequency / AT->frequency, which makes
4794    the expected cost more accurate.  */
4795 
4796 static comp_cost
4797 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4798 {
4799   if (data->speed
4800       && data->current_loop->header->count.to_frequency (cfun) > 0)
4801     {
4802       basic_block bb = gimple_bb (at);
4803       gcc_assert (cost.scratch <= cost.cost);
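      /* The scaling factor (header frequency relative to this block's
	 frequency) was precomputed and stored in the block's aux field.  */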
4804       int scale_factor = (int)(intptr_t) bb->aux;
4805       if (scale_factor == 1)
4806 	return cost;
4807 
4808       int64_t scaled_cost
4809 	= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4810 
4811       if (dump_file && (dump_flags & TDF_DETAILS))
4812 	fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4813 		 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4814 		 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4815 
4816       cost.cost = scaled_cost;
4817     }
4818 
4819   return cost;
4820 }
4821 
4822 /* Determines the cost of the computation by which USE is expressed
4823    from induction variable CAND.  If ADDRESS_P is true, we just need
4824    to create an address from it, otherwise we want to get it into
4825    register.  A set of invariants we depend on is stored in INV_VARS.
4826    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4827    addressing is likely.  If INV_EXPR is nonnull, record invariant
4828    expr entry in it.  */
4829 
4830 static comp_cost
4831 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4832 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4833 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4834 {
4835   gimple *at = use->stmt;
4836   tree ubase = use->iv->base, cbase = cand->iv->base;
4837   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4838   tree comp_inv = NULL_TREE;
4839   HOST_WIDE_INT ratio, aratio;
4840   comp_cost cost;
4841   widest_int rat;
4842   aff_tree aff_inv, aff_var;
4843   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4844 
4845   if (inv_vars)
4846     *inv_vars = NULL;
4847   if (can_autoinc)
4848     *can_autoinc = false;
4849   if (inv_expr)
4850     *inv_expr = NULL;
4851 
4852   /* Check if we have enough precision to express the values of use.  */
4853   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4854     return infinite_cost;
4855 
4856   if (address_p
4857       || (use->iv->base_object
4858 	  && cand->iv->base_object
4859 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4860 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4861     {
4862       /* Do not try to express address of an object with computation based
4863 	 on address of a different object.  This may cause problems in rtl
4864 	 level alias analysis (that does not expect this to be happening,
4865 	 as this is illegal in C), and would be unlikely to be useful
4866 	 anyway.  */
4867       if (use->iv->base_object
4868 	  && cand->iv->base_object
4869 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4870 	return infinite_cost;
4871     }
4872 
4873   if (!get_computation_aff_1 (data->current_loop, at, use,
4874 			      cand, &aff_inv, &aff_var, &rat)
4875       || !wi::fits_shwi_p (rat))
4876     return infinite_cost;
4877 
4878   ratio = rat.to_shwi ();
4879   if (address_p)
4880     {
4881       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4882 			       inv_vars, inv_expr, can_autoinc, speed);
4883       cost = get_scaled_computation_cost_at (data, at, cost);
4884       /* For doloop IV cand, add on the extra cost.  */
4885       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4886       return cost;
4887     }
4888 
4889   bool simple_inv = (aff_combination_const_p (&aff_inv)
4890 		     || aff_combination_singleton_var_p (&aff_inv));
4891   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4892   aff_combination_convert (&aff_inv, signed_type);
4893   if (!aff_combination_zero_p (&aff_inv))
4894     comp_inv = aff_combination_to_tree (&aff_inv);
4895 
4896   cost = force_var_cost (data, comp_inv, inv_vars);
4897   if (comp_inv && inv_expr && !simple_inv)
4898     {
4899       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4900       /* Clear depends on.  */
4901       if (*inv_expr != NULL && inv_vars && *inv_vars)
4902 	bitmap_clear (*inv_vars);
4903 
4904       cost.cost = adjust_setup_cost (data, cost.cost);
4905       /* Record setup cost in scratch field.  */
4906       cost.scratch = cost.cost;
4907     }
4908   /* Cost of constant integer can be covered when adding invariant part to
4909      variant part.  */
4910   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4911     cost = no_cost;
4912 
4913   /* Need type narrowing to represent use with cand.  */
4914   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4915     {
4916       machine_mode outer_mode = TYPE_MODE (utype);
4917       machine_mode inner_mode = TYPE_MODE (ctype);
4918       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4919     }
4920 
4921   /* Turn a + i * (-c) into a - i * c.  */
4922   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4923     aratio = -ratio;
4924   else
4925     aratio = ratio;
4926 
4927   if (ratio != 1)
4928     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4929 
4930   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4931      instruction.  */
4932   /* Need to add up the invariant and variant parts.  */
4933   if (comp_inv && !integer_zerop (comp_inv))
4934     cost += add_cost (speed, TYPE_MODE (utype));
4935 
4936   cost = get_scaled_computation_cost_at (data, at, cost);
4937 
4938   /* For doloop IV cand, add on the extra cost.  */
4939   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4940     cost += targetm.doloop_cost_for_generic;
4941 
4942   return cost;
4943 }
4944 
4945 /* Determines cost of computing the use in GROUP with CAND in a generic
4946    expression.  */
4947 
4948 static bool
4949 determine_group_iv_cost_generic (struct ivopts_data *data,
4950 				 struct iv_group *group, struct iv_cand *cand)
4951 {
4952   comp_cost cost;
4953   iv_inv_expr_ent *inv_expr = NULL;
4954   bitmap inv_vars = NULL, inv_exprs = NULL;
4955   struct iv_use *use = group->vuses[0];
4956 
4957   /* The simple case first -- if we need to express value of the preserved
4958      original biv, the cost is 0.  This also prevents us from counting the
4959      cost of increment twice -- once at this use and once in the cost of
4960      the candidate.  */
4961   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4962     cost = no_cost;
4963   else
4964     cost = get_computation_cost (data, use, cand, false,
4965 				 &inv_vars, NULL, &inv_expr);
4966 
4967   if (inv_expr)
4968     {
4969       inv_exprs = BITMAP_ALLOC (NULL);
4970       bitmap_set_bit (inv_exprs, inv_expr->id);
4971     }
4972   set_group_iv_cost (data, group, cand, cost, inv_vars,
4973 		     NULL_TREE, ERROR_MARK, inv_exprs);
4974   return !cost.infinite_cost_p ();
4975 }
4976 
4977 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4978 
4979 static bool
4980 determine_group_iv_cost_address (struct ivopts_data *data,
4981 				 struct iv_group *group, struct iv_cand *cand)
4982 {
4983   unsigned i;
4984   bitmap inv_vars = NULL, inv_exprs = NULL;
4985   bool can_autoinc;
4986   iv_inv_expr_ent *inv_expr = NULL;
4987   struct iv_use *use = group->vuses[0];
4988   comp_cost sum_cost = no_cost, cost;
4989 
4990   cost = get_computation_cost (data, use, cand, true,
4991 			       &inv_vars, &can_autoinc, &inv_expr);
4992 
4993   if (inv_expr)
4994     {
4995       inv_exprs = BITMAP_ALLOC (NULL);
4996       bitmap_set_bit (inv_exprs, inv_expr->id);
4997     }
4998   sum_cost = cost;
4999   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5000     {
5001       if (can_autoinc)
5002 	sum_cost -= cand->cost_step;
5003       /* If we generated the candidate solely for exploiting autoincrement
5004 	 opportunities, and it turns out it can't be used, set the cost to
5005 	 infinity to make sure we ignore it.  */
5006       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5007 	sum_cost = infinite_cost;
5008     }
5009 
5010   /* Uses in a group can share setup code, so only add setup cost once.  */
5011   cost -= cost.scratch;
5012   /* Compute and add the costs for the remaining uses of this group.  */
5013   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5014     {
5015       struct iv_use *next = group->vuses[i];
5016 
5017       /* TODO: We could skip computing cost for sub iv_use when it has the
5018 	 same cost as the first iv_use, but the cost really depends on the
5019 	 offset and where the iv_use is.  */
5020 	cost = get_computation_cost (data, next, cand, true,
5021 				     NULL, &can_autoinc, &inv_expr);
5022 	if (inv_expr)
5023 	  {
5024 	    if (!inv_exprs)
5025 	      inv_exprs = BITMAP_ALLOC (NULL);
5026 
5027 	    bitmap_set_bit (inv_exprs, inv_expr->id);
5028 	  }
5029       sum_cost += cost;
5030     }
5031   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5032 		     NULL_TREE, ERROR_MARK, inv_exprs);
5033 
5034   return !sum_cost.infinite_cost_p ();
5035 }
5036 
5037 /* Computes value of candidate CAND at position AT in iteration NITER, and
5038    stores it to VAL.  */
5039 
5040 static void
5041 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5042 	       aff_tree *val)
5043 {
5044   aff_tree step, delta, nit;
5045   struct iv *iv = cand->iv;
5046   tree type = TREE_TYPE (iv->base);
5047   tree steptype;
5048   if (POINTER_TYPE_P (type))
5049     steptype = sizetype;
5050   else
5051     steptype = unsigned_type_for (type);
5052 
5053   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5054   aff_combination_convert (&step, steptype);
5055   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5056   aff_combination_convert (&nit, steptype);
5057   aff_combination_mult (&nit, &step, &delta);
5058   if (stmt_after_increment (loop, cand, at))
5059     aff_combination_add (&delta, &step);
5060 
5061   tree_to_aff_combination (iv->base, type, val);
5062   if (!POINTER_TYPE_P (type))
5063     aff_combination_convert (val, steptype);
5064   aff_combination_add (val, &delta);
5065 }
5066 
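/* As an illustrative sketch (not part of the computation above), the value
   produced is

     val = base + step * niter          if AT is before the increment of CAND,
     val = base + step * (niter + 1)    if AT is after the increment,

   with the arithmetic carried out in the unsigned step type so that any wrap
   is well defined.  E.g., for base 16, step 4 and niter 10, the value at an
   after-increment position would be 16 + 4 * 11 == 60.  */
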
5067 /* Returns period of induction variable iv.  */
5068 
5069 static tree
5070 iv_period (struct iv *iv)
5071 {
5072   tree step = iv->step, period, type;
5073   tree pow2div;
5074 
5075   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5076 
5077   type = unsigned_type_for (TREE_TYPE (step));
5078   /* The period of the iv is lcm (step, type_range) / step - 1,
5079      i.e., N * type_range / step - 1.  Since the type range is a power
5080      of two, N == (step >> num_of_ending_zeros_binary (step)),
5081      so the final result is
5082 
5083        (type_range >> num_of_ending_zeros_binary (step)) - 1
5084 
5085   */
5086   pow2div = num_ending_zeros (step);
5087 
5088   period = build_low_bits_mask (type,
5089 				(TYPE_PRECISION (type)
5090 				 - tree_to_uhwi (pow2div)));
5091 
5092   return period;
5093 }
5094 
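/* A worked instance of the formula above (illustrative numbers only): for a
   32-bit unsigned iv with step 12, num_ending_zeros (12) == 2, so the period
   is (2^32 >> 2) - 1 == 0x3fffffff -- the iv can take that many steps before
   any of its values repeats modulo the type range.  */
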
5095 /* Returns the comparison operator used when eliminating the iv USE.  */
5096 
5097 static enum tree_code
5098 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5099 {
5100   class loop *loop = data->current_loop;
5101   basic_block ex_bb;
5102   edge exit;
5103 
5104   ex_bb = gimple_bb (use->stmt);
5105   exit = EDGE_SUCC (ex_bb, 0);
5106   if (flow_bb_inside_loop_p (loop, exit->dest))
5107     exit = EDGE_SUCC (ex_bb, 1);
5108 
5109   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5110 }
5111 
5112 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5113    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5114    calculation is performed in a non-wrapping type.
5115 
5116    TODO: More generally, we could test for the situation that
5117 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5118 	 This would require knowing the sign of OFFSET.  */
5119 
5120 static bool
5121 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5122 {
5123   enum tree_code code;
5124   tree e1, e2;
5125   aff_tree aff_e1, aff_e2, aff_offset;
5126 
5127   if (!nowrap_type_p (TREE_TYPE (base)))
5128     return false;
5129 
5130   base = expand_simple_operations (base);
5131 
5132   if (TREE_CODE (base) == SSA_NAME)
5133     {
5134       gimple *stmt = SSA_NAME_DEF_STMT (base);
5135 
5136       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5137 	return false;
5138 
5139       code = gimple_assign_rhs_code (stmt);
5140       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5141 	return false;
5142 
5143       e1 = gimple_assign_rhs1 (stmt);
5144       e2 = gimple_assign_rhs2 (stmt);
5145     }
5146   else
5147     {
5148       code = TREE_CODE (base);
5149       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5150 	return false;
5151       e1 = TREE_OPERAND (base, 0);
5152       e2 = TREE_OPERAND (base, 1);
5153     }
5154 
5155   /* Use affine expansion as deeper inspection to prove the equality.  */
5156   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5157 				  &aff_e2, &data->name_expansion_cache);
5158   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5159 				  &aff_offset, &data->name_expansion_cache);
5160   aff_combination_scale (&aff_offset, -1);
5161   switch (code)
5162     {
5163     case PLUS_EXPR:
5164       aff_combination_add (&aff_e2, &aff_offset);
5165       if (aff_combination_zero_p (&aff_e2))
5166 	return true;
5167 
5168       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5169 				      &aff_e1, &data->name_expansion_cache);
5170       aff_combination_add (&aff_e1, &aff_offset);
5171       return aff_combination_zero_p (&aff_e1);
5172 
5173     case POINTER_PLUS_EXPR:
5174       aff_combination_add (&aff_e2, &aff_offset);
5175       return aff_combination_zero_p (&aff_e2);
5176 
5177     default:
5178       return false;
5179     }
5180 }
5181 
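/* A sketch of the pattern recognized above, with hypothetical names: if BASE
   is defined as

     base = p_0 + off;    (a POINTER_PLUS_EXPR in a non-wrapping type)

   and OFFSET expands to the same affine combination as off, then BASE - OFFSET
   is just p_0, so the subtraction cannot overflow and true is returned.  */
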
5182 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5183    comparison with CAND.  NITER describes the number of iterations of
5184    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5185 
5186    We aim to handle the following situation:
5187 
5188    sometype *base, *p;
5189    int a, b, i;
5190 
5191    i = a;
5192    p = p_0 = base + a;
5193 
5194    do
5195      {
5196        bla (*p);
5197        p++;
5198        i++;
5199      }
5200    while (i < b);
5201 
5202    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5203    We aim to optimize this to
5204 
5205    p = p_0 = base + a;
5206    do
5207      {
5208        bla (*p);
5209        p++;
5210      }
5211    while (p < p_0 - a + b);
5212 
5213    This preserves correctness, since the pointer arithmetic does not
5214    overflow.  More precisely:
5215 
5216    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5217       overflow in computing it or the values of p.
5218    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5219       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5220 
5221 static bool
5222 iv_elimination_compare_lt (struct ivopts_data *data,
5223 			   struct iv_cand *cand, enum tree_code *comp_p,
5224 			   class tree_niter_desc *niter)
5225 {
5226   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5227   class aff_tree nit, tmpa, tmpb;
5228   enum tree_code comp;
5229   HOST_WIDE_INT step;
5230 
5231   /* We need to know that the candidate induction variable does not overflow.
5232      While more complex analysis may be used to prove this, for now just
5233      check that the variable appears in the original program and that it
5234      is computed in a type that guarantees no overflows.  */
5235   cand_type = TREE_TYPE (cand->iv->base);
5236   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5237     return false;
5238 
5239   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5240      the calculation of the BOUND could overflow, making the comparison
5241      invalid.  */
5242   if (!data->loop_single_exit_p)
5243     return false;
5244 
5245   /* We need to be able to decide whether candidate is increasing or decreasing
5246      in order to choose the right comparison operator.  */
5247   if (!cst_and_fits_in_hwi (cand->iv->step))
5248     return false;
5249   step = int_cst_value (cand->iv->step);
5250 
5251   /* Check that the number of iterations matches the expected pattern:
5252      a + 1 > b ? 0 : b - a - 1.  */
5253   mbz = niter->may_be_zero;
5254   if (TREE_CODE (mbz) == GT_EXPR)
5255     {
5256       /* Handle a + 1 > b.  */
5257       tree op0 = TREE_OPERAND (mbz, 0);
5258       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5259 	{
5260 	  a = TREE_OPERAND (op0, 0);
5261 	  b = TREE_OPERAND (mbz, 1);
5262 	}
5263       else
5264 	return false;
5265     }
5266   else if (TREE_CODE (mbz) == LT_EXPR)
5267     {
5268       tree op1 = TREE_OPERAND (mbz, 1);
5269 
5270       /* Handle b < a + 1.  */
5271       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5272 	{
5273 	  a = TREE_OPERAND (op1, 0);
5274 	  b = TREE_OPERAND (mbz, 0);
5275 	}
5276       else
5277 	return false;
5278     }
5279   else
5280     return false;
5281 
5282   /* Expected number of iterations is B - A - 1.  Check that it matches
5283      the actual number, i.e., that B - A - NITER = 1.  */
5284   tree_to_aff_combination (niter->niter, nit_type, &nit);
5285   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5286   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5287   aff_combination_scale (&nit, -1);
5288   aff_combination_scale (&tmpa, -1);
5289   aff_combination_add (&tmpb, &tmpa);
5290   aff_combination_add (&tmpb, &nit);
5291   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5292     return false;
5293 
5294   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5295      overflow.  */
5296   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5297 			cand->iv->step,
5298 			fold_convert (TREE_TYPE (cand->iv->step), a));
5299   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5300     return false;
5301 
5302   /* Determine the new comparison operator.  */
5303   comp = step < 0 ? GT_EXPR : LT_EXPR;
5304   if (*comp_p == NE_EXPR)
5305     *comp_p = comp;
5306   else if (*comp_p == EQ_EXPR)
5307     *comp_p = invert_tree_comparison (comp, false);
5308   else
5309     gcc_unreachable ();
5310 
5311   return true;
5312 }
5313 
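/* A small worked instance of the checks above (illustrative values): with
   a == 3 and b == 10, may_be_zero is (a + 1 > b) and niter is b - a - 1 == 6.
   The affine test computes b - a - niter == 10 - 3 - 6 == 1, which matches the
   expected pattern, and for a positive step the original NE_EXPR exit test is
   rewritten to LT_EXPR.  */
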
5314 /* Check whether it is possible to express the condition in USE by comparison
5315    of candidate CAND.  If so, store the value compared with to BOUND, and the
5316    comparison operator to COMP.  */
5317 
5318 static bool
5319 may_eliminate_iv (struct ivopts_data *data,
5320 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5321 		  enum tree_code *comp)
5322 {
5323   basic_block ex_bb;
5324   edge exit;
5325   tree period;
5326   class loop *loop = data->current_loop;
5327   aff_tree bnd;
5328   class tree_niter_desc *desc = NULL;
5329 
5330   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5331     return false;
5332 
5333   /* For now works only for exits that dominate the loop latch.
5334      TODO: extend to other conditions inside loop body.  */
5335   ex_bb = gimple_bb (use->stmt);
5336   if (use->stmt != last_stmt (ex_bb)
5337       || gimple_code (use->stmt) != GIMPLE_COND
5338       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5339     return false;
5340 
5341   exit = EDGE_SUCC (ex_bb, 0);
5342   if (flow_bb_inside_loop_p (loop, exit->dest))
5343     exit = EDGE_SUCC (ex_bb, 1);
5344   if (flow_bb_inside_loop_p (loop, exit->dest))
5345     return false;
5346 
5347   desc = niter_for_exit (data, exit);
5348   if (!desc)
5349     return false;
5350 
5351   /* Determine whether we can use the variable to test the exit condition.
5352      This is the case iff the period of the induction variable is greater
5353      than the number of iterations for which the exit condition is true.  */
5354   period = iv_period (cand->iv);
5355 
5356   /* If the number of iterations is constant, compare against it directly.  */
5357   if (TREE_CODE (desc->niter) == INTEGER_CST)
5358     {
5359       /* See cand_value_at.  */
5360       if (stmt_after_increment (loop, cand, use->stmt))
5361 	{
5362 	  if (!tree_int_cst_lt (desc->niter, period))
5363 	    return false;
5364 	}
5365       else
5366 	{
5367 	  if (tree_int_cst_lt (period, desc->niter))
5368 	    return false;
5369 	}
5370     }
5371 
5372   /* If not, and if this is the only possible exit of the loop, see whether
5373      we can get a conservative estimate on the number of iterations of the
5374      entire loop and compare against that instead.  */
5375   else
5376     {
5377       widest_int period_value, max_niter;
5378 
5379       max_niter = desc->max;
5380       if (stmt_after_increment (loop, cand, use->stmt))
5381 	max_niter += 1;
5382       period_value = wi::to_widest (period);
5383       if (wi::gtu_p (max_niter, period_value))
5384 	{
5385 	  /* See if we can take advantage of inferred loop bound
5386 	     information.  */
5387 	  if (data->loop_single_exit_p)
5388 	    {
5389 	      if (!max_loop_iterations (loop, &max_niter))
5390 		return false;
5391 	      /* The loop bound is already adjusted by adding 1.  */
5392 	      if (wi::gtu_p (max_niter, period_value))
5393 		return false;
5394 	    }
5395 	  else
5396 	    return false;
5397 	}
5398     }
5399 
5400   /* For a doloop IV cand, the bound would be zero.  It's safe whether
5401      may_be_zero is set or not.  */
5402   if (cand->doloop_p)
5403     {
5404       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5405       *comp = iv_elimination_compare (data, use);
5406       return true;
5407     }
5408 
5409   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5410 
5411   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5412 			 aff_combination_to_tree (&bnd));
5413   *comp = iv_elimination_compare (data, use);
5414 
5415   /* It is unlikely that computing the number of iterations using division
5416      would be more profitable than keeping the original induction variable.  */
5417   if (expression_expensive_p (*bound))
5418     return false;
5419 
5420   /* Sometimes, the situation that the number of iterations may be zero
5421      unless additional assumptions hold can be handled by using <
5422      instead of != in the exit condition.
5423 
5424      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5425 	   base the exit condition on it.  However, that is often too
5426 	   expensive.  */
5427   if (!integer_zerop (desc->may_be_zero))
5428     return iv_elimination_compare_lt (data, cand, comp, desc);
5429 
5430   return true;
5431 }
5432 
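/* As a hedged illustration of the rewrite this enables: for a loop counted by
   i with exit test i != n and a pointer candidate p with base p_0 and step 4,
   cand_value_at yields (roughly) the bound p_0 + 4 * n, so the exit can be
   rewritten as p != p_0 + 4 * n -- provided the candidate's period is large
   enough, as checked above.  */
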
5433 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5434    be copied if it is used in the loop body and DATA->body_includes_call.  */
5435 
5436 static int
5437 parm_decl_cost (struct ivopts_data *data, tree bound)
5438 {
5439   tree sbound = bound;
5440   STRIP_NOPS (sbound);
5441 
5442   if (TREE_CODE (sbound) == SSA_NAME
5443       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5444       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5445       && data->body_includes_call)
5446     return COSTS_N_INSNS (1);
5447 
5448   return 0;
5449 }
5450 
5451 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5452 
5453 static bool
5454 determine_group_iv_cost_cond (struct ivopts_data *data,
5455 			      struct iv_group *group, struct iv_cand *cand)
5456 {
5457   tree bound = NULL_TREE;
5458   struct iv *cmp_iv;
5459   bitmap inv_exprs = NULL;
5460   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5461   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5462   enum comp_iv_rewrite rewrite_type;
5463   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5464   tree *control_var, *bound_cst;
5465   enum tree_code comp = ERROR_MARK;
5466   struct iv_use *use = group->vuses[0];
5467 
5468   /* Extract condition operands.  */
5469   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5470 					&bound_cst, NULL, &cmp_iv);
5471   gcc_assert (rewrite_type != COMP_IV_NA);
5472 
5473   /* Try iv elimination.  */
5474   if (rewrite_type == COMP_IV_ELIM
5475       && may_eliminate_iv (data, use, cand, &bound, &comp))
5476     {
5477       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5478       if (elim_cost.cost == 0)
5479 	elim_cost.cost = parm_decl_cost (data, bound);
5480       else if (TREE_CODE (bound) == INTEGER_CST)
5481 	elim_cost.cost = 0;
5482       /* If we replace a loop condition 'i < n' with 'p < base + n',
5483 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5484 	 'base' and 'n' will be live during the loop.	 More likely,
5485 	 'base + n' will be loop invariant, resulting in only one live value
5486 	 during the loop.  So in that case we clear inv_vars_elim and set
5487 	 inv_expr_elim instead.  */
5488       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5489 	{
5490 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5491 	  bitmap_clear (inv_vars_elim);
5492 	}
5493       /* The bound is a loop invariant, so it will be only computed
5494 	 once.  */
5495       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5496     }
5497 
5498   /* When the condition is a comparison of the candidate IV against
5499      zero, prefer this IV.
5500 
5501      TODO: The constant that we're subtracting from the cost should
5502      be target-dependent.  This information should be added to the
5503      target costs for each backend.  */
5504   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5505       && integer_zerop (*bound_cst)
5506       && (operand_equal_p (*control_var, cand->var_after, 0)
5507 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5508     elim_cost -= 1;
5509 
5510   express_cost = get_computation_cost (data, use, cand, false,
5511 				       &inv_vars_express, NULL,
5512 				       &inv_expr_express);
5513   if (cmp_iv != NULL)
5514     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5515 
5516   /* Count the cost of the original bound as well.  */
5517   bound_cost = force_var_cost (data, *bound_cst, NULL);
5518   if (bound_cost.cost == 0)
5519     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5520   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5521     bound_cost.cost = 0;
5522   express_cost += bound_cost;
5523 
5524   /* Choose the better approach, preferring the eliminated IV. */
5525   if (elim_cost <= express_cost)
5526     {
5527       cost = elim_cost;
5528       inv_vars = inv_vars_elim;
5529       inv_vars_elim = NULL;
5530       inv_expr = inv_expr_elim;
5531       /* For doloop candidate/use pair, adjust to zero cost.  */
5532       if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5533 	cost = no_cost;
5534     }
5535   else
5536     {
5537       cost = express_cost;
5538       inv_vars = inv_vars_express;
5539       inv_vars_express = NULL;
5540       bound = NULL_TREE;
5541       comp = ERROR_MARK;
5542       inv_expr = inv_expr_express;
5543     }
5544 
5545   if (inv_expr)
5546     {
5547       inv_exprs = BITMAP_ALLOC (NULL);
5548       bitmap_set_bit (inv_exprs, inv_expr->id);
5549     }
5550   set_group_iv_cost (data, group, cand, cost,
5551 		     inv_vars, bound, comp, inv_exprs);
5552 
5553   if (inv_vars_elim)
5554     BITMAP_FREE (inv_vars_elim);
5555   if (inv_vars_express)
5556     BITMAP_FREE (inv_vars_express);
5557 
5558   return !cost.infinite_cost_p ();
5559 }
5560 
5561 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5562    if USE cannot be represented with CAND.  */
5563 
5564 static bool
5565 determine_group_iv_cost (struct ivopts_data *data,
5566 			 struct iv_group *group, struct iv_cand *cand)
5567 {
5568   switch (group->type)
5569     {
5570     case USE_NONLINEAR_EXPR:
5571       return determine_group_iv_cost_generic (data, group, cand);
5572 
5573     case USE_REF_ADDRESS:
5574     case USE_PTR_ADDRESS:
5575       return determine_group_iv_cost_address (data, group, cand);
5576 
5577     case USE_COMPARE:
5578       return determine_group_iv_cost_cond (data, group, cand);
5579 
5580     default:
5581       gcc_unreachable ();
5582     }
5583 }
5584 
5585 /* Return true if get_computation_cost indicates that autoincrement is
5586    a possibility for the pair of USE and CAND, false otherwise.  */
5587 
5588 static bool
5589 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5590 			   struct iv_cand *cand)
5591 {
5592   if (!address_p (use->type))
5593     return false;
5594 
5595   bool can_autoinc = false;
5596   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5597   return can_autoinc;
5598 }
5599 
5600 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5601    use that allows autoincrement, and set their AINC_USE if possible.  */
5602 
5603 static void
5604 set_autoinc_for_original_candidates (struct ivopts_data *data)
5605 {
5606   unsigned i, j;
5607 
5608   for (i = 0; i < data->vcands.length (); i++)
5609     {
5610       struct iv_cand *cand = data->vcands[i];
5611       struct iv_use *closest_before = NULL;
5612       struct iv_use *closest_after = NULL;
5613       if (cand->pos != IP_ORIGINAL)
5614 	continue;
5615 
5616       for (j = 0; j < data->vgroups.length (); j++)
5617 	{
5618 	  struct iv_group *group = data->vgroups[j];
5619 	  struct iv_use *use = group->vuses[0];
5620 	  unsigned uid = gimple_uid (use->stmt);
5621 
5622 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5623 	    continue;
5624 
5625 	  if (uid < gimple_uid (cand->incremented_at)
5626 	      && (closest_before == NULL
5627 		  || uid > gimple_uid (closest_before->stmt)))
5628 	    closest_before = use;
5629 
5630 	  if (uid > gimple_uid (cand->incremented_at)
5631 	      && (closest_after == NULL
5632 		  || uid < gimple_uid (closest_after->stmt)))
5633 	    closest_after = use;
5634 	}
5635 
5636       if (closest_before != NULL
5637 	  && autoinc_possible_for_pair (data, closest_before, cand))
5638 	cand->ainc_use = closest_before;
5639       else if (closest_after != NULL
5640 	       && autoinc_possible_for_pair (data, closest_after, cand))
5641 	cand->ainc_use = closest_after;
5642     }
5643 }
5644 
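/* Illustrative scenario (assuming a target with post-increment addressing):
   if a use *p sits in the same basic block right before the original
   increment p = p + 4, that use becomes the candidate's AINC_USE, so the
   memory access and the increment may later be combined into a single
   post-increment access.  */
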
5645 /* Relate compare use with all candidates.  */
5646 
5647 static void
5648 relate_compare_use_with_all_cands (struct ivopts_data *data)
5649 {
5650   unsigned i, count = data->vcands.length ();
5651   for (i = 0; i < data->vgroups.length (); i++)
5652     {
5653       struct iv_group *group = data->vgroups[i];
5654 
5655       if (group->type == USE_COMPARE)
5656 	bitmap_set_range (group->related_cands, 0, count);
5657     }
5658 }
5659 
5660 /* Add one doloop dedicated IV candidate:
5661      - Base is (may_be_zero ? 1 : (niter + 1)).
5662      - Step is -1.  */
5663 
5664 static void
5665 add_iv_candidate_for_doloop (struct ivopts_data *data)
5666 {
5667   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5668   gcc_assert (niter_desc && niter_desc->assumptions);
5669 
5670   tree niter = niter_desc->niter;
5671   tree ntype = TREE_TYPE (niter);
5672   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5673 
5674   tree may_be_zero = niter_desc->may_be_zero;
5675   if (may_be_zero && integer_zerop (may_be_zero))
5676     may_be_zero = NULL_TREE;
5677   if (may_be_zero)
5678     {
5679       if (COMPARISON_CLASS_P (may_be_zero))
5680 	{
5681 	  niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5682 			       build_int_cst (ntype, 0),
5683 			       rewrite_to_non_trapping_overflow (niter));
5684 	}
5685       /* Don't try to obtain the iteration count expression when may_be_zero is
5686 	 integer_nonzerop (the iteration count is then actually one) or anything else.  */
5687       else
5688 	return;
5689     }
5690 
5691   tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5692 			   build_int_cst (ntype, 1));
5693   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5694 }
5695 
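/* A sketch with assumed numbers: if niter == n - 1 and may_be_zero is false,
   the candidate starts at n and decreases by 1 every iteration, reaching 1 in
   the last iteration -- the shape expected by a hardware count-register
   (doloop) instruction.  */
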
5696 /* Finds the candidates for the induction variables.  */
5697 
5698 static void
5699 find_iv_candidates (struct ivopts_data *data)
5700 {
5701   /* Add commonly used ivs.  */
5702   add_standard_iv_candidates (data);
5703 
5704   /* Add doloop dedicated ivs.  */
5705   if (data->doloop_use_p)
5706     add_iv_candidate_for_doloop (data);
5707 
5708   /* Add old induction variables.  */
5709   add_iv_candidate_for_bivs (data);
5710 
5711   /* Add induction variables derived from uses.  */
5712   add_iv_candidate_for_groups (data);
5713 
5714   set_autoinc_for_original_candidates (data);
5715 
5716   /* Record the important candidates.  */
5717   record_important_candidates (data);
5718 
5719   /* Relate compare iv_use with all candidates.  */
5720   if (!data->consider_all_candidates)
5721     relate_compare_use_with_all_cands (data);
5722 
5723   if (dump_file && (dump_flags & TDF_DETAILS))
5724     {
5725       unsigned i;
5726 
5727       fprintf (dump_file, "\n<Important Candidates>:\t");
5728       for (i = 0; i < data->vcands.length (); i++)
5729 	if (data->vcands[i]->important)
5730 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5731       fprintf (dump_file, "\n");
5732 
5733       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5734       for (i = 0; i < data->vgroups.length (); i++)
5735 	{
5736 	  struct iv_group *group = data->vgroups[i];
5737 
5738 	  if (group->related_cands)
5739 	    {
5740 	      fprintf (dump_file, "  Group %d:\t", group->id);
5741 	      dump_bitmap (dump_file, group->related_cands);
5742 	    }
5743 	}
5744       fprintf (dump_file, "\n");
5745     }
5746 }
5747 
5748 /* Determines costs of computing use of iv with an iv candidate.  */
5749 
5750 static void
5751 determine_group_iv_costs (struct ivopts_data *data)
5752 {
5753   unsigned i, j;
5754   struct iv_cand *cand;
5755   struct iv_group *group;
5756   bitmap to_clear = BITMAP_ALLOC (NULL);
5757 
5758   alloc_use_cost_map (data);
5759 
5760   for (i = 0; i < data->vgroups.length (); i++)
5761     {
5762       group = data->vgroups[i];
5763 
5764       if (data->consider_all_candidates)
5765 	{
5766 	  for (j = 0; j < data->vcands.length (); j++)
5767 	    {
5768 	      cand = data->vcands[j];
5769 	      determine_group_iv_cost (data, group, cand);
5770 	    }
5771 	}
5772       else
5773 	{
5774 	  bitmap_iterator bi;
5775 
5776 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5777 	    {
5778 	      cand = data->vcands[j];
5779 	      if (!determine_group_iv_cost (data, group, cand))
5780 		bitmap_set_bit (to_clear, j);
5781 	    }
5782 
5783 	  /* Remove the candidates for which the cost is infinite from
5784 	     the list of related candidates.  */
5785 	  bitmap_and_compl_into (group->related_cands, to_clear);
5786 	  bitmap_clear (to_clear);
5787 	}
5788     }
5789 
5790   BITMAP_FREE (to_clear);
5791 
5792   if (dump_file && (dump_flags & TDF_DETAILS))
5793     {
5794       bitmap_iterator bi;
5795 
5796       /* Dump invariant variables.  */
5797       fprintf (dump_file, "\n<Invariant Vars>:\n");
5798       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5799 	{
5800 	  struct version_info *info = ver_info (data, i);
5801 	  if (info->inv_id)
5802 	    {
5803 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5804 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5805 	      fprintf (dump_file, "%s\n",
5806 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5807 	    }
5808 	}
5809 
5810       /* Dump invariant expressions.  */
5811       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5812       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5813 
5814       for (hash_table<iv_inv_expr_hasher>::iterator it
5815 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5816 	   ++it)
5817 	list.safe_push (*it);
5818 
5819       list.qsort (sort_iv_inv_expr_ent);
5820 
5821       for (i = 0; i < list.length (); ++i)
5822 	{
5823 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5824 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5825 	  fprintf (dump_file, "\n");
5826 	}
5827 
5828       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5829 
5830       for (i = 0; i < data->vgroups.length (); i++)
5831 	{
5832 	  group = data->vgroups[i];
5833 
5834 	  fprintf (dump_file, "Group %d:\n", i);
5835 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5836 	  for (j = 0; j < group->n_map_members; j++)
5837 	    {
5838 	      if (!group->cost_map[j].cand
5839 		  || group->cost_map[j].cost.infinite_cost_p ())
5840 		continue;
5841 
5842 	      fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
5843 		       group->cost_map[j].cand->id,
5844 		       group->cost_map[j].cost.cost,
5845 		       group->cost_map[j].cost.complexity);
5846 	      if (!group->cost_map[j].inv_exprs
5847 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5848 		fprintf (dump_file, "NIL;\t");
5849 	      else
5850 		bitmap_print (dump_file,
5851 			      group->cost_map[j].inv_exprs, "", ";\t");
5852 	      if (!group->cost_map[j].inv_vars
5853 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5854 		fprintf (dump_file, "NIL;\n");
5855 	      else
5856 		bitmap_print (dump_file,
5857 			      group->cost_map[j].inv_vars, "", "\n");
5858 	    }
5859 
5860 	  fprintf (dump_file, "\n");
5861 	}
5862       fprintf (dump_file, "\n");
5863     }
5864 }
5865 
5866 /* Determines cost of the candidate CAND.  */
5867 
5868 static void
5869 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5870 {
5871   comp_cost cost_base;
5872   int64_t cost, cost_step;
5873   tree base;
5874 
5875   gcc_assert (cand->iv != NULL);
5876 
5877   /* There are two costs associated with the candidate -- its increment
5878      and its initialization.  The second is almost negligible for any loop
5879      that rolls enough, so we take it into account only very little.  */
5880 
5881   base = cand->iv->base;
5882   cost_base = force_var_cost (data, base, NULL);
5883   /* It will be exceptional that the iv register happens to be initialized with
5884      the proper value at no cost.  In general, there will at least be a regcopy
5885      or a const set.  */
5886   if (cost_base.cost == 0)
5887     cost_base.cost = COSTS_N_INSNS (1);
5888   /* Doloop decrement should be considered as zero cost.  */
5889   if (cand->doloop_p)
5890     cost_step = 0;
5891   else
5892     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5893   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5894 
5895   /* Prefer the original ivs unless we may gain something by replacing them.
5896      The reason is to make debugging simpler; so this is not relevant for
5897      artificial ivs created by other optimization passes.  */
5898   if ((cand->pos != IP_ORIGINAL
5899        || !SSA_NAME_VAR (cand->var_before)
5900        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5901       /* Prefer doloop as well.  */
5902       && !cand->doloop_p)
5903     cost++;
5904 
5905   /* Prefer not to insert statements into latch unless there are some
5906      already (so that we do not create unnecessary jumps).  */
5907   if (cand->pos == IP_END
5908       && empty_block_p (ip_end_pos (data->current_loop)))
5909     cost++;
5910 
5911   cand->cost = cost;
5912   cand->cost_step = cost_step;
5913 }
5914 
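/* Worked example with assumed costs: if the step addition costs
   COSTS_N_INSNS (1) and forcing the base into a register also costs
   COSTS_N_INSNS (1), the candidate cost is roughly
   cost_step + adjust_setup_cost (data, COSTS_N_INSNS (1)), i.e. the
   per-iteration increment plus a setup cost discounted because it is paid
   only once before the loop; artificial candidates additionally pay the
   "+ 1" preference penalty applied above.  */
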
5915 /* Determines costs of computation of the candidates.  */
5916 
5917 static void
5918 determine_iv_costs (struct ivopts_data *data)
5919 {
5920   unsigned i;
5921 
5922   if (dump_file && (dump_flags & TDF_DETAILS))
5923     {
5924       fprintf (dump_file, "<Candidate Costs>:\n");
5925       fprintf (dump_file, "  cand\tcost\n");
5926     }
5927 
5928   for (i = 0; i < data->vcands.length (); i++)
5929     {
5930       struct iv_cand *cand = data->vcands[i];
5931 
5932       determine_iv_cost (data, cand);
5933 
5934       if (dump_file && (dump_flags & TDF_DETAILS))
5935 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5936     }
5937 
5938   if (dump_file && (dump_flags & TDF_DETAILS))
5939     fprintf (dump_file, "\n");
5940 }
5941 
5942 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5943    induction variables.  Note N_INVS includes both invariant variables and
5944    invariant expressions.  */
5945 
5946 static unsigned
5947 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5948 			      unsigned n_cands)
5949 {
5950   unsigned cost;
5951   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5952   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5953   bool speed = data->speed;
5954 
5955   /* If there is a call in the loop body, the call-clobbered registers
5956      are not available for loop invariants.  */
5957   if (data->body_includes_call)
5958     available_regs = available_regs - target_clobbered_regs;
5959 
5960   /* If we have enough registers.  */
5961   if (regs_needed + target_res_regs < available_regs)
5962     cost = n_new;
5963   /* If close to running out of registers, try to preserve them.  */
5964   else if (regs_needed <= available_regs)
5965     cost = target_reg_cost [speed] * regs_needed;
5966   /* If the registers needed exceed the ones available but the number of
5967      candidates does not, we penalize extra registers using target_spill_cost.  */
5968   else if (n_cands <= available_regs)
5969     cost = target_reg_cost [speed] * available_regs
5970 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5971   /* If even the number of candidates exceeds the available registers, we
5972      penalize the extra candidate registers using target_spill_cost * 2, since
5973      it is more expensive to spill an induction variable than an invariant.  */
5974   else
5975     cost = target_reg_cost [speed] * available_regs
5976 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5977 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5978 
5979   /* Finally, add the number of candidates, so that we prefer eliminating
5980      induction variables if possible.  */
5981   return cost + n_cands;
5982 }
5983 
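/* A worked instance of the cases above (assumed numbers): with
   available_regs == 8, target_res_regs == 3, n_old == 2, n_invs == 2 and
   n_cands == 3, we get regs_needed == 7; the first test fails (7 + 3 >= 8)
   but 7 <= 8 holds, so the cost is target_reg_cost[speed] * 7, and the final
   "+ n_cands" adds 3 to bias the search towards fewer induction variables.  */
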
5984 /* For each size of the induction variable set determine the penalty.  */
5985 
5986 static void
5987 determine_set_costs (struct ivopts_data *data)
5988 {
5989   unsigned j, n;
5990   gphi *phi;
5991   gphi_iterator psi;
5992   tree op;
5993   class loop *loop = data->current_loop;
5994   bitmap_iterator bi;
5995 
5996   if (dump_file && (dump_flags & TDF_DETAILS))
5997     {
5998       fprintf (dump_file, "<Global Costs>:\n");
5999       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6000       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6001       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6002       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6003     }
6004 
6005   n = 0;
6006   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6007     {
6008       phi = psi.phi ();
6009       op = PHI_RESULT (phi);
6010 
6011       if (virtual_operand_p (op))
6012 	continue;
6013 
6014       if (get_iv (data, op))
6015 	continue;
6016 
6017       if (!POINTER_TYPE_P (TREE_TYPE (op))
6018 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6019 	continue;
6020 
6021       n++;
6022     }
6023 
6024   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6025     {
6026       struct version_info *info = ver_info (data, j);
6027 
6028       if (info->inv_id && info->has_nonlin_use)
6029 	n++;
6030     }
6031 
6032   data->regs_used = n;
6033   if (dump_file && (dump_flags & TDF_DETAILS))
6034     fprintf (dump_file, "  regs_used %d\n", n);
6035 
6036   if (dump_file && (dump_flags & TDF_DETAILS))
6037     {
6038       fprintf (dump_file, "  cost for size:\n");
6039       fprintf (dump_file, "  ivs\tcost\n");
6040       for (j = 0; j <= 2 * target_avail_regs; j++)
6041 	fprintf (dump_file, "  %d\t%d\n", j,
6042 		 ivopts_estimate_reg_pressure (data, 0, j));
6043       fprintf (dump_file, "\n");
6044     }
6045 }
6046 
6047 /* Returns true if A is a cheaper cost pair than B.  */
6048 
6049 static bool
6050 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6051 {
6052   if (!a)
6053     return false;
6054 
6055   if (!b)
6056     return true;
6057 
6058   if (a->cost < b->cost)
6059     return true;
6060 
6061   if (b->cost < a->cost)
6062     return false;
6063 
6064   /* In case the costs are the same, prefer the cheaper candidate.  */
6065   if (a->cand->cost < b->cand->cost)
6066     return true;
6067 
6068   return false;
6069 }
6070 
6071 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6072    for more expensive, equal and cheaper respectively.  */
6073 
6074 static int
6075 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6076 {
6077   if (cheaper_cost_pair (a, b))
6078     return -1;
6079   if (cheaper_cost_pair (b, a))
6080     return 1;
6081 
6082   return 0;
6083 }
6084 
6085 /* Returns the cost pair by which GROUP is expressed in IVS.  */
6086 
6087 static class cost_pair *
6088 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6089 {
6090   return ivs->cand_for_group[group->id];
6091 }
6092 
6093 /* Computes the cost field of IVS structure.  */
6094 
6095 static void
6096 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6097 {
6098   comp_cost cost = ivs->cand_use_cost;
6099 
6100   cost += ivs->cand_cost;
6101   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6102   ivs->cost = cost;
6103 }
6104 
6105 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6106    and IVS.  */
6107 
6108 static void
6109 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6110 {
6111   bitmap_iterator bi;
6112   unsigned iid;
6113 
6114   if (!invs)
6115     return;
6116 
6117   gcc_assert (n_inv_uses != NULL);
6118   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6119     {
6120       n_inv_uses[iid]--;
6121       if (n_inv_uses[iid] == 0)
6122 	ivs->n_invs--;
6123     }
6124 }
6125 
6126 /* Set GROUP not to be expressed by any candidate in IVS.  */
6127 
6128 static void
6129 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6130 		 struct iv_group *group)
6131 {
6132   unsigned gid = group->id, cid;
6133   class cost_pair *cp;
6134 
6135   cp = ivs->cand_for_group[gid];
6136   if (!cp)
6137     return;
6138   cid = cp->cand->id;
6139 
6140   ivs->bad_groups++;
6141   ivs->cand_for_group[gid] = NULL;
6142   ivs->n_cand_uses[cid]--;
6143 
6144   if (ivs->n_cand_uses[cid] == 0)
6145     {
6146       bitmap_clear_bit (ivs->cands, cid);
6147       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6148 	ivs->n_cands--;
6149       ivs->cand_cost -= cp->cand->cost;
6150       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6151       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6152     }
6153 
6154   ivs->cand_use_cost -= cp->cost;
6155   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6156   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6157   iv_ca_recount_cost (data, ivs);
6158 }
6159 
6160 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6161    IVS.  */
6162 
6163 static void
6164 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6165 {
6166   bitmap_iterator bi;
6167   unsigned iid;
6168 
6169   if (!invs)
6170     return;
6171 
6172   gcc_assert (n_inv_uses != NULL);
6173   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6174     {
6175       n_inv_uses[iid]++;
6176       if (n_inv_uses[iid] == 1)
6177 	ivs->n_invs++;
6178     }
6179 }
6180 
6181 /* Set cost pair for GROUP in set IVS to CP.  */
6182 
6183 static void
6184 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6185 	      struct iv_group *group, class cost_pair *cp)
6186 {
6187   unsigned gid = group->id, cid;
6188 
6189   if (ivs->cand_for_group[gid] == cp)
6190     return;
6191 
6192   if (ivs->cand_for_group[gid])
6193     iv_ca_set_no_cp (data, ivs, group);
6194 
6195   if (cp)
6196     {
6197       cid = cp->cand->id;
6198 
6199       ivs->bad_groups--;
6200       ivs->cand_for_group[gid] = cp;
6201       ivs->n_cand_uses[cid]++;
6202       if (ivs->n_cand_uses[cid] == 1)
6203 	{
6204 	  bitmap_set_bit (ivs->cands, cid);
6205 	  if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6206 	    ivs->n_cands++;
6207 	  ivs->cand_cost += cp->cand->cost;
6208 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6209 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6210 	}
6211 
6212       ivs->cand_use_cost += cp->cost;
6213       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6214       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6215       iv_ca_recount_cost (data, ivs);
6216     }
6217 }
6218 
6219 /* Extend set IVS by expressing GROUP by some of the candidates in it
6220    if possible.  Consider all important candidates if candidates in
6221    set IVS don't give any result.  */
6222 
6223 static void
6224 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6225 	       struct iv_group *group)
6226 {
6227   class cost_pair *best_cp = NULL, *cp;
6228   bitmap_iterator bi;
6229   unsigned i;
6230   struct iv_cand *cand;
6231 
6232   gcc_assert (ivs->upto >= group->id);
6233   ivs->upto++;
6234   ivs->bad_groups++;
6235 
6236   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6237     {
6238       cand = data->vcands[i];
6239       cp = get_group_iv_cost (data, group, cand);
6240       if (cheaper_cost_pair (cp, best_cp))
6241 	best_cp = cp;
6242     }
6243 
6244   if (best_cp == NULL)
6245     {
6246       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6247 	{
6248 	  cand = data->vcands[i];
6249 	  cp = get_group_iv_cost (data, group, cand);
6250 	  if (cheaper_cost_pair (cp, best_cp))
6251 	    best_cp = cp;
6252 	}
6253     }
6254 
6255   iv_ca_set_cp (data, ivs, group, best_cp);
6256 }
6257 
6258 /* Get cost for assignment IVS.  */
6259 
6260 static comp_cost
6261 iv_ca_cost (class iv_ca *ivs)
6262 {
6263   /* This was a conditional expression but it triggered a bug in
6264      Sun C 5.5.  */
6265   if (ivs->bad_groups)
6266     return infinite_cost;
6267   else
6268     return ivs->cost;
6269 }
6270 
6271 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6272    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6273    respectively.  */
6274 
6275 static int
6276 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6277 		    struct iv_group *group, class cost_pair *old_cp,
6278 		    class cost_pair *new_cp)
6279 {
6280   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6281   unsigned old_n_invs = ivs->n_invs;
6282   iv_ca_set_cp (data, ivs, group, new_cp);
6283   unsigned new_n_invs = ivs->n_invs;
6284   iv_ca_set_cp (data, ivs, group, old_cp);
6285 
6286   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6287 }
6288 
6289 /* Creates a change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6290    it before NEXT.  */
6291 
6292 static struct iv_ca_delta *
6293 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6294 		 class cost_pair *new_cp, struct iv_ca_delta *next)
6295 {
6296   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6297 
6298   change->group = group;
6299   change->old_cp = old_cp;
6300   change->new_cp = new_cp;
6301   change->next = next;
6302 
6303   return change;
6304 }
6305 
6306 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6307    are rewritten.  */
6308 
6309 static struct iv_ca_delta *
6310 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6311 {
6312   struct iv_ca_delta *last;
6313 
6314   if (!l2)
6315     return l1;
6316 
6317   if (!l1)
6318     return l2;
6319 
6320   for (last = l1; last->next; last = last->next)
6321     continue;
6322   last->next = l2;
6323 
6324   return l1;
6325 }
6326 
6327 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6328 
6329 static struct iv_ca_delta *
6330 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6331 {
6332   struct iv_ca_delta *act, *next, *prev = NULL;
6333 
6334   for (act = delta; act; act = next)
6335     {
6336       next = act->next;
6337       act->next = prev;
6338       prev = act;
6339 
6340       std::swap (act->old_cp, act->new_cp);
6341     }
6342 
6343   return prev;
6344 }
6345 
6346 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6347    reverted instead.  */
6348 
6349 static void
6350 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6351 		    struct iv_ca_delta *delta, bool forward)
6352 {
6353   class cost_pair *from, *to;
6354   struct iv_ca_delta *act;
6355 
6356   if (!forward)
6357     delta = iv_ca_delta_reverse (delta);
6358 
6359   for (act = delta; act; act = act->next)
6360     {
6361       from = act->old_cp;
6362       to = act->new_cp;
6363       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6364       iv_ca_set_cp (data, ivs, act->group, to);
6365     }
6366 
6367   if (!forward)
6368     iv_ca_delta_reverse (delta);
6369 }
6370 
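/* The usual pattern (see iv_ca_extend and iv_ca_narrow below): build a DELTA,
   commit it with FORWARD true to measure the resulting iv_ca_cost, then commit
   it again with FORWARD false to restore the original assignment before
   deciding whether to keep the change.  */
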
6371 /* Returns true if CAND is used in IVS.  */
6372 
6373 static bool
6374 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6375 {
6376   return ivs->n_cand_uses[cand->id] > 0;
6377 }
6378 
6379 /* Returns number of induction variable candidates in the set IVS.  */
6380 
6381 static unsigned
6382 iv_ca_n_cands (class iv_ca *ivs)
6383 {
6384   return ivs->n_cands;
6385 }
6386 
6387 /* Free the list of changes DELTA.  */
6388 
6389 static void
6390 iv_ca_delta_free (struct iv_ca_delta **delta)
6391 {
6392   struct iv_ca_delta *act, *next;
6393 
6394   for (act = *delta; act; act = next)
6395     {
6396       next = act->next;
6397       free (act);
6398     }
6399 
6400   *delta = NULL;
6401 }
6402 
6403 /* Allocates new iv candidates assignment.  */
6404 
6405 static class iv_ca *
6406 iv_ca_new (struct ivopts_data *data)
6407 {
6408   class iv_ca *nw = XNEW (class iv_ca);
6409 
6410   nw->upto = 0;
6411   nw->bad_groups = 0;
6412   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6413 				 data->vgroups.length ());
6414   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6415   nw->cands = BITMAP_ALLOC (NULL);
6416   nw->n_cands = 0;
6417   nw->n_invs = 0;
6418   nw->cand_use_cost = no_cost;
6419   nw->cand_cost = 0;
6420   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6421   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6422   nw->cost = no_cost;
6423 
6424   return nw;
6425 }
6426 
6427 /* Free memory occupied by the set IVS.  */
6428 
6429 static void
6430 iv_ca_free (class iv_ca **ivs)
6431 {
6432   free ((*ivs)->cand_for_group);
6433   free ((*ivs)->n_cand_uses);
6434   BITMAP_FREE ((*ivs)->cands);
6435   free ((*ivs)->n_inv_var_uses);
6436   free ((*ivs)->n_inv_expr_uses);
6437   free (*ivs);
6438   *ivs = NULL;
6439 }
6440 
6441 /* Dumps IVS to FILE.  */
6442 
6443 static void
6444 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6445 {
6446   unsigned i;
6447   comp_cost cost = iv_ca_cost (ivs);
6448 
6449   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6450 	   cost.complexity);
6451   fprintf (file, "  reg_cost: %d\n",
6452 	   ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6453   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6454 	   "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6455 	   ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6456   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6457 
6458   for (i = 0; i < ivs->upto; i++)
6459     {
6460       struct iv_group *group = data->vgroups[i];
6461       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6462       if (cp)
6463         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6464 		 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6465 		 cp->cost.cost, cp->cost.complexity);
6466       else
6467 	fprintf (file, "   group:%d --> ??\n", group->id);
6468     }
6469 
6470   const char *pref = "";
6471   fprintf (file, "  invariant variables: ");
6472   for (i = 1; i <= data->max_inv_var_id; i++)
6473     if (ivs->n_inv_var_uses[i])
6474       {
6475 	fprintf (file, "%s%d", pref, i);
6476 	pref = ", ";
6477       }
6478 
6479   pref = "";
6480   fprintf (file, "\n  invariant expressions: ");
6481   for (i = 1; i <= data->max_inv_expr_id; i++)
6482     if (ivs->n_inv_expr_uses[i])
6483       {
6484 	fprintf (file, "%s%d", pref, i);
6485 	pref = ", ";
6486       }
6487 
6488   fprintf (file, "\n\n");
6489 }
6490 
6491 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6492    new set, and store differences in DELTA.  Number of induction variables
6493    in the new set is stored in N_IVS.  If MIN_NCAND is true, the function
6494    will try to find a solution with a minimal number of iv candidates.  */
6495 
6496 static comp_cost
6497 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6498 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6499 	      unsigned *n_ivs, bool min_ncand)
6500 {
6501   unsigned i;
6502   comp_cost cost;
6503   struct iv_group *group;
6504   class cost_pair *old_cp, *new_cp;
6505 
6506   *delta = NULL;
6507   for (i = 0; i < ivs->upto; i++)
6508     {
6509       group = data->vgroups[i];
6510       old_cp = iv_ca_cand_for_group (ivs, group);
6511 
6512       if (old_cp
6513 	  && old_cp->cand == cand)
6514 	continue;
6515 
6516       new_cp = get_group_iv_cost (data, group, cand);
6517       if (!new_cp)
6518 	continue;
6519 
6520       if (!min_ncand)
6521 	{
6522 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6523 	  /* Skip if new_cp depends on more invariants.  */
6524 	  if (cmp_invs > 0)
6525 	    continue;
6526 
6527 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6528 	  /* Skip if new_cp is not cheaper.  */
6529 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6530 	    continue;
6531 	}
6532 
6533       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6534     }
6535 
6536   iv_ca_delta_commit (data, ivs, *delta, true);
6537   cost = iv_ca_cost (ivs);
6538   if (n_ivs)
6539     *n_ivs = iv_ca_n_cands (ivs);
6540   iv_ca_delta_commit (data, ivs, *delta, false);
6541 
6542   return cost;
6543 }
6544 
6545 /* Try narrowing set IVS by removing CAND.  Return the cost of
6546    the new set and store the differences in DELTA.  START is
6547    the candidate with which we start narrowing.  */
6548 
6549 static comp_cost
6550 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6551 	      struct iv_cand *cand, struct iv_cand *start,
6552 	      struct iv_ca_delta **delta)
6553 {
6554   unsigned i, ci;
6555   struct iv_group *group;
6556   class cost_pair *old_cp, *new_cp, *cp;
6557   bitmap_iterator bi;
6558   struct iv_cand *cnd;
6559   comp_cost cost, best_cost, acost;
6560 
6561   *delta = NULL;
6562   for (i = 0; i < data->vgroups.length (); i++)
6563     {
6564       group = data->vgroups[i];
6565 
6566       old_cp = iv_ca_cand_for_group (ivs, group);
6567       if (old_cp->cand != cand)
6568 	continue;
6569 
6570       best_cost = iv_ca_cost (ivs);
6571       /* Start narrowing with START.  */
6572       new_cp = get_group_iv_cost (data, group, start);
6573 
6574       if (data->consider_all_candidates)
6575 	{
6576 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6577 	    {
6578 	      if (ci == cand->id || (start && ci == start->id))
6579 		continue;
6580 
6581 	      cnd = data->vcands[ci];
6582 
6583 	      cp = get_group_iv_cost (data, group, cnd);
6584 	      if (!cp)
6585 		continue;
6586 
6587 	      iv_ca_set_cp (data, ivs, group, cp);
6588 	      acost = iv_ca_cost (ivs);
6589 
6590 	      if (acost < best_cost)
6591 		{
6592 		  best_cost = acost;
6593 		  new_cp = cp;
6594 		}
6595 	    }
6596 	}
6597       else
6598 	{
6599 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6600 	    {
6601 	      if (ci == cand->id || (start && ci == start->id))
6602 		continue;
6603 
6604 	      cnd = data->vcands[ci];
6605 
6606 	      cp = get_group_iv_cost (data, group, cnd);
6607 	      if (!cp)
6608 		continue;
6609 
6610 	      iv_ca_set_cp (data, ivs, group, cp);
6611 	      acost = iv_ca_cost (ivs);
6612 
6613 	      if (acost < best_cost)
6614 		{
6615 		  best_cost = acost;
6616 		  new_cp = cp;
6617 		}
6618 	    }
6619 	}
6620       /* Restore to old cp for use.  */
6621       iv_ca_set_cp (data, ivs, group, old_cp);
6622 
6623       if (!new_cp)
6624 	{
6625 	  iv_ca_delta_free (delta);
6626 	  return infinite_cost;
6627 	}
6628 
6629       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6630     }
6631 
6632   iv_ca_delta_commit (data, ivs, *delta, true);
6633   cost = iv_ca_cost (ivs);
6634   iv_ca_delta_commit (data, ivs, *delta, false);
6635 
6636   return cost;
6637 }
6638 
6639 /* Try optimizing the set of candidates IVS by removing candidates different
6640    from EXCEPT_CAND from it.  Return cost of the new set, and store
6641    differences in DELTA.  */
6642 
6643 static comp_cost
6644 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6645 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6646 {
6647   bitmap_iterator bi;
6648   struct iv_ca_delta *act_delta, *best_delta;
6649   unsigned i;
6650   comp_cost best_cost, acost;
6651   struct iv_cand *cand;
6652 
6653   best_delta = NULL;
6654   best_cost = iv_ca_cost (ivs);
6655 
6656   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6657     {
6658       cand = data->vcands[i];
6659 
6660       if (cand == except_cand)
6661 	continue;
6662 
6663       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6664 
6665       if (acost < best_cost)
6666 	{
6667 	  best_cost = acost;
6668 	  iv_ca_delta_free (&best_delta);
6669 	  best_delta = act_delta;
6670 	}
6671       else
6672 	iv_ca_delta_free (&act_delta);
6673     }
6674 
6675   if (!best_delta)
6676     {
6677       *delta = NULL;
6678       return best_cost;
6679     }
6680 
6681   /* Recurse to possibly remove other unnecessary ivs.  */
6682   iv_ca_delta_commit (data, ivs, best_delta, true);
6683   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6684   iv_ca_delta_commit (data, ivs, best_delta, false);
6685   *delta = iv_ca_delta_join (best_delta, *delta);
6686   return best_cost;
6687 }
6688 
6689 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6690    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6691    the corresponding cost_pair, otherwise just return BEST_CP.  */
6692 
6693 static class cost_pair*
6694 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6695 			unsigned int cand_idx, struct iv_cand *old_cand,
6696 			class cost_pair *best_cp)
6697 {
6698   struct iv_cand *cand;
6699   class cost_pair *cp;
6700 
6701   gcc_assert (old_cand != NULL && best_cp != NULL);
6702   if (cand_idx == old_cand->id)
6703     return best_cp;
6704 
6705   cand = data->vcands[cand_idx];
6706   cp = get_group_iv_cost (data, group, cand);
6707   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6708     return cp;
6709 
6710   return best_cp;
6711 }
6712 
6713 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6714    which are used by more than one iv use.  For each of those candidates,
6715    this function tries to represent iv uses under that candidate using
6716    other ones with lower local cost, then tries to prune the new set.
6717    If the new set has a lower cost, it returns the new cost after recording
6718    candidate replacement in list DELTA.  */
6719 
6720 static comp_cost
6721 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6722 	       struct iv_ca_delta **delta)
6723 {
6724   bitmap_iterator bi, bj;
6725   unsigned int i, j, k;
6726   struct iv_cand *cand;
6727   comp_cost orig_cost, acost;
6728   struct iv_ca_delta *act_delta, *tmp_delta;
6729   class cost_pair *old_cp, *best_cp = NULL;
6730 
6731   *delta = NULL;
6732   orig_cost = iv_ca_cost (ivs);
6733 
6734   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6735     {
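      /* Only consider replacing candidates that serve more than one group
	 but no more than ALWAYS_PRUNE_CAND_SET_BOUND of them.  */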
6736       if (ivs->n_cand_uses[i] == 1
6737 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6738 	continue;
6739 
6740       cand = data->vcands[i];
6741 
6742       act_delta = NULL;
6743       /*  Represent uses under current candidate using other ones with
6744 	  lower local cost.  */
6745       for (j = 0; j < ivs->upto; j++)
6746 	{
6747 	  struct iv_group *group = data->vgroups[j];
6748 	  old_cp = iv_ca_cand_for_group (ivs, group);
6749 
6750 	  if (old_cp->cand != cand)
6751 	    continue;
6752 
6753 	  best_cp = old_cp;
6754 	  if (data->consider_all_candidates)
6755 	    for (k = 0; k < data->vcands.length (); k++)
6756 	      best_cp = cheaper_cost_with_cand (data, group, k,
6757 						old_cp->cand, best_cp);
6758 	  else
6759 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6760 	      best_cp = cheaper_cost_with_cand (data, group, k,
6761 						old_cp->cand, best_cp);
6762 
6763 	  if (best_cp == old_cp)
6764 	    continue;
6765 
6766 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6767 	}
6768       /* No need for further prune.  */
6769       if (!act_delta)
6770 	continue;
6771 
6772       /* Prune the new candidate set.  */
6773       iv_ca_delta_commit (data, ivs, act_delta, true);
6774       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6775       iv_ca_delta_commit (data, ivs, act_delta, false);
6776       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6777 
6778       if (acost < orig_cost)
6779 	{
6780 	  *delta = act_delta;
6781 	  return acost;
6782 	}
6783       else
6784 	iv_ca_delta_free (&act_delta);
6785     }
6786 
6787   return orig_cost;
6788 }
6789 
6790 /* Tries to extend the set IVS in the best possible way in order to
6791    express the GROUP.  If ORIGINALP is true, prefer candidates from
6792    the original set of IVs, otherwise favor important candidates not
6793    based on any memory object.  */
6794 
6795 static bool
6796 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6797 		  struct iv_group *group, bool originalp)
6798 {
6799   comp_cost best_cost, act_cost;
6800   unsigned i;
6801   bitmap_iterator bi;
6802   struct iv_cand *cand;
6803   struct iv_ca_delta *best_delta = NULL, *act_delta;
6804   class cost_pair *cp;
6805 
6806   iv_ca_add_group (data, ivs, group);
6807   best_cost = iv_ca_cost (ivs);
6808   cp = iv_ca_cand_for_group (ivs, group);
6809   if (cp)
6810     {
6811       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6812       iv_ca_set_no_cp (data, ivs, group);
6813     }
6814 
6815   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6816      first try important candidates not based on any memory object.  Only if
6817      this fails, try the specific ones.  Rationale -- in loops with many
6818      variables the best choice often is to use just one generic biv.  If we
6819      added here many ivs specific to the uses, the optimization algorithm later
6820      would be likely to get stuck in a local minimum, thus causing us to create
6821      too many ivs.  The approach from few ivs to more seems more likely to be
6822      successful -- starting from few ivs, replacing an expensive use by a
6823      specific iv should always be a win.  */
6824   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6825     {
6826       cand = data->vcands[i];
6827 
6828       if (originalp && cand->pos != IP_ORIGINAL)
6829 	continue;
6830 
6831       if (!originalp && cand->iv->base_object != NULL_TREE)
6832 	continue;
6833 
6834       if (iv_ca_cand_used_p (ivs, cand))
6835 	continue;
6836 
6837       cp = get_group_iv_cost (data, group, cand);
6838       if (!cp)
6839 	continue;
6840 
6841       iv_ca_set_cp (data, ivs, group, cp);
6842       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6843 			       true);
6844       iv_ca_set_no_cp (data, ivs, group);
6845       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6846 
6847       if (act_cost < best_cost)
6848 	{
6849 	  best_cost = act_cost;
6850 
6851 	  iv_ca_delta_free (&best_delta);
6852 	  best_delta = act_delta;
6853 	}
6854       else
6855 	iv_ca_delta_free (&act_delta);
6856     }
6857 
6858   if (best_cost.infinite_cost_p ())
6859     {
6860       for (i = 0; i < group->n_map_members; i++)
6861 	{
6862 	  cp = group->cost_map + i;
6863 	  cand = cp->cand;
6864 	  if (!cand)
6865 	    continue;
6866 
6867 	  /* Already tried this.  */
6868 	  if (cand->important)
6869 	    {
6870 	      if (originalp && cand->pos == IP_ORIGINAL)
6871 		continue;
6872 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6873 		continue;
6874 	    }
6875 
6876 	  if (iv_ca_cand_used_p (ivs, cand))
6877 	    continue;
6878 
6879 	  act_delta = NULL;
6880 	  iv_ca_set_cp (data, ivs, group, cp);
6881 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6882 	  iv_ca_set_no_cp (data, ivs, group);
6883 	  act_delta = iv_ca_delta_add (group,
6884 				       iv_ca_cand_for_group (ivs, group),
6885 				       cp, act_delta);
6886 
6887 	  if (act_cost < best_cost)
6888 	    {
6889 	      best_cost = act_cost;
6890 
6891 	      if (best_delta)
6892 		iv_ca_delta_free (&best_delta);
6893 	      best_delta = act_delta;
6894 	    }
6895 	  else
6896 	    iv_ca_delta_free (&act_delta);
6897 	}
6898     }
6899 
6900   iv_ca_delta_commit (data, ivs, best_delta, true);
6901   iv_ca_delta_free (&best_delta);
6902 
6903   return !best_cost.infinite_cost_p ();
6904 }
6905 
6906 /* Finds an initial assignment of candidates to uses.  */
6907 
6908 static class iv_ca *
6909 get_initial_solution (struct ivopts_data *data, bool originalp)
6910 {
6911   unsigned i;
6912   class iv_ca *ivs = iv_ca_new (data);
6913 
6914   for (i = 0; i < data->vgroups.length (); i++)
6915     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6916       {
6917 	iv_ca_free (&ivs);
6918 	return NULL;
6919       }
6920 
6921   return ivs;
6922 }
6923 
6924 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6925    points to a bool variable, this function tries to break local
6926    optimal fixed-point by replacing candidates in IVS if it's true.  */
6927 
6928 static bool
6929 try_improve_iv_set (struct ivopts_data *data,
6930 		    class iv_ca *ivs, bool *try_replace_p)
6931 {
6932   unsigned i, n_ivs;
6933   comp_cost acost, best_cost = iv_ca_cost (ivs);
6934   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6935   struct iv_cand *cand;
6936 
6937   /* Try extending the set of induction variables by one.  */
6938   for (i = 0; i < data->vcands.length (); i++)
6939     {
6940       cand = data->vcands[i];
6941 
6942       if (iv_ca_cand_used_p (ivs, cand))
6943 	continue;
6944 
6945       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6946       if (!act_delta)
6947 	continue;
6948 
6949       /* If we successfully added the candidate and the set is small enough,
6950 	 try optimizing it by removing other candidates.  */
6951       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6952       	{
6953 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6954 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6955 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6956 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6957 	}
6958 
6959       if (acost < best_cost)
6960 	{
6961 	  best_cost = acost;
6962 	  iv_ca_delta_free (&best_delta);
6963 	  best_delta = act_delta;
6964 	}
6965       else
6966 	iv_ca_delta_free (&act_delta);
6967     }
6968 
6969   if (!best_delta)
6970     {
6971       /* Try removing the candidates from the set instead.  */
6972       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6973 
6974       if (!best_delta && *try_replace_p)
6975 	{
6976 	  *try_replace_p = false;
6977	  /* So far the candidate-selection algorithm tends to choose fewer IVs,
6978	     so that it can handle cases in which loops have many variables
6979	     but the best choice is often to use only one general biv.  One
6980	     weakness is that it can't handle the opposite cases, in which
6981	     different candidates should be chosen for different uses.  To solve
6982	     the problem, we replace candidates in the manner described in the
6983	     comment of iv_ca_replace, thus giving the general algorithm a chance
6984	     to break out of a locally optimal fixed-point in these cases.  */
6985 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6986 	}
6987 
6988       if (!best_delta)
6989 	return false;
6990     }
6991 
6992   iv_ca_delta_commit (data, ivs, best_delta, true);
6993   iv_ca_delta_free (&best_delta);
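  /* Report an improvement only if the committed set really has the cost we
     computed for the best delta; otherwise stop iterating.  */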
6994   return best_cost == iv_ca_cost (ivs);
6995 }
6996 
6997 /* Attempts to find the optimal set of induction variables.  We do simple
6998    greedy heuristic -- we try to replace at most one candidate in the selected
6999    solution and remove the unused ivs while this improves the cost.  */
7000 
7001 static class iv_ca *
7002 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7003 {
7004   class iv_ca *set;
7005   bool try_replace_p = true;
7006 
7007   /* Get the initial solution.  */
7008   set = get_initial_solution (data, originalp);
7009   if (!set)
7010     {
7011       if (dump_file && (dump_flags & TDF_DETAILS))
7012 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7013       return NULL;
7014     }
7015 
7016   if (dump_file && (dump_flags & TDF_DETAILS))
7017     {
7018       fprintf (dump_file, "Initial set of candidates:\n");
7019       iv_ca_dump (data, dump_file, set);
7020     }
7021 
7022   while (try_improve_iv_set (data, set, &try_replace_p))
7023     {
7024       if (dump_file && (dump_flags & TDF_DETAILS))
7025 	{
7026 	  fprintf (dump_file, "Improved to:\n");
7027 	  iv_ca_dump (data, dump_file, set);
7028 	}
7029     }
7030 
7031   /* If the set has infinite_cost, it can't be optimal.  */
7032   if (iv_ca_cost (set).infinite_cost_p ())
7033     {
7034       if (dump_file && (dump_flags & TDF_DETAILS))
7035 	fprintf (dump_file,
7036 		 "Overflow to infinite cost in try_improve_iv_set.\n");
7037       iv_ca_free (&set);
7038     }
7039   return set;
7040 }
7041 
7042 static class iv_ca *
7043 find_optimal_iv_set (struct ivopts_data *data)
7044 {
7045   unsigned i;
7046   comp_cost cost, origcost;
7047   class iv_ca *set, *origset;
7048 
7049   /* Determine the cost based on a strategy that starts with the original
7050      IVs, and try again using a strategy that prefers important candidates
7051      not based on any memory object.  */
7052   origset = find_optimal_iv_set_1 (data, true);
7053   set = find_optimal_iv_set_1 (data, false);
7054 
7055   if (!origset && !set)
7056     return NULL;
7057 
7058   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7059   cost = set ? iv_ca_cost (set) : infinite_cost;
7060 
7061   if (dump_file && (dump_flags & TDF_DETAILS))
7062     {
7063       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7064 	       origcost.cost, origcost.complexity);
7065       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7066 	       cost.cost, cost.complexity);
7067     }
7068 
7069   /* Choose the one with the best cost.  */
7070   if (origcost <= cost)
7071     {
7072       if (set)
7073 	iv_ca_free (&set);
7074       set = origset;
7075     }
7076   else if (origset)
7077     iv_ca_free (&origset);
7078 
7079   for (i = 0; i < data->vgroups.length (); i++)
7080     {
7081       struct iv_group *group = data->vgroups[i];
7082       group->selected = iv_ca_cand_for_group (set, group)->cand;
7083     }
7084 
7085   return set;
7086 }
7087 
7088 /* Creates a new induction variable corresponding to CAND.  */
7089 
7090 static void
7091 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7092 {
7093   gimple_stmt_iterator incr_pos;
7094   tree base;
7095   struct iv_use *use;
7096   struct iv_group *group;
7097   bool after = false;
7098 
7099   gcc_assert (cand->iv != NULL);
7100 
7101   switch (cand->pos)
7102     {
7103     case IP_NORMAL:
7104       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7105       break;
7106 
7107     case IP_END:
7108       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7109       after = true;
7110       break;
7111 
7112     case IP_AFTER_USE:
7113       after = true;
7114       /* fall through */
7115     case IP_BEFORE_USE:
7116       incr_pos = gsi_for_stmt (cand->incremented_at);
7117       break;
7118 
7119     case IP_ORIGINAL:
7120       /* Mark that the iv is preserved.  */
7121       name_info (data, cand->var_before)->preserve_biv = true;
7122       name_info (data, cand->var_after)->preserve_biv = true;
7123 
7124       /* Rewrite the increment so that it uses var_before directly.  */
7125       use = find_interesting_uses_op (data, cand->var_after);
7126       group = data->vgroups[use->group_id];
7127       group->selected = cand;
7128       return;
7129     }
7130 
7131   gimple_add_tmp_var (cand->var_before);
7132 
7133   base = unshare_expr (cand->iv->base);
7134 
7135   create_iv (base, unshare_expr (cand->iv->step),
7136 	     cand->var_before, data->current_loop,
7137 	     &incr_pos, after, &cand->var_before, &cand->var_after);
7138 }
7139 
7140 /* Creates new induction variables described in SET.  */
7141 
7142 static void
7143 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7144 {
7145   unsigned i;
7146   struct iv_cand *cand;
7147   bitmap_iterator bi;
7148 
7149   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7150     {
7151       cand = data->vcands[i];
7152       create_new_iv (data, cand);
7153     }
7154 
7155   if (dump_file && (dump_flags & TDF_DETAILS))
7156     {
7157       fprintf (dump_file, "Selected IV set for loop %d",
7158 	       data->current_loop->num);
7159       if (data->loop_loc != UNKNOWN_LOCATION)
7160 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7161 		 LOCATION_LINE (data->loop_loc));
7162       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7163 	       avg_loop_niter (data->current_loop));
7164       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7165       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7166 	{
7167 	  cand = data->vcands[i];
7168 	  dump_cand (dump_file, cand);
7169 	}
7170       fprintf (dump_file, "\n");
7171     }
7172 }
7173 
7174 /* Rewrites USE (definition of iv used in a nonlinear expression)
7175    using candidate CAND.  */
7176 
7177 static void
7178 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7179 			    struct iv_use *use, struct iv_cand *cand)
7180 {
7181   gassign *ass;
7182   gimple_stmt_iterator bsi;
7183   tree comp, type = get_use_type (use), tgt;
7184 
7185   /* An important special case -- if we are asked to express value of
7186      the original iv by itself, just exit; there is no need to
7187      introduce a new computation (that might also need casting the
7188      variable to unsigned and back).  */
7189   if (cand->pos == IP_ORIGINAL
7190       && cand->incremented_at == use->stmt)
7191     {
7192       tree op = NULL_TREE;
7193       enum tree_code stmt_code;
7194 
7195       gcc_assert (is_gimple_assign (use->stmt));
7196       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7197 
7198       /* Check whether we may leave the computation unchanged.
7199 	 This is the case only if it does not rely on other
7200 	 computations in the loop -- otherwise, the computation
7201 	 we rely upon may be removed in remove_unused_ivs,
7202 	 thus leading to ICE.  */
7203       stmt_code = gimple_assign_rhs_code (use->stmt);
7204       if (stmt_code == PLUS_EXPR
7205 	  || stmt_code == MINUS_EXPR
7206 	  || stmt_code == POINTER_PLUS_EXPR)
7207 	{
7208 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7209 	    op = gimple_assign_rhs2 (use->stmt);
7210 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7211 	    op = gimple_assign_rhs1 (use->stmt);
7212 	}
7213 
7214       if (op != NULL_TREE)
7215 	{
7216 	  if (expr_invariant_in_loop_p (data->current_loop, op))
7217 	    return;
7218 	  if (TREE_CODE (op) == SSA_NAME)
7219 	    {
7220 	      struct iv *iv = get_iv (data, op);
7221 	      if (iv != NULL && integer_zerop (iv->step))
7222 		return;
7223 	    }
7224 	}
7225     }
7226 
7227   switch (gimple_code (use->stmt))
7228     {
7229     case GIMPLE_PHI:
7230       tgt = PHI_RESULT (use->stmt);
7231 
7232       /* If we should keep the biv, do not replace it.  */
7233       if (name_info (data, tgt)->preserve_biv)
7234 	return;
7235 
7236       bsi = gsi_after_labels (gimple_bb (use->stmt));
7237       break;
7238 
7239     case GIMPLE_ASSIGN:
7240       tgt = gimple_assign_lhs (use->stmt);
7241       bsi = gsi_for_stmt (use->stmt);
7242       break;
7243 
7244     default:
7245       gcc_unreachable ();
7246     }
7247 
7248   aff_tree aff_inv, aff_var;
7249   if (!get_computation_aff_1 (data->current_loop, use->stmt,
7250 			      use, cand, &aff_inv, &aff_var))
7251     gcc_unreachable ();
7252 
7253   unshare_aff_combination (&aff_inv);
7254   unshare_aff_combination (&aff_var);
7255   /* Prefer a CSE opportunity over a loop-invariant computation by adding
7256      the offset last, so that iv_uses with different offsets can be CSEd.  */
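  /* For example (hypothetical values), uses computing BASE + i + 4 and
     BASE + i + 8 can then share the BASE + i part and differ only in the
     final constant addition.  */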
7257   poly_widest_int offset = aff_inv.offset;
7258   aff_inv.offset = 0;
7259 
7260   gimple_seq stmt_list = NULL, seq = NULL;
7261   tree comp_op1 = aff_combination_to_tree (&aff_inv);
7262   tree comp_op2 = aff_combination_to_tree (&aff_var);
7263   gcc_assert (comp_op1 && comp_op2);
7264 
7265   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7266   gimple_seq_add_seq (&stmt_list, seq);
7267   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7268   gimple_seq_add_seq (&stmt_list, seq);
7269 
7270   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7271     std::swap (comp_op1, comp_op2);
7272 
7273   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7274     {
7275       comp = fold_build_pointer_plus (comp_op1,
7276 				      fold_convert (sizetype, comp_op2));
7277       comp = fold_build_pointer_plus (comp,
7278 				      wide_int_to_tree (sizetype, offset));
7279     }
7280   else
7281     {
7282       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7283 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
7284       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7285 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7286     }
7287 
7288   comp = fold_convert (type, comp);
7289   if (!valid_gimple_rhs_p (comp)
7290       || (gimple_code (use->stmt) != GIMPLE_PHI
7291 	  /* We can't allow re-allocating the stmt as it might be pointed
7292 	     to still.  */
7293 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7294 	      >= gimple_num_ops (gsi_stmt (bsi)))))
7295     {
7296       comp = force_gimple_operand (comp, &seq, true, NULL);
7297       gimple_seq_add_seq (&stmt_list, seq);
7298       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7299 	{
7300 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7301 	  /* As this isn't a plain copy we have to reset alignment
7302 	     information.  */
7303 	  if (SSA_NAME_PTR_INFO (comp))
7304 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7305 	}
7306     }
7307 
7308   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7309   if (gimple_code (use->stmt) == GIMPLE_PHI)
7310     {
7311       ass = gimple_build_assign (tgt, comp);
7312       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7313 
7314       bsi = gsi_for_stmt (use->stmt);
7315       remove_phi_node (&bsi, false);
7316     }
7317   else
7318     {
7319       gimple_assign_set_rhs_from_tree (&bsi, comp);
7320       use->stmt = gsi_stmt (bsi);
7321     }
7322 }
7323 
7324 /* Performs a peephole optimization to reorder the iv update statement with
7325    a mem ref to enable instruction combining in later phases. The mem ref uses
7326    the iv value before the update, so the reordering transformation requires
7327    adjustment of the offset. CAND is the selected IV_CAND.
7328 
7329    Example:
7330 
7331    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7332    iv2 = iv1 + 1;
7333 
7334    if (t < val)      (1)
7335      goto L;
7336    goto Head;
7337 
7338 
7339    directly propagating t over to (1) will introduce an overlapping live range,
7340    thus increasing register pressure.  This peephole transforms it into:
7341 
7342 
7343    iv2 = iv1 + 1;
7344    t = MEM_REF (base, iv2, 8, 8);
7345    if (t < val)
7346      goto L;
7347    goto Head;
7348 */
7349 
7350 static void
7351 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7352 {
7353   tree var_after;
7354   gimple *iv_update, *stmt;
7355   basic_block bb;
7356   gimple_stmt_iterator gsi, gsi_iv;
7357 
7358   if (cand->pos != IP_NORMAL)
7359     return;
7360 
7361   var_after = cand->var_after;
7362   iv_update = SSA_NAME_DEF_STMT (var_after);
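  /* Look for the pattern at the end of the block containing the update:
     the memory-using assignment USE->stmt, immediately followed by the iv
     update, immediately followed by the exit GIMPLE_COND; only then do we
     move the update before the use.  */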
7363 
7364   bb = gimple_bb (iv_update);
7365   gsi = gsi_last_nondebug_bb (bb);
7366   stmt = gsi_stmt (gsi);
7367 
7368   /* Only handle conditional statement for now.  */
7369   if (gimple_code (stmt) != GIMPLE_COND)
7370     return;
7371 
7372   gsi_prev_nondebug (&gsi);
7373   stmt = gsi_stmt (gsi);
7374   if (stmt != iv_update)
7375     return;
7376 
7377   gsi_prev_nondebug (&gsi);
7378   if (gsi_end_p (gsi))
7379     return;
7380 
7381   stmt = gsi_stmt (gsi);
7382   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7383     return;
7384 
7385   if (stmt != use->stmt)
7386     return;
7387 
7388   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7389     return;
7390 
7391   if (dump_file && (dump_flags & TDF_DETAILS))
7392     {
7393       fprintf (dump_file, "Reordering \n");
7394       print_gimple_stmt (dump_file, iv_update, 0);
7395       print_gimple_stmt (dump_file, use->stmt, 0);
7396       fprintf (dump_file, "\n");
7397     }
7398 
7399   gsi = gsi_for_stmt (use->stmt);
7400   gsi_iv = gsi_for_stmt (iv_update);
7401   gsi_move_before (&gsi_iv, &gsi);
7402 
7403   cand->pos = IP_BEFORE_USE;
7404   cand->incremented_at = use->stmt;
7405 }
7406 
7407 /* Return the alias pointer type that should be used for a MEM_REF
7408    associated with USE, which has type USE_PTR_ADDRESS.  */
7409 
7410 static tree
7411 get_alias_ptr_type_for_ptr_address (iv_use *use)
7412 {
7413   gcall *call = as_a <gcall *> (use->stmt);
7414   switch (gimple_call_internal_fn (call))
7415     {
7416     case IFN_MASK_LOAD:
7417     case IFN_MASK_STORE:
7418     case IFN_MASK_LOAD_LANES:
7419     case IFN_MASK_STORE_LANES:
7420     case IFN_LEN_LOAD:
7421     case IFN_LEN_STORE:
7422       /* The second argument contains the correct alias type.  */
7423       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7424       return TREE_TYPE (gimple_call_arg (call, 1));
7425 
7426     default:
7427       gcc_unreachable ();
7428     }
7429 }
7430 
7431 
7432 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7433 
7434 static void
7435 rewrite_use_address (struct ivopts_data *data,
7436 		     struct iv_use *use, struct iv_cand *cand)
7437 {
7438   aff_tree aff;
7439   bool ok;
7440 
7441   adjust_iv_update_pos (cand, use);
7442   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7443   gcc_assert (ok);
7444   unshare_aff_combination (&aff);
7445 
7446   /* To avoid undefined overflow problems, all IV candidates use unsigned
7447      integer types.  The drawback is that this makes it impossible for
7448      create_mem_ref to distinguish an IV that is based on a memory object
7449      from one that represents simply an offset.
7450 
7451      To work around this problem, we pass a hint to create_mem_ref that
7452      indicates which variable (if any) in aff is an IV based on a memory
7453      object.  Note that we only consider the candidate.  If this is not
7454      based on an object, the base of the reference is in some subexpression
7455      of the use -- but these will use pointer types, so they are recognized
7456      by the create_mem_ref heuristics anyway.  */
7457   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7458   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7459   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7460   tree type = use->mem_type;
7461   tree alias_ptr_type;
7462   if (use->type == USE_PTR_ADDRESS)
7463     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7464   else
7465     {
7466       gcc_assert (type == TREE_TYPE (*use->op_p));
7467       unsigned int align = get_object_alignment (*use->op_p);
7468       if (align != TYPE_ALIGN (type))
7469 	type = build_aligned_type (type, align);
7470       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7471     }
7472   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7473 			     iv, base_hint, data->speed);
7474 
7475   if (use->type == USE_PTR_ADDRESS)
7476     {
7477       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7478       ref = fold_convert (get_use_type (use), ref);
7479       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7480 				      true, GSI_SAME_STMT);
7481     }
7482   else
7483     copy_ref_info (ref, *use->op_p);
7484 
7485   *use->op_p = ref;
7486 }
7487 
7488 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7489    candidate CAND.  */
7490 
7491 static void
7492 rewrite_use_compare (struct ivopts_data *data,
7493 		     struct iv_use *use, struct iv_cand *cand)
7494 {
7495   tree comp, op, bound;
7496   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7497   enum tree_code compare;
7498   struct iv_group *group = data->vgroups[use->group_id];
7499   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7500 
7501   bound = cp->value;
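  /* A non-NULL value recorded in the cost pair means the exit test can be
     replaced by a comparison of CAND against this bound (iv elimination).  */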
7502   if (bound)
7503     {
7504       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7505       tree var_type = TREE_TYPE (var);
7506       gimple_seq stmts;
7507 
7508       if (dump_file && (dump_flags & TDF_DETAILS))
7509 	{
7510 	  fprintf (dump_file, "Replacing exit test: ");
7511 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7512 	}
7513       compare = cp->comp;
7514       bound = unshare_expr (fold_convert (var_type, bound));
7515       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7516       if (stmts)
7517 	gsi_insert_seq_on_edge_immediate (
7518 		loop_preheader_edge (data->current_loop),
7519 		stmts);
7520 
7521       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7522       gimple_cond_set_lhs (cond_stmt, var);
7523       gimple_cond_set_code (cond_stmt, compare);
7524       gimple_cond_set_rhs (cond_stmt, op);
7525       return;
7526     }
7527 
7528   /* The induction variable elimination failed; just express the original
7529      giv.  */
7530   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7531   gcc_assert (comp != NULL_TREE);
7532   gcc_assert (use->op_p != NULL);
7533   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7534 					 SSA_NAME_VAR (*use->op_p),
7535 					 true, GSI_SAME_STMT);
7536 }
7537 
7538 /* Rewrite the groups using the selected induction variables.  */
7539 
7540 static void
7541 rewrite_groups (struct ivopts_data *data)
7542 {
7543   unsigned i, j;
7544 
7545   for (i = 0; i < data->vgroups.length (); i++)
7546     {
7547       struct iv_group *group = data->vgroups[i];
7548       struct iv_cand *cand = group->selected;
7549 
7550       gcc_assert (cand);
7551 
7552       if (group->type == USE_NONLINEAR_EXPR)
7553 	{
7554 	  for (j = 0; j < group->vuses.length (); j++)
7555 	    {
7556 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7557 	      update_stmt (group->vuses[j]->stmt);
7558 	    }
7559 	}
7560       else if (address_p (group->type))
7561 	{
7562 	  for (j = 0; j < group->vuses.length (); j++)
7563 	    {
7564 	      rewrite_use_address (data, group->vuses[j], cand);
7565 	      update_stmt (group->vuses[j]->stmt);
7566 	    }
7567 	}
7568       else
7569 	{
7570 	  gcc_assert (group->type == USE_COMPARE);
7571 
7572 	  for (j = 0; j < group->vuses.length (); j++)
7573 	    {
7574 	      rewrite_use_compare (data, group->vuses[j], cand);
7575 	      update_stmt (group->vuses[j]->stmt);
7576 	    }
7577 	}
7578     }
7579 }
7580 
7581 /* Removes the ivs that are not used after rewriting.  */
7582 
7583 static void
7584 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7585 {
7586   unsigned j;
7587   bitmap_iterator bi;
7588 
7589   /* Figure out an order in which to release SSA DEFs so that we don't
7590      release something that we'd have to propagate into a debug stmt
7591      afterwards.  */
7592   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7593     {
7594       struct version_info *info;
7595 
7596       info = ver_info (data, j);
7597       if (info->iv
7598 	  && !integer_zerop (info->iv->step)
7599 	  && !info->inv_id
7600 	  && !info->iv->nonlin_use
7601 	  && !info->preserve_biv)
7602 	{
7603 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7604 
7605 	  tree def = info->iv->ssa_name;
7606 
7607 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7608 	    {
7609 	      imm_use_iterator imm_iter;
7610 	      use_operand_p use_p;
7611 	      gimple *stmt;
7612 	      int count = 0;
7613 
7614 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7615 		{
7616 		  if (!gimple_debug_bind_p (stmt))
7617 		    continue;
7618 
7619 		  /* We just want to determine whether to do nothing
7620 		     (count == 0), to substitute the computed
7621 		     expression into a single use of the SSA DEF by
7622 		     itself (count == 1), or to use a debug temp
7623 		     because the SSA DEF is used multiple times or as
7624 		     part of a larger expression (count > 1). */
7625 		  count++;
7626 		  if (gimple_debug_bind_get_value (stmt) != def)
7627 		    count++;
7628 
7629 		  if (count > 1)
7630 		    break;
7631 		}
7632 
7633 	      if (!count)
7634 		continue;
7635 
7636 	      struct iv_use dummy_use;
7637 	      struct iv_cand *best_cand = NULL, *cand;
7638 	      unsigned i, best_pref = 0, cand_pref;
7639 	      tree comp = NULL_TREE;
7640 
7641 	      memset (&dummy_use, 0, sizeof (dummy_use));
7642 	      dummy_use.iv = info->iv;
7643 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7644 		{
7645 		  cand = data->vgroups[i]->selected;
7646 		  if (cand == best_cand)
7647 		    continue;
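		  /* Rank how well CAND can stand in for the removed iv in
		     debug stmts: an equal step is worth 4, an equal machine
		     mode 2, and a constant base 1; keep the highest-ranked
		     candidate for which a debug expression can be built.  */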
7648 		  cand_pref = operand_equal_p (cand->iv->step,
7649 					       info->iv->step, 0)
7650 		    ? 4 : 0;
7651 		  cand_pref
7652 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7653 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7654 		    ? 2 : 0;
7655 		  cand_pref
7656 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7657 		    ? 1 : 0;
7658 		  if (best_cand == NULL || best_pref < cand_pref)
7659 		    {
7660 		      tree this_comp
7661 			= get_debug_computation_at (data->current_loop,
7662 						    SSA_NAME_DEF_STMT (def),
7663 						    &dummy_use, cand);
7664 		      if (this_comp)
7665 			{
7666 			  best_cand = cand;
7667 			  best_pref = cand_pref;
7668 			  comp = this_comp;
7669 			}
7670 		    }
7671 		}
7672 
7673 	      if (!best_cand)
7674 		continue;
7675 
7676 	      comp = unshare_expr (comp);
7677 	      if (count > 1)
7678 		{
7679 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7680 		  DECL_ARTIFICIAL (vexpr) = 1;
7681 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7682 		  if (SSA_NAME_VAR (def))
7683 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7684 		  else
7685 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7686 		  gdebug *def_temp
7687 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7688 		  gimple_stmt_iterator gsi;
7689 
7690 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7691 		    gsi = gsi_after_labels (gimple_bb
7692 					    (SSA_NAME_DEF_STMT (def)));
7693 		  else
7694 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7695 
7696 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7697 		  comp = vexpr;
7698 		}
7699 
7700 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7701 		{
7702 		  if (!gimple_debug_bind_p (stmt))
7703 		    continue;
7704 
7705 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7706 		    SET_USE (use_p, comp);
7707 
7708 		  update_stmt (stmt);
7709 		}
7710 	    }
7711 	}
7712     }
7713 }
7714 
7715 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7716    for hash_map::traverse.  */
7717 
7718 bool
7719 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7720 {
7721   free (value);
7722   return true;
7723 }
7724 
7725 /* Frees data allocated by the optimization of a single loop.  */
7726 
7727 static void
7728 free_loop_data (struct ivopts_data *data)
7729 {
7730   unsigned i, j;
7731   bitmap_iterator bi;
7732   tree obj;
7733 
7734   if (data->niters)
7735     {
7736       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7737       delete data->niters;
7738       data->niters = NULL;
7739     }
7740 
7741   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7742     {
7743       struct version_info *info;
7744 
7745       info = ver_info (data, i);
7746       info->iv = NULL;
7747       info->has_nonlin_use = false;
7748       info->preserve_biv = false;
7749       info->inv_id = 0;
7750     }
7751   bitmap_clear (data->relevant);
7752   bitmap_clear (data->important_candidates);
7753 
7754   for (i = 0; i < data->vgroups.length (); i++)
7755     {
7756       struct iv_group *group = data->vgroups[i];
7757 
7758       for (j = 0; j < group->vuses.length (); j++)
7759 	free (group->vuses[j]);
7760       group->vuses.release ();
7761 
7762       BITMAP_FREE (group->related_cands);
7763       for (j = 0; j < group->n_map_members; j++)
7764 	{
7765 	  if (group->cost_map[j].inv_vars)
7766 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7767 	  if (group->cost_map[j].inv_exprs)
7768 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7769 	}
7770 
7771       free (group->cost_map);
7772       free (group);
7773     }
7774   data->vgroups.truncate (0);
7775 
7776   for (i = 0; i < data->vcands.length (); i++)
7777     {
7778       struct iv_cand *cand = data->vcands[i];
7779 
7780       if (cand->inv_vars)
7781 	BITMAP_FREE (cand->inv_vars);
7782       if (cand->inv_exprs)
7783 	BITMAP_FREE (cand->inv_exprs);
7784       free (cand);
7785     }
7786   data->vcands.truncate (0);
7787 
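  /* If new SSA names were created while transforming this loop, grow the
     version_info array (freshly zeroed) so it stays large enough for the
     next loop.  */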
7788   if (data->version_info_size < num_ssa_names)
7789     {
7790       data->version_info_size = 2 * num_ssa_names;
7791       free (data->version_info);
7792       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7793     }
7794 
7795   data->max_inv_var_id = 0;
7796   data->max_inv_expr_id = 0;
7797 
7798   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7799     SET_DECL_RTL (obj, NULL_RTX);
7800 
7801   decl_rtl_to_reset.truncate (0);
7802 
7803   data->inv_expr_tab->empty ();
7804 
7805   data->iv_common_cand_tab->empty ();
7806   data->iv_common_cands.truncate (0);
7807 }
7808 
7809 /* Finalizes data structures used by the iv optimization pass.  */
7811 
7812 static void
7813 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7814 {
7815   free_loop_data (data);
7816   free (data->version_info);
7817   BITMAP_FREE (data->relevant);
7818   BITMAP_FREE (data->important_candidates);
7819 
7820   decl_rtl_to_reset.release ();
7821   data->vgroups.release ();
7822   data->vcands.release ();
7823   delete data->inv_expr_tab;
7824   data->inv_expr_tab = NULL;
7825   free_affine_expand_cache (&data->name_expansion_cache);
7826   if (data->base_object_map)
7827     delete data->base_object_map;
7828   delete data->iv_common_cand_tab;
7829   data->iv_common_cand_tab = NULL;
7830   data->iv_common_cands.release ();
7831   obstack_free (&data->iv_obstack, NULL);
7832 }
7833 
7834 /* Returns true if the loop body BODY includes any function calls other than internal calls and inexpensive builtins.  */
7835 
7836 static bool
7837 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7838 {
7839   gimple_stmt_iterator gsi;
7840   unsigned i;
7841 
7842   for (i = 0; i < num_nodes; i++)
7843     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7844       {
7845 	gimple *stmt = gsi_stmt (gsi);
7846 	if (is_gimple_call (stmt)
7847 	    && !gimple_call_internal_p (stmt)
7848 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7849 	  return true;
7850       }
7851   return false;
7852 }
7853 
7854 /* Determine cost scaling factor for basic blocks in loop.  */
7855 #define COST_SCALING_FACTOR_BOUND (20)
7856 
7857 static void
7858 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7859 {
7860   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7861   if (!data->speed || lfreq <= 0)
7862     return;
7863 
7864   int max_freq = lfreq;
7865   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7866     {
7867       body[i]->aux = (void *)(intptr_t) 1;
7868       if (max_freq < body[i]->count.to_frequency (cfun))
7869 	max_freq = body[i]->count.to_frequency (cfun);
7870     }
7871   if (max_freq > lfreq)
7872     {
7873       int divisor, factor;
7874       /* Check if scaling factor itself needs to be scaled by the bound.  This
7875 	 is to avoid overflow when scaling cost according to profile info.  */
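      /* For example (hypothetical counts): with lfreq == 10 and
	 max_freq == 10000, scaling by bfreq / lfreq could reach 1000;
	 scaling by COST_SCALING_FACTOR_BOUND * bfreq / max_freq instead
	 caps the factor at COST_SCALING_FACTOR_BOUND.  */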
7876       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7877 	{
7878 	  divisor = max_freq;
7879 	  factor = COST_SCALING_FACTOR_BOUND;
7880 	}
7881       else
7882 	{
7883 	  divisor = lfreq;
7884 	  factor = 1;
7885 	}
7886       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7887 	{
7888 	  int bfreq = body[i]->count.to_frequency (cfun);
7889 	  if (bfreq <= lfreq)
7890 	    continue;
7891 
7892 	  body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7893 	}
7894     }
7895 }
7896 
7897 /* Find doloop comparison use and set its doloop_p on if found.  */
7898 
7899 static bool
7900 find_doloop_use (struct ivopts_data *data)
7901 {
7902   struct loop *loop = data->current_loop;
7903 
7904   for (unsigned i = 0; i < data->vgroups.length (); i++)
7905     {
7906       struct iv_group *group = data->vgroups[i];
7907       if (group->type == USE_COMPARE)
7908 	{
7909 	  gcc_assert (group->vuses.length () == 1);
7910 	  struct iv_use *use = group->vuses[0];
7911 	  gimple *stmt = use->stmt;
7912 	  if (gimple_code (stmt) == GIMPLE_COND)
7913 	    {
7914 	      basic_block bb = gimple_bb (stmt);
7915 	      edge true_edge, false_edge;
7916 	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7917	      /* This comparison controls the branch to the loop latch.  Require
7918		 the latch to be empty for now.  */
7919 	      if ((loop->latch == true_edge->dest
7920 		   || loop->latch == false_edge->dest)
7921 		  && empty_block_p (loop->latch))
7922 		{
7923 		  group->doloop_p = true;
7924 		  if (dump_file && (dump_flags & TDF_DETAILS))
7925 		    {
7926 		      fprintf (dump_file, "Doloop cmp iv use: ");
7927 		      print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
7928 		    }
7929 		  return true;
7930 		}
7931 	    }
7932 	}
7933     }
7934 
7935   return false;
7936 }
7937 
7938 /* For targets that support doloop, predict whether the later RTL doloop
7939    transformation will be performed on this loop; if so, detect the doloop
7940    use and set the flag doloop_use_p.  */
7941 
7942 void
7943 analyze_and_mark_doloop_use (struct ivopts_data *data)
7944 {
7945   data->doloop_use_p = false;
7946 
7947   if (!flag_branch_on_count_reg)
7948     return;
7949 
7950   if (data->current_loop->unroll == USHRT_MAX)
7951     return;
7952 
7953   if (!generic_predict_doloop_p (data))
7954     return;
7955 
7956   if (find_doloop_use (data))
7957     {
7958       data->doloop_use_p = true;
7959       if (dump_file && (dump_flags & TDF_DETAILS))
7960 	{
7961 	  struct loop *loop = data->current_loop;
7962 	  fprintf (dump_file,
7963 		   "Predict loop %d can perform"
7964 		   " doloop optimization later.\n",
7965 		   loop->num);
7966 	  flow_loop_dump (loop, dump_file, NULL, 1);
7967 	}
7968     }
7969 }
7970 
7971 /* Optimizes the LOOP.  Returns true if anything changed.  */
7972 
7973 static bool
7974 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7975 			   bitmap toremove)
7976 {
7977   bool changed = false;
7978   class iv_ca *iv_ca;
7979   edge exit = single_dom_exit (loop);
7980   basic_block *body;
7981 
7982   gcc_assert (!data->niters);
7983   data->current_loop = loop;
7984   data->loop_loc = find_loop_location (loop).get_location_t ();
7985   data->speed = optimize_loop_for_speed_p (loop);
7986 
7987   if (dump_file && (dump_flags & TDF_DETAILS))
7988     {
7989       fprintf (dump_file, "Processing loop %d", loop->num);
7990       if (data->loop_loc != UNKNOWN_LOCATION)
7991 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7992 		 LOCATION_LINE (data->loop_loc));
7993       fprintf (dump_file, "\n");
7994 
7995       if (exit)
7996 	{
7997 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7998 		   exit->src->index, exit->dest->index);
7999 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8000 	  fprintf (dump_file, "\n");
8001 	}
8002 
8003       fprintf (dump_file, "\n");
8004     }
8005 
8006   body = get_loop_body (loop);
8007   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8008   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8009 
8010   data->loop_single_exit_p
8011     = exit != NULL && loop_only_exit_p (loop, body, exit);
8012 
8013   /* For each ssa name determines whether it behaves as an induction variable
8014      in some loop.  */
8015   if (!find_induction_variables (data))
8016     goto finish;
8017 
8018   /* Finds interesting uses (item 1).  */
8019   find_interesting_uses (data);
8020   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8021     goto finish;
8022 
8023   /* Determine cost scaling factor for basic blocks in loop.  */
8024   determine_scaling_factor (data, body);
8025 
8026   /* Analyze doloop possibility and mark the doloop use if predicted.  */
8027   analyze_and_mark_doloop_use (data);
8028 
8029   /* Finds candidates for the induction variables (item 2).  */
8030   find_iv_candidates (data);
8031 
8032   /* Calculates the costs (item 3, part 1).  */
8033   determine_iv_costs (data);
8034   determine_group_iv_costs (data);
8035   determine_set_costs (data);
8036 
8037   /* Find the optimal set of induction variables (item 3, part 2).  */
8038   iv_ca = find_optimal_iv_set (data);
8039   /* Cleanup basic block aux field.  */
8040   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8041     body[i]->aux = NULL;
8042   if (!iv_ca)
8043     goto finish;
8044   changed = true;
8045 
8046   /* Create the new induction variables (item 4, part 1).  */
8047   create_new_ivs (data, iv_ca);
8048   iv_ca_free (&iv_ca);
8049 
8050   /* Rewrite the uses (item 4, part 2).  */
8051   rewrite_groups (data);
8052 
8053   /* Remove the ivs that are unused after rewriting.  */
8054   remove_unused_ivs (data, toremove);
8055 
8056 finish:
8057   free (body);
8058   free_loop_data (data);
8059 
8060   return changed;
8061 }
8062 
8063 /* Main entry point.  Optimizes induction variables in loops.  */
8064 
8065 void
8066 tree_ssa_iv_optimize (void)
8067 {
8068   class loop *loop;
8069   struct ivopts_data data;
8070   auto_bitmap toremove;
8071 
8072   tree_ssa_iv_optimize_init (&data);
8073 
8074   /* Optimize the loops starting with the innermost ones.  */
8075   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8076     {
8077       if (!dbg_cnt (ivopts_loop))
8078 	continue;
8079 
8080       if (dump_file && (dump_flags & TDF_DETAILS))
8081 	flow_loop_dump (loop, dump_file, NULL, 1);
8082 
8083       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8084     }
8085 
8086   /* Remove eliminated IV defs.  */
8087   release_defs_bitset (toremove);
8088 
8089   /* We have changed the structure of induction variables; it might happen
8090      that definitions in the scev database refer to some of them that were
8091      eliminated.  */
8092   scev_reset_htab ();
8093   /* Likewise niter and control-IV information.  */
8094   free_numbers_of_iterations_estimates (cfun);
8095 
8096   tree_ssa_iv_optimize_finalize (&data);
8097 }
8098 
8099 #include "gt-tree-ssa-loop-ivopts.h"
8100