1 /* Induction variable optimizations.
2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does so in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33    2) Candidates for the induction variables are found.  This includes
34 
35       -- old induction variables
36       -- the variables defined by expressions derived from the "interesting
37 	 uses" above
38 
39    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
40       cost function assigns a cost to sets of induction variables and consists
41       of three parts:
42 
43       -- The use costs.  Each of the interesting uses chooses the best induction
44 	 variable in the set and adds its cost to the sum.  The cost reflects
45 	 the time spent on modifying the induction variable's value to be usable
46 	 for the given purpose (adding base and offset for arrays, etc.).
47       -- The variable costs.  Each of the variables has a cost assigned that
48 	 reflects the costs associated with incrementing the value of the
49 	 variable.  The original variables are somewhat preferred.
50       -- The set cost.  Depending on the size of the set, extra cost may be
51 	 added to reflect register pressure.
52 
53       All the costs are defined in a machine-specific way, using the target
54       hooks and machine descriptions to determine them.
55 
56    4) The trees are transformed to use the new variables, the dead code is
57       removed.
58 
59    All of this is done loop by loop.  Doing it globally is theoretically
60    possible; it might give better performance and enable us
61    to decide costs more precisely, but getting all the interactions right
62    would be complicated.  */
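
/* For illustration only (not part of the pass): on a source loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   strength reduction may replace the repeated address computation
   a + i * sizeof (*a) with a pointer induction variable, yielding
   roughly

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   after which the original counter i can be eliminated if it has no
   other uses.  */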
63 
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "stor-layout.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "pointer-set.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "is-a.h"
80 #include "gimple.h"
81 #include "gimplify.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
85 #include "cgraph.h"
86 #include "tree-cfg.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
95 #include "expr.h"
96 #include "tree-dfa.h"
97 #include "tree-ssa.h"
98 #include "cfgloop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
103 #include "cfgloop.h"
104 #include "params.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
107 #include "target.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
110 #include "expmed.h"
111 #include "tree-ssa-address.h"
112 
113 /* FIXME: Expressions are expanded to RTL in this pass to determine the
114    cost of different addressing modes.  This should be moved to a TBD
115    interface between the GIMPLE and RTL worlds.  */
116 #include "expr.h"
117 #include "recog.h"
118 
119 /* The infinite cost.  */
120 #define INFTY 10000000
121 
122 #define AVG_LOOP_NITER(LOOP) 5
123 
124 /* Returns the expected number of loop iterations for LOOP.
125    The average trip count is computed from profile data if it
126    exists. */
127 
128 static inline HOST_WIDE_INT
129 avg_loop_niter (struct loop *loop)
130 {
131   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
132   if (niter == -1)
133     return AVG_LOOP_NITER (loop);
134 
135   return niter;
136 }
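
/* For example (illustrative): if profile data estimates that the loop
   body executes about 100 times, avg_loop_niter returns 100; without
   profile data it falls back to the fixed guess AVG_LOOP_NITER,
   i.e. 5.  */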
137 
138 /* Representation of the induction variable.  */
139 struct iv
140 {
141   tree base;		/* Initial value of the iv.  */
142   tree base_object;	/* A memory object to which the induction variable points.  */
143   tree step;		/* Step of the iv (constant only).  */
144   tree ssa_name;	/* The ssa name with the value.  */
145   bool biv_p;		/* Is it a biv?  */
146   bool have_use_for;	/* Do we already have a use for it?  */
147   unsigned use_id;	/* The identifier in the use if it is the case.  */
148 };
149 
150 /* Per-ssa version information (induction variable descriptions, etc.).  */
151 struct version_info
152 {
153   tree name;		/* The ssa name.  */
154   struct iv *iv;	/* Induction variable description.  */
155   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
156 			   an expression that is not an induction variable.  */
157   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
158   unsigned inv_id;	/* Id of an invariant.  */
159 };
160 
161 /* Types of uses.  */
162 enum use_type
163 {
164   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
165   USE_ADDRESS,		/* Use in an address.  */
166   USE_COMPARE		/* Use is a compare.  */
167 };
168 
169 /* Cost of a computation.  */
170 typedef struct
171 {
172   int cost;		/* The runtime cost.  */
173   unsigned complexity;	/* The estimate of the complexity of the code for
174 			   the computation (in no concrete units --
175 			   complexity field should be larger for more
176 			   complex expressions and addressing modes).  */
177 } comp_cost;
178 
179 static const comp_cost no_cost = {0, 0};
180 static const comp_cost infinite_cost = {INFTY, INFTY};
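
/* For illustration (an assumption about how these values are meant to
   be combined, not a definition from this file): costs add up
   componentwise, so uses costing {4, 1} and {6, 2} together cost
   {10, 3}, any sum involving infinite_cost stays infinite, and when
   runtime costs tie, the lower complexity is preferred.  */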
181 
182 /* The candidate - cost pair.  */
183 struct cost_pair
184 {
185   struct iv_cand *cand;	/* The candidate.  */
186   comp_cost cost;	/* The cost.  */
187   bitmap depends_on;	/* The list of invariants that have to be
188 			   preserved.  */
189   tree value;		/* For final value elimination, the expression for
190 			   the final value of the iv.  For iv elimination,
191 			   the new bound to compare with.  */
192   enum tree_code comp;	/* For iv elimination, the comparison.  */
193   int inv_expr_id;      /* Loop invariant expression id.  */
194 };
195 
196 /* Use.  */
197 struct iv_use
198 {
199   unsigned id;		/* The id of the use.  */
200   enum use_type type;	/* Type of the use.  */
201   struct iv *iv;	/* The induction variable it is based on.  */
202   gimple stmt;		/* Statement in which it occurs.  */
203   tree *op_p;		/* The place where it occurs.  */
204   bitmap related_cands;	/* The set of "related" iv candidates, plus the common
205 			   important ones.  */
206 
207   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
208   struct cost_pair *cost_map;
209 			/* The costs w.r.t. the iv candidates.  */
210 
211   struct iv_cand *selected;
212 			/* The selected candidate.  */
213 };
214 
215 /* The position where the iv is computed.  */
216 enum iv_position
217 {
218   IP_NORMAL,		/* At the end, just before the exit condition.  */
219   IP_END,		/* At the end of the latch block.  */
220   IP_BEFORE_USE,	/* Immediately before a specific use.  */
221   IP_AFTER_USE,		/* Immediately after a specific use.  */
222   IP_ORIGINAL		/* The original biv.  */
223 };
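
/* For illustration: in a loop of the shape

     body:
       ...
       i_2 = i_1 + 1;	<- an IP_NORMAL increment goes here, just
       if (i_2 < n_3)	   before the exit test
	 goto body;	<- an IP_END increment goes at the end of
			   the latch block instead

   IP_BEFORE_USE and IP_AFTER_USE anchor the increment at one
   particular use, and IP_ORIGINAL keeps the position of the original
   biv.  */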
224 
225 /* The induction variable candidate.  */
226 struct iv_cand
227 {
228   unsigned id;		/* The number of the candidate.  */
229   bool important;	/* Whether this is an "important" candidate, i.e. such
230 			   that it should be considered by all uses.  */
231   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
232   gimple incremented_at;/* For original biv, the statement where it is
233 			   incremented.  */
234   tree var_before;	/* The variable used for it before increment.  */
235   tree var_after;	/* The variable used for it after increment.  */
236   struct iv *iv;	/* The value of the candidate.  NULL for
237 			   "pseudocandidate" used to indicate the possibility
238 			   to replace the final value of an iv by direct
239 			   computation of the value.  */
240   unsigned cost;	/* Cost of the candidate.  */
241   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
242   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 			      where it is incremented.  */
244   bitmap depends_on;	/* The list of invariants that are used in step of the
245 			   biv.  */
246 };
247 
248 /* Loop invariant expression hashtable entry.  */
249 struct iv_inv_expr_ent
250 {
251   tree expr;
252   int id;
253   hashval_t hash;
254 };
255 
256 /* The data used by the induction variable optimizations.  */
257 
258 typedef struct iv_use *iv_use_p;
259 
260 typedef struct iv_cand *iv_cand_p;
261 
262 /* Hashtable helpers.  */
263 
264 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
265 {
266   typedef iv_inv_expr_ent value_type;
267   typedef iv_inv_expr_ent compare_type;
268   static inline hashval_t hash (const value_type *);
269   static inline bool equal (const value_type *, const compare_type *);
270 };
271 
272 /* Hash function for loop invariant expressions.  */
273 
274 inline hashval_t
275 iv_inv_expr_hasher::hash (const value_type *expr)
276 {
277   return expr->hash;
278 }
279 
280 /* Hash table equality function for expressions.  */
281 
282 inline bool
283 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
284 {
285   return expr1->hash == expr2->hash
286 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
287 }
288 
289 struct ivopts_data
290 {
291   /* The currently optimized loop.  */
292   struct loop *current_loop;
293 
294   /* Numbers of iterations for all exits of the current loop.  */
295   struct pointer_map_t *niters;
296 
297   /* Number of registers used in the current loop.  */
298   unsigned regs_used;
299 
300   /* The size of version_info array allocated.  */
301   unsigned version_info_size;
302 
303   /* The array of information for the ssa names.  */
304   struct version_info *version_info;
305 
306   /* The hashtable of loop invariant expressions created
307      by ivopt.  */
308   hash_table <iv_inv_expr_hasher> inv_expr_tab;
309 
310   /* Loop invariant expression id.  */
311   int inv_expr_id;
312 
313   /* The bitmap of indices in version_info whose value was changed.  */
314   bitmap relevant;
315 
316   /* The uses of induction variables.  */
317   vec<iv_use_p> iv_uses;
318 
319   /* The candidates.  */
320   vec<iv_cand_p> iv_candidates;
321 
322   /* A bitmap of important candidates.  */
323   bitmap important_candidates;
324 
325   /* The maximum invariant id.  */
326   unsigned max_inv_id;
327 
328   /* Whether to consider just related and important candidates when replacing a
329      use.  */
330   bool consider_all_candidates;
331 
332   /* Are we optimizing for speed?  */
333   bool speed;
334 
335   /* Whether the loop body includes any function calls.  */
336   bool body_includes_call;
337 
338   /* Whether the loop body can only be exited via a single exit.  */
339   bool loop_single_exit_p;
340 };
341 
342 /* An assignment of iv candidates to uses.  */
343 
344 struct iv_ca
345 {
346   /* The number of uses covered by the assignment.  */
347   unsigned upto;
348 
349   /* Number of uses that cannot be expressed by the candidates in the set.  */
350   unsigned bad_uses;
351 
352   /* Candidate assigned to a use, together with the related costs.  */
353   struct cost_pair **cand_for_use;
354 
355   /* Number of times each candidate is used.  */
356   unsigned *n_cand_uses;
357 
358   /* The candidates used.  */
359   bitmap cands;
360 
361   /* The number of candidates in the set.  */
362   unsigned n_cands;
363 
364   /* Total number of registers needed.  */
365   unsigned n_regs;
366 
367   /* Total cost of expressing uses.  */
368   comp_cost cand_use_cost;
369 
370   /* Total cost of candidates.  */
371   unsigned cand_cost;
372 
373   /* Number of times each invariant is used.  */
374   unsigned *n_invariant_uses;
375 
376   /* The array holding the number of uses of each loop
377      invariant expression created by ivopt.  */
378   unsigned *used_inv_expr;
379 
380   /* The number of created loop invariants.  */
381   unsigned num_used_inv_expr;
382 
383   /* Total cost of the assignment.  */
384   comp_cost cost;
385 };
386 
387 /* Difference of two iv candidate assignments.  */
388 
389 struct iv_ca_delta
390 {
391   /* Changed use.  */
392   struct iv_use *use;
393 
394   /* An old assignment (for rollback purposes).  */
395   struct cost_pair *old_cp;
396 
397   /* A new assignment.  */
398   struct cost_pair *new_cp;
399 
400   /* Next change in the list.  */
401   struct iv_ca_delta *next_change;
402 };
403 
404 /* Bound on the number of candidates below which all candidates are considered.  */
405 
406 #define CONSIDER_ALL_CANDIDATES_BOUND \
407   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
408 
409 /* If there are more iv occurrences, we just give up (it is quite unlikely that
410    optimizing such a loop would help, and it would take ages).  */
411 
412 #define MAX_CONSIDERED_USES \
413   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
414 
415 /* If there are at most this number of ivs in the set, always try removing
416    unnecessary ivs from the set.  */
417 
418 #define ALWAYS_PRUNE_CAND_SET_BOUND \
419   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
420 
421 /* The list of trees for which the decl_rtl field must be reset is stored
422    here.  */
423 
424 static vec<tree> decl_rtl_to_reset;
425 
426 static comp_cost force_expr_to_var_cost (tree, bool);
427 
428 /* Number of uses recorded in DATA.  */
429 
430 static inline unsigned
431 n_iv_uses (struct ivopts_data *data)
432 {
433   return data->iv_uses.length ();
434 }
435 
436 /* Ith use recorded in DATA.  */
437 
438 static inline struct iv_use *
439 iv_use (struct ivopts_data *data, unsigned i)
440 {
441   return data->iv_uses[i];
442 }
443 
444 /* Number of candidates recorded in DATA.  */
445 
446 static inline unsigned
447 n_iv_cands (struct ivopts_data *data)
448 {
449   return data->iv_candidates.length ();
450 }
451 
452 /* Ith candidate recorded in DATA.  */
453 
454 static inline struct iv_cand *
455 iv_cand (struct ivopts_data *data, unsigned i)
456 {
457   return data->iv_candidates[i];
458 }
459 
460 /* The single loop exit if it dominates the latch, NULL otherwise.  */
461 
462 edge
463 single_dom_exit (struct loop *loop)
464 {
465   edge exit = single_exit (loop);
466 
467   if (!exit)
468     return NULL;
469 
470   if (!just_once_each_iteration_p (loop, exit->src))
471     return NULL;
472 
473   return exit;
474 }
475 
476 /* Dumps information about the induction variable IV to FILE.  */
477 
478 void
479 dump_iv (FILE *file, struct iv *iv)
480 {
481   if (iv->ssa_name)
482     {
483       fprintf (file, "ssa name ");
484       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
485       fprintf (file, "\n");
486     }
487 
488   fprintf (file, "  type ");
489   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
490   fprintf (file, "\n");
491 
492   if (iv->step)
493     {
494       fprintf (file, "  base ");
495       print_generic_expr (file, iv->base, TDF_SLIM);
496       fprintf (file, "\n");
497 
498       fprintf (file, "  step ");
499       print_generic_expr (file, iv->step, TDF_SLIM);
500       fprintf (file, "\n");
501     }
502   else
503     {
504       fprintf (file, "  invariant ");
505       print_generic_expr (file, iv->base, TDF_SLIM);
506       fprintf (file, "\n");
507     }
508 
509   if (iv->base_object)
510     {
511       fprintf (file, "  base object ");
512       print_generic_expr (file, iv->base_object, TDF_SLIM);
513       fprintf (file, "\n");
514     }
515 
516   if (iv->biv_p)
517     fprintf (file, "  is a biv\n");
518 }
519 
520 /* Dumps information about the USE to FILE.  */
521 
522 void
523 dump_use (FILE *file, struct iv_use *use)
524 {
525   fprintf (file, "use %d\n", use->id);
526 
527   switch (use->type)
528     {
529     case USE_NONLINEAR_EXPR:
530       fprintf (file, "  generic\n");
531       break;
532 
533     case USE_ADDRESS:
534       fprintf (file, "  address\n");
535       break;
536 
537     case USE_COMPARE:
538       fprintf (file, "  compare\n");
539       break;
540 
541     default:
542       gcc_unreachable ();
543     }
544 
545   fprintf (file, "  in statement ");
546   print_gimple_stmt (file, use->stmt, 0, 0);
547   fprintf (file, "\n");
548 
549   fprintf (file, "  at position ");
550   if (use->op_p)
551     print_generic_expr (file, *use->op_p, TDF_SLIM);
552   fprintf (file, "\n");
553 
554   dump_iv (file, use->iv);
555 
556   if (use->related_cands)
557     {
558       fprintf (file, "  related candidates ");
559       dump_bitmap (file, use->related_cands);
560     }
561 }
562 
563 /* Dumps information about the uses to FILE.  */
564 
565 void
566 dump_uses (FILE *file, struct ivopts_data *data)
567 {
568   unsigned i;
569   struct iv_use *use;
570 
571   for (i = 0; i < n_iv_uses (data); i++)
572     {
573       use = iv_use (data, i);
574 
575       dump_use (file, use);
576       fprintf (file, "\n");
577     }
578 }
579 
580 /* Dumps information about induction variable candidate CAND to FILE.  */
581 
582 void
583 dump_cand (FILE *file, struct iv_cand *cand)
584 {
585   struct iv *iv = cand->iv;
586 
587   fprintf (file, "candidate %d%s\n",
588 	   cand->id, cand->important ? " (important)" : "");
589 
590   if (cand->depends_on)
591     {
592       fprintf (file, "  depends on ");
593       dump_bitmap (file, cand->depends_on);
594     }
595 
596   if (!iv)
597     {
598       fprintf (file, "  final value replacement\n");
599       return;
600     }
601 
602   if (cand->var_before)
603     {
604       fprintf (file, "  var_before ");
605       print_generic_expr (file, cand->var_before, TDF_SLIM);
606       fprintf (file, "\n");
607     }
608   if (cand->var_after)
609     {
610       fprintf (file, "  var_after ");
611       print_generic_expr (file, cand->var_after, TDF_SLIM);
612       fprintf (file, "\n");
613     }
614 
615   switch (cand->pos)
616     {
617     case IP_NORMAL:
618       fprintf (file, "  incremented before exit test\n");
619       break;
620 
621     case IP_BEFORE_USE:
622       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
623       break;
624 
625     case IP_AFTER_USE:
626       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
627       break;
628 
629     case IP_END:
630       fprintf (file, "  incremented at end\n");
631       break;
632 
633     case IP_ORIGINAL:
634       fprintf (file, "  original biv\n");
635       break;
636     }
637 
638   dump_iv (file, iv);
639 }
640 
641 /* Returns the info for ssa version VER.  */
642 
643 static inline struct version_info *
644 ver_info (struct ivopts_data *data, unsigned ver)
645 {
646   return data->version_info + ver;
647 }
648 
649 /* Returns the info for ssa name NAME.  */
650 
651 static inline struct version_info *
652 name_info (struct ivopts_data *data, tree name)
653 {
654   return ver_info (data, SSA_NAME_VERSION (name));
655 }
656 
657 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
658    emitted in LOOP.  */
659 
660 static bool
661 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
662 {
663   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
664 
665   gcc_assert (bb);
666 
667   if (sbb == loop->latch)
668     return true;
669 
670   if (sbb != bb)
671     return false;
672 
673   return stmt == last_stmt (bb);
674 }
675 
676 /* Returns true if STMT is after the place where the original induction
677    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
678    if the positions are identical.  */
679 
680 static bool
681 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
682 {
683   basic_block cand_bb = gimple_bb (cand->incremented_at);
684   basic_block stmt_bb = gimple_bb (stmt);
685 
686   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
687     return false;
688 
689   if (stmt_bb != cand_bb)
690     return true;
691 
692   if (true_if_equal
693       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
694     return true;
695   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
696 }
697 
698 /* Returns true if STMT is after the place where the induction variable
699    CAND is incremented in LOOP.  */
700 
701 static bool
702 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
703 {
704   switch (cand->pos)
705     {
706     case IP_END:
707       return false;
708 
709     case IP_NORMAL:
710       return stmt_after_ip_normal_pos (loop, stmt);
711 
712     case IP_ORIGINAL:
713     case IP_AFTER_USE:
714       return stmt_after_inc_pos (cand, stmt, false);
715 
716     case IP_BEFORE_USE:
717       return stmt_after_inc_pos (cand, stmt, true);
718 
719     default:
720       gcc_unreachable ();
721     }
722 }
723 
724 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */
725 
726 static bool
727 abnormal_ssa_name_p (tree exp)
728 {
729   if (!exp)
730     return false;
731 
732   if (TREE_CODE (exp) != SSA_NAME)
733     return false;
734 
735   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
736 }
737 
738 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
739    abnormal phi node.  Callback for for_each_index.  */
740 
741 static bool
742 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
743 				  void *data ATTRIBUTE_UNUSED)
744 {
745   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
746     {
747       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
748 	return false;
749       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
750 	return false;
751     }
752 
753   return !abnormal_ssa_name_p (*index);
754 }
755 
756 /* Returns true if EXPR contains an ssa name that occurs in an
757    abnormal phi node.  */
758 
759 bool
760 contains_abnormal_ssa_name_p (tree expr)
761 {
762   enum tree_code code;
763   enum tree_code_class codeclass;
764 
765   if (!expr)
766     return false;
767 
768   code = TREE_CODE (expr);
769   codeclass = TREE_CODE_CLASS (code);
770 
771   if (code == SSA_NAME)
772     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
773 
774   if (code == INTEGER_CST
775       || is_gimple_min_invariant (expr))
776     return false;
777 
778   if (code == ADDR_EXPR)
779     return !for_each_index (&TREE_OPERAND (expr, 0),
780 			    idx_contains_abnormal_ssa_name_p,
781 			    NULL);
782 
783   if (code == COND_EXPR)
784     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
785       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
786       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
787 
788   switch (codeclass)
789     {
790     case tcc_binary:
791     case tcc_comparison:
792       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
793 	return true;
794 
795       /* Fallthru.  */
796     case tcc_unary:
797       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
798 	return true;
799 
800       break;
801 
802     default:
803       gcc_unreachable ();
804     }
805 
806   return false;
807 }
808 
809 /* Returns the structure describing the number of iterations determined from
810    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
811 
812 static struct tree_niter_desc *
813 niter_for_exit (struct ivopts_data *data, edge exit)
814 {
815   struct tree_niter_desc *desc;
816   void **slot;
817 
818   if (!data->niters)
819     {
820       data->niters = pointer_map_create ();
821       slot = NULL;
822     }
823   else
824     slot = pointer_map_contains (data->niters, exit);
825 
826   if (!slot)
827     {
828       /* Try to determine the number of iterations.  We cannot safely work
829          with ssa names that appear in phi nodes on abnormal edges, so that
830          we do not create overlapping live ranges for them (PR 27283).  */
831       desc = XNEW (struct tree_niter_desc);
832       if (!number_of_iterations_exit (data->current_loop,
833 				      exit, desc, true)
834      	  || contains_abnormal_ssa_name_p (desc->niter))
835 	{
836 	  XDELETE (desc);
837 	  desc = NULL;
838 	}
839       slot = pointer_map_insert (data->niters, exit);
840       *slot = desc;
841     }
842   else
843     desc = (struct tree_niter_desc *) *slot;
844 
845   return desc;
846 }
847 
848 /* Returns the structure describing the number of iterations determined from
849    the single dominating exit of DATA->current_loop, or NULL if something
850    goes wrong.  */
851 
852 static struct tree_niter_desc *
853 niter_for_single_dom_exit (struct ivopts_data *data)
854 {
855   edge exit = single_dom_exit (data->current_loop);
856 
857   if (!exit)
858     return NULL;
859 
860   return niter_for_exit (data, exit);
861 }
862 
863 /* Initializes data structures used by the iv optimization pass, stored
864    in DATA.  */
865 
866 static void
867 tree_ssa_iv_optimize_init (struct ivopts_data *data)
868 {
869   data->version_info_size = 2 * num_ssa_names;
870   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
871   data->relevant = BITMAP_ALLOC (NULL);
872   data->important_candidates = BITMAP_ALLOC (NULL);
873   data->max_inv_id = 0;
874   data->niters = NULL;
875   data->iv_uses.create (20);
876   data->iv_candidates.create (20);
877   data->inv_expr_tab.create (10);
878   data->inv_expr_id = 0;
879   decl_rtl_to_reset.create (20);
880 }
881 
882 /* Returns the memory object to which EXPR points.  In case we are able to
883    determine that it does not point to any such object, NULL is returned.  */
884 
885 static tree
886 determine_base_object (tree expr)
887 {
888   enum tree_code code = TREE_CODE (expr);
889   tree base, obj;
890 
891   /* If this is a pointer casted to any type, we need to determine
892      the base object for the pointer; so handle conversions before
893      throwing away non-pointer expressions.  */
894   if (CONVERT_EXPR_P (expr))
895     return determine_base_object (TREE_OPERAND (expr, 0));
896 
897   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
898     return NULL_TREE;
899 
900   switch (code)
901     {
902     case INTEGER_CST:
903       return NULL_TREE;
904 
905     case ADDR_EXPR:
906       obj = TREE_OPERAND (expr, 0);
907       base = get_base_address (obj);
908 
909       if (!base)
910 	return expr;
911 
912       if (TREE_CODE (base) == MEM_REF)
913 	return determine_base_object (TREE_OPERAND (base, 0));
914 
915       return fold_convert (ptr_type_node,
916 		           build_fold_addr_expr (base));
917 
918     case POINTER_PLUS_EXPR:
919       return determine_base_object (TREE_OPERAND (expr, 0));
920 
921     case PLUS_EXPR:
922     case MINUS_EXPR:
923       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
924       gcc_unreachable ();
925 
926     default:
927       return fold_convert (ptr_type_node, expr);
928     }
929 }
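
/* For example (illustrative): determine_base_object returns &a for
   EXPR == &a[i], recurses through the pointer operand for
   EXPR == p_1 + 4 (a POINTER_PLUS_EXPR), and returns NULL_TREE for an
   integer constant such as (void *) 0, which points to no object.  */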
930 
931 /* Allocates an induction variable with given initial value BASE and step STEP
932    for loop LOOP.  */
933 
934 static struct iv *
935 alloc_iv (tree base, tree step)
936 {
937   tree base_object = base;
938   struct iv *iv = XCNEW (struct iv);
939   gcc_assert (step != NULL_TREE);
940 
941   /* Lower all address expressions except ones with DECL_P as operand.
942      By doing this:
943        1) More accurate cost can be computed for address expressions;
944        2) Duplicate candidates won't be created for bases in different
945           forms, like &a[0] and &a.  */
946   STRIP_NOPS (base_object);
947   if (TREE_CODE (base_object) == ADDR_EXPR
948       && !DECL_P (TREE_OPERAND (base_object, 0)))
949     {
950       aff_tree comb;
951       double_int size;
952       base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
953 					     &comb, &size);
954       gcc_assert (base_object != NULL_TREE);
955       base_object = build_fold_addr_expr (base_object);
956       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
957     }
958 
959   iv->base = base;
960   iv->base_object = determine_base_object (base_object);
961   iv->step = step;
962   iv->biv_p = false;
963   iv->have_use_for = false;
964   iv->use_id = 0;
965   iv->ssa_name = NULL_TREE;
966 
967   return iv;
968 }
969 
970 /* Sets STEP and BASE for induction variable IV.  */
971 
972 static void
973 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
974 {
975   struct version_info *info = name_info (data, iv);
976 
977   gcc_assert (!info->iv);
978 
979   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
980   info->iv = alloc_iv (base, step);
981   info->iv->ssa_name = iv;
982 }
983 
984 /* Finds induction variable declaration for VAR.  */
985 
986 static struct iv *
987 get_iv (struct ivopts_data *data, tree var)
988 {
989   basic_block bb;
990   tree type = TREE_TYPE (var);
991 
992   if (!POINTER_TYPE_P (type)
993       && !INTEGRAL_TYPE_P (type))
994     return NULL;
995 
996   if (!name_info (data, var)->iv)
997     {
998       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
999 
1000       if (!bb
1001 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1002 	set_iv (data, var, var, build_int_cst (type, 0));
1003     }
1004 
1005   return name_info (data, var)->iv;
1006 }
1007 
1008 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1009    not define a simple affine biv with nonzero step.  */
1010 
1011 static tree
1012 determine_biv_step (gimple phi)
1013 {
1014   struct loop *loop = gimple_bb (phi)->loop_father;
1015   tree name = PHI_RESULT (phi);
1016   affine_iv iv;
1017 
1018   if (virtual_operand_p (name))
1019     return NULL_TREE;
1020 
1021   if (!simple_iv (loop, loop, name, &iv, true))
1022     return NULL_TREE;
1023 
1024   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1025 }
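
/* For example (illustrative): for the loop-header phi

     i_1 = PHI <0(preheader), i_3(latch)>

   where i_3 = i_1 + 4 inside the loop, the returned step is 4; if the
   name does not form a simple affine iv, or its step is zero,
   NULL_TREE is returned.  */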
1026 
1027 /* Finds basic ivs.  */
1028 
1029 static bool
1030 find_bivs (struct ivopts_data *data)
1031 {
1032   gimple phi;
1033   tree step, type, base;
1034   bool found = false;
1035   struct loop *loop = data->current_loop;
1036   gimple_stmt_iterator psi;
1037 
1038   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1039     {
1040       phi = gsi_stmt (psi);
1041 
1042       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1043 	continue;
1044 
1045       step = determine_biv_step (phi);
1046       if (!step)
1047 	continue;
1048 
1049       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1050       base = expand_simple_operations (base);
1051       if (contains_abnormal_ssa_name_p (base)
1052 	  || contains_abnormal_ssa_name_p (step))
1053 	continue;
1054 
1055       type = TREE_TYPE (PHI_RESULT (phi));
1056       base = fold_convert (type, base);
1057       if (step)
1058 	{
1059 	  if (POINTER_TYPE_P (type))
1060 	    step = convert_to_ptrofftype (step);
1061 	  else
1062 	    step = fold_convert (type, step);
1063 	}
1064 
1065       set_iv (data, PHI_RESULT (phi), base, step);
1066       found = true;
1067     }
1068 
1069   return found;
1070 }
1071 
1072 /* Marks basic ivs.  */
1073 
1074 static void
1075 mark_bivs (struct ivopts_data *data)
1076 {
1077   gimple phi, def;
1078   tree var;
1079   struct iv *iv, *incr_iv;
1080   struct loop *loop = data->current_loop;
1081   basic_block incr_bb;
1082   gimple_stmt_iterator psi;
1083 
1084   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1085     {
1086       phi = gsi_stmt (psi);
1087 
1088       iv = get_iv (data, PHI_RESULT (phi));
1089       if (!iv)
1090 	continue;
1091 
1092       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1093       def = SSA_NAME_DEF_STMT (var);
1094       /* Don't mark an iv peeled from another one as a biv.  */
1095       if (def
1096 	  && gimple_code (def) == GIMPLE_PHI
1097 	  && gimple_bb (def) == loop->header)
1098 	continue;
1099 
1100       incr_iv = get_iv (data, var);
1101       if (!incr_iv)
1102 	continue;
1103 
1104       /* If the increment is in the subloop, ignore it.  */
1105       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1106       if (incr_bb->loop_father != data->current_loop
1107 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1108 	continue;
1109 
1110       iv->biv_p = true;
1111       incr_iv->biv_p = true;
1112     }
1113 }
1114 
1115 /* Checks whether STMT defines a linear induction variable and stores its
1116    parameters to IV.  */
1117 
1118 static bool
1119 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1120 {
1121   tree lhs;
1122   struct loop *loop = data->current_loop;
1123 
1124   iv->base = NULL_TREE;
1125   iv->step = NULL_TREE;
1126 
1127   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1128     return false;
1129 
1130   lhs = gimple_assign_lhs (stmt);
1131   if (TREE_CODE (lhs) != SSA_NAME)
1132     return false;
1133 
1134   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1135     return false;
1136   iv->base = expand_simple_operations (iv->base);
1137 
1138   if (contains_abnormal_ssa_name_p (iv->base)
1139       || contains_abnormal_ssa_name_p (iv->step))
1140     return false;
1141 
1142   /* If STMT could throw, then do not consider STMT as defining a GIV.
1143      While this will suppress optimizations, we cannot safely delete this
1144      GIV and associated statements, even if it appears it is not used.  */
1145   if (stmt_could_throw_p (stmt))
1146     return false;
1147 
1148   return true;
1149 }
1150 
1151 /* Finds general ivs in statement STMT.  */
1152 
1153 static void
1154 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1155 {
1156   affine_iv iv;
1157 
1158   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1159     return;
1160 
1161   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1162 }
1163 
1164 /* Finds general ivs in basic block BB.  */
1165 
1166 static void
1167 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1168 {
1169   gimple_stmt_iterator bsi;
1170 
1171   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1172     find_givs_in_stmt (data, gsi_stmt (bsi));
1173 }
1174 
1175 /* Finds general ivs.  */
1176 
1177 static void
1178 find_givs (struct ivopts_data *data)
1179 {
1180   struct loop *loop = data->current_loop;
1181   basic_block *body = get_loop_body_in_dom_order (loop);
1182   unsigned i;
1183 
1184   for (i = 0; i < loop->num_nodes; i++)
1185     find_givs_in_bb (data, body[i]);
1186   free (body);
1187 }
1188 
1189 /* For each ssa name defined in LOOP determines whether it is an induction
1190    variable and if so, its initial value and step.  */
1191 
1192 static bool
1193 find_induction_variables (struct ivopts_data *data)
1194 {
1195   unsigned i;
1196   bitmap_iterator bi;
1197 
1198   if (!find_bivs (data))
1199     return false;
1200 
1201   find_givs (data);
1202   mark_bivs (data);
1203 
1204   if (dump_file && (dump_flags & TDF_DETAILS))
1205     {
1206       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1207 
1208       if (niter)
1209 	{
1210 	  fprintf (dump_file, "  number of iterations ");
1211 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1212 	  if (!integer_zerop (niter->may_be_zero))
1213 	    {
1214 	      fprintf (dump_file, "; zero if ");
1215 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1216 	    }
1217 	  fprintf (dump_file, "\n\n");
1218     	}
1219 
1220       fprintf (dump_file, "Induction variables:\n\n");
1221 
1222       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1223 	{
1224 	  if (ver_info (data, i)->iv)
1225 	    dump_iv (dump_file, ver_info (data, i)->iv);
1226 	}
1227     }
1228 
1229   return true;
1230 }
1231 
1232 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1233 
1234 static struct iv_use *
1235 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1236 	    gimple stmt, enum use_type use_type)
1237 {
1238   struct iv_use *use = XCNEW (struct iv_use);
1239 
1240   use->id = n_iv_uses (data);
1241   use->type = use_type;
1242   use->iv = iv;
1243   use->stmt = stmt;
1244   use->op_p = use_p;
1245   use->related_cands = BITMAP_ALLOC (NULL);
1246 
1247   /* Reset the ssa name so that it does not show up in the dumps, if it
1248      was not reset by the caller.  */
1249   iv->ssa_name = NULL_TREE;
1250 
1251   if (dump_file && (dump_flags & TDF_DETAILS))
1252     dump_use (dump_file, use);
1253 
1254   data->iv_uses.safe_push (use);
1255 
1256   return use;
1257 }
1258 
1259 /* Checks whether OP is a loop-level invariant and if so, records it.
1260    NONLINEAR_USE is true if the invariant is used in a way we do not
1261    handle specially.  */
1262 
1263 static void
1264 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1265 {
1266   basic_block bb;
1267   struct version_info *info;
1268 
1269   if (TREE_CODE (op) != SSA_NAME
1270       || virtual_operand_p (op))
1271     return;
1272 
1273   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1274   if (bb
1275       && flow_bb_inside_loop_p (data->current_loop, bb))
1276     return;
1277 
1278   info = name_info (data, op);
1279   info->name = op;
1280   info->has_nonlin_use |= nonlinear_use;
1281   if (!info->inv_id)
1282     info->inv_id = ++data->max_inv_id;
1283   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1284 }
1285 
1286 /* Checks whether the use OP is interesting and if so, records it.  */
1287 
1288 static struct iv_use *
1289 find_interesting_uses_op (struct ivopts_data *data, tree op)
1290 {
1291   struct iv *iv;
1292   struct iv *civ;
1293   gimple stmt;
1294   struct iv_use *use;
1295 
1296   if (TREE_CODE (op) != SSA_NAME)
1297     return NULL;
1298 
1299   iv = get_iv (data, op);
1300   if (!iv)
1301     return NULL;
1302 
1303   if (iv->have_use_for)
1304     {
1305       use = iv_use (data, iv->use_id);
1306 
1307       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1308       return use;
1309     }
1310 
1311   if (integer_zerop (iv->step))
1312     {
1313       record_invariant (data, op, true);
1314       return NULL;
1315     }
1316   iv->have_use_for = true;
1317 
1318   civ = XNEW (struct iv);
1319   *civ = *iv;
1320 
1321   stmt = SSA_NAME_DEF_STMT (op);
1322   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1323 	      || is_gimple_assign (stmt));
1324 
1325   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1326   iv->use_id = use->id;
1327 
1328   return use;
1329 }
1330 
1331 /* Given a condition in statement STMT, checks whether it is a compare
1332    of an induction variable and an invariant.  If this is the case,
1333    CONTROL_VAR is set to location of the iv, BOUND to the location of
1334    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1335    induction variable descriptions, and true is returned.  If this is not
1336    the case, CONTROL_VAR and BOUND are set to the arguments of the
1337    condition and false is returned.  */
1338 
1339 static bool
1340 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1341 		       tree **control_var, tree **bound,
1342 		       struct iv **iv_var, struct iv **iv_bound)
1343 {
1344   /* The objects returned when COND has constant operands.  */
1345   static struct iv const_iv;
1346   static tree zero;
1347   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1348   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1349   bool ret = false;
1350 
1351   if (gimple_code (stmt) == GIMPLE_COND)
1352     {
1353       op0 = gimple_cond_lhs_ptr (stmt);
1354       op1 = gimple_cond_rhs_ptr (stmt);
1355     }
1356   else
1357     {
1358       op0 = gimple_assign_rhs1_ptr (stmt);
1359       op1 = gimple_assign_rhs2_ptr (stmt);
1360     }
1361 
1362   zero = integer_zero_node;
1363   const_iv.step = integer_zero_node;
1364 
1365   if (TREE_CODE (*op0) == SSA_NAME)
1366     iv0 = get_iv (data, *op0);
1367   if (TREE_CODE (*op1) == SSA_NAME)
1368     iv1 = get_iv (data, *op1);
1369 
1370   /* Exactly one of the compared values must be an iv, and the other one must
1371      be an invariant.  */
1372   if (!iv0 || !iv1)
1373     goto end;
1374 
1375   if (integer_zerop (iv0->step))
1376     {
1377       /* Control variable may be on the other side.  */
1378       tmp_op = op0; op0 = op1; op1 = tmp_op;
1379       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1380     }
1381   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1382 
1383 end:
1384   if (control_var)
1385     *control_var = op0;
1386   if (iv_var)
1387     *iv_var = iv0;
1388   if (bound)
1389     *bound = op1;
1390   if (iv_bound)
1391     *iv_bound = iv1;
1392 
1393   return ret;
1394 }
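
/* For example (illustrative): for an exit test if (i_1 < n_2) where
   i_1 is an iv with nonzero step and n_2 is loop invariant,
   *CONTROL_VAR points to i_1, *BOUND points to n_2 and true is
   returned; for a comparison of two invariants, false is returned and
   the outputs are just set to the operands of the condition.  */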
1395 
1396 /* Checks whether the condition in STMT is interesting and if so,
1397    records it.  */
1398 
1399 static void
1400 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1401 {
1402   tree *var_p, *bound_p;
1403   struct iv *var_iv, *civ;
1404 
1405   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1406     {
1407       find_interesting_uses_op (data, *var_p);
1408       find_interesting_uses_op (data, *bound_p);
1409       return;
1410     }
1411 
1412   civ = XNEW (struct iv);
1413   *civ = *var_iv;
1414   record_use (data, NULL, civ, stmt, USE_COMPARE);
1415 }
1416 
1417 /* Returns the outermost loop in which EXPR is obviously invariant,
1418    relative to the loop LOOP, i.e. the outermost loop such that all of
1419    EXPR's operands are defined outside of it.  Returns NULL if EXPR is
1420    not even obviously invariant in LOOP.  */
1421 
1422 struct loop *
1423 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1424 {
1425   basic_block def_bb;
1426   unsigned i, len;
1427 
1428   if (is_gimple_min_invariant (expr))
1429     return current_loops->tree_root;
1430 
1431   if (TREE_CODE (expr) == SSA_NAME)
1432     {
1433       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1434       if (def_bb)
1435 	{
1436 	  if (flow_bb_inside_loop_p (loop, def_bb))
1437 	    return NULL;
1438 	  return superloop_at_depth (loop,
1439 				     loop_depth (def_bb->loop_father) + 1);
1440 	}
1441 
1442       return current_loops->tree_root;
1443     }
1444 
1445   if (!EXPR_P (expr))
1446     return NULL;
1447 
1448   unsigned maxdepth = 0;
1449   len = TREE_OPERAND_LENGTH (expr);
1450   for (i = 0; i < len; i++)
1451     {
1452       struct loop *ivloop;
1453       if (!TREE_OPERAND (expr, i))
1454 	continue;
1455 
1456       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1457       if (!ivloop)
1458 	return NULL;
1459       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1460     }
1461 
1462   return superloop_at_depth (loop, maxdepth);
1463 }
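
/* For example (illustrative): with nested loops L_outer and L_inner,
   and EXPR == a_1 + b_2 where a_1 is defined before L_outer and b_2
   inside L_outer but outside L_inner, the result for LOOP == L_inner
   is L_inner itself: EXPR is invariant there but not in L_outer.  For
   a constant the root of the loop tree is returned.  */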
1464 
1465 /* Returns true if expression EXPR is obviously invariant in LOOP,
1466    i.e. if all its operands are defined outside of the LOOP.  LOOP
1467    should not be the function body.  */
1468 
1469 bool
1470 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1471 {
1472   basic_block def_bb;
1473   unsigned i, len;
1474 
1475   gcc_assert (loop_depth (loop) > 0);
1476 
1477   if (is_gimple_min_invariant (expr))
1478     return true;
1479 
1480   if (TREE_CODE (expr) == SSA_NAME)
1481     {
1482       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1483       if (def_bb
1484 	  && flow_bb_inside_loop_p (loop, def_bb))
1485 	return false;
1486 
1487       return true;
1488     }
1489 
1490   if (!EXPR_P (expr))
1491     return false;
1492 
1493   len = TREE_OPERAND_LENGTH (expr);
1494   for (i = 0; i < len; i++)
1495     if (TREE_OPERAND (expr, i)
1496 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1497       return false;
1498 
1499   return true;
1500 }
1501 
1502 /* Accumulates the steps of indices into DATA and replaces their values with the
1503    initial ones.  Returns false when the value of the index cannot be determined.
1504    Callback for for_each_index.  */
1505 
1506 struct ifs_ivopts_data
1507 {
1508   struct ivopts_data *ivopts_data;
1509   gimple stmt;
1510   tree step;
1511 };
1512 
1513 static bool
1514 idx_find_step (tree base, tree *idx, void *data)
1515 {
1516   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1517   struct iv *iv;
1518   tree step, iv_base, iv_step, lbound, off;
1519   struct loop *loop = dta->ivopts_data->current_loop;
1520 
1521   /* If base is a component ref, require that the offset of the reference
1522      be invariant.  */
1523   if (TREE_CODE (base) == COMPONENT_REF)
1524     {
1525       off = component_ref_field_offset (base);
1526       return expr_invariant_in_loop_p (loop, off);
1527     }
1528 
1529   /* If base is an array, first check whether we will be able to move the
1530      reference out of the loop (in order to take its address in strength
1531      reduction).  In order for this to work we need both lower bound
1532      and step to be loop invariants.  */
1533   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1534     {
1535       /* Moreover, for a range, the size needs to be invariant as well.  */
1536       if (TREE_CODE (base) == ARRAY_RANGE_REF
1537 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1538 	return false;
1539 
1540       step = array_ref_element_size (base);
1541       lbound = array_ref_low_bound (base);
1542 
1543       if (!expr_invariant_in_loop_p (loop, step)
1544 	  || !expr_invariant_in_loop_p (loop, lbound))
1545 	return false;
1546     }
1547 
1548   if (TREE_CODE (*idx) != SSA_NAME)
1549     return true;
1550 
1551   iv = get_iv (dta->ivopts_data, *idx);
1552   if (!iv)
1553     return false;
1554 
1555   /* XXX  For a base of *D42 with iv->base being &x[0], we produce
1556 	  *&x[0], which is not folded and does not trigger the
1557 	  ARRAY_REF path below.  */
1558   *idx = iv->base;
1559 
1560   if (integer_zerop (iv->step))
1561     return true;
1562 
1563   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1564     {
1565       step = array_ref_element_size (base);
1566 
1567       /* We only handle addresses whose step is an integer constant.  */
1568       if (TREE_CODE (step) != INTEGER_CST)
1569 	return false;
1570     }
1571   else
1572     /* The step for pointer arithmetic is already 1 byte.  */
1573     step = size_one_node;
1574 
1575   iv_base = iv->base;
1576   iv_step = iv->step;
1577   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1578 			    sizetype, &iv_base, &iv_step, dta->stmt,
1579 			    false))
1580     {
1581       /* The index might wrap.  */
1582       return false;
1583     }
1584 
1585   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1586   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1587 
1588   return true;
1589 }
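
/* For example (illustrative): for BASE == a[i_1] where i_1 is an iv
   with step 1 and the array elements are 4 bytes wide, *IDX is reset
   to the initial value of i_1 and 1 * 4 == 4 is accumulated into
   DTA->step.  */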
1590 
1591 /* Records a use in index IDX.  Callback for for_each_index.  The ivopts
1592    data object is passed to it in VDATA.  */
1593 
1594 static bool
1595 idx_record_use (tree base, tree *idx,
1596 		void *vdata)
1597 {
1598   struct ivopts_data *data = (struct ivopts_data *) vdata;
1599   find_interesting_uses_op (data, *idx);
1600   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1601     {
1602       find_interesting_uses_op (data, array_ref_element_size (base));
1603       find_interesting_uses_op (data, array_ref_low_bound (base));
1604     }
1605   return true;
1606 }
1607 
1608 /* If we can prove that TOP = cst * BOT for some constant cst,
1609    store cst to MUL and return true.  Otherwise return false.
1610    The returned value is always sign-extended, regardless of the
1611    signedness of TOP and BOT.  */
1612 
1613 static bool
1614 constant_multiple_of (tree top, tree bot, double_int *mul)
1615 {
1616   tree mby;
1617   enum tree_code code;
1618   double_int res, p0, p1;
1619   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1620 
1621   STRIP_NOPS (top);
1622   STRIP_NOPS (bot);
1623 
1624   if (operand_equal_p (top, bot, 0))
1625     {
1626       *mul = double_int_one;
1627       return true;
1628     }
1629 
1630   code = TREE_CODE (top);
1631   switch (code)
1632     {
1633     case MULT_EXPR:
1634       mby = TREE_OPERAND (top, 1);
1635       if (TREE_CODE (mby) != INTEGER_CST)
1636 	return false;
1637 
1638       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1639 	return false;
1640 
1641       *mul = (res * tree_to_double_int (mby)).sext (precision);
1642       return true;
1643 
1644     case PLUS_EXPR:
1645     case MINUS_EXPR:
1646       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1647 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1648 	return false;
1649 
1650       if (code == MINUS_EXPR)
1651 	p1 = -p1;
1652       *mul = (p0 + p1).sext (precision);
1653       return true;
1654 
1655     case INTEGER_CST:
1656       if (TREE_CODE (bot) != INTEGER_CST)
1657 	return false;
1658 
1659       p0 = tree_to_double_int (top).sext (precision);
1660       p1 = tree_to_double_int (bot).sext (precision);
1661       if (p1.is_zero ())
1662 	return false;
1663       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1664       return res.is_zero ();
1665 
1666     default:
1667       return false;
1668     }
1669 }
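
/* For example (illustrative): for TOP == n_1 * 8 + n_1 * 4 and
   BOT == n_1, 12 is stored to *MUL and true is returned; for
   TOP == n_1 + 1 and BOT == n_1, false is returned, since 1 is not a
   constant multiple of n_1.  */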
1670 
1671 /* Return true if memory reference REF with step STEP may be unaligned.  */
1672 
1673 static bool
1674 may_be_unaligned_p (tree ref, tree step)
1675 {
1676   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1677      thus they are not misaligned.  */
1678   if (TREE_CODE (ref) == TARGET_MEM_REF)
1679     return false;
1680 
1681   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1682   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1683     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1684 
1685   unsigned HOST_WIDE_INT bitpos;
1686   unsigned int ref_align;
1687   get_object_alignment_1 (ref, &ref_align, &bitpos);
1688   if (ref_align < align
1689       || (bitpos % align) != 0
1690       || (bitpos % BITS_PER_UNIT) != 0)
1691     return true;
1692 
1693   unsigned int trailing_zeros = tree_ctz (step);
1694   if (trailing_zeros < HOST_BITS_PER_INT
1695       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1696     return true;
1697 
1698   return false;
1699 }
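
/* For example (illustrative): on a strict-alignment target, a 4-byte
   aligned reference advanced by a step of 2 bytes may become
   misaligned on every other iteration: tree_ctz (step) == 1 and
   2 * BITS_PER_UNIT < 32, so true is returned even when the first
   access happens to be aligned.  */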
1700 
1701 /* Return true if EXPR may be non-addressable.   */
1702 
1703 bool
1704 may_be_nonaddressable_p (tree expr)
1705 {
1706   switch (TREE_CODE (expr))
1707     {
1708     case TARGET_MEM_REF:
1709       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1710 	 target, thus they are always addressable.  */
1711       return false;
1712 
1713     case COMPONENT_REF:
1714       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1715 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1716 
1717     case VIEW_CONVERT_EXPR:
1718       /* This kind of view-conversions may wrap non-addressable objects
1719 	 and make them look addressable.  After some processing the
1720 	 non-addressability may be uncovered again, causing ADDR_EXPRs
1721 	 of inappropriate objects to be built.  */
1722       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1723 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1724 	return true;
1725 
1726       /* ... fall through ... */
1727 
1728     case ARRAY_REF:
1729     case ARRAY_RANGE_REF:
1730       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1731 
1732     CASE_CONVERT:
1733       return true;
1734 
1735     default:
1736       break;
1737     }
1738 
1739   return false;
1740 }
1741 
1742 /* Finds addresses in *OP_P inside STMT.  */
1743 
1744 static void
1745 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1746 {
1747   tree base = *op_p, step = size_zero_node;
1748   struct iv *civ;
1749   struct ifs_ivopts_data ifs_ivopts_data;
1750 
1751   /* Do not play with volatile memory references.  A bit too conservative,
1752      perhaps, but safe.  */
1753   if (gimple_has_volatile_ops (stmt))
1754     goto fail;
1755 
1756   /* Ignore bitfields for now.  Not really something terribly complicated
1757      to handle.  TODO.  */
1758   if (TREE_CODE (base) == BIT_FIELD_REF)
1759     goto fail;
1760 
1761   base = unshare_expr (base);
1762 
1763   if (TREE_CODE (base) == TARGET_MEM_REF)
1764     {
1765       tree type = build_pointer_type (TREE_TYPE (base));
1766       tree astep;
1767 
1768       if (TMR_BASE (base)
1769 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1770 	{
1771 	  civ = get_iv (data, TMR_BASE (base));
1772 	  if (!civ)
1773 	    goto fail;
1774 
1775 	  TMR_BASE (base) = civ->base;
1776 	  step = civ->step;
1777 	}
1778       if (TMR_INDEX2 (base)
1779 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1780 	{
1781 	  civ = get_iv (data, TMR_INDEX2 (base));
1782 	  if (!civ)
1783 	    goto fail;
1784 
1785 	  TMR_INDEX2 (base) = civ->base;
1786 	  step = civ->step;
1787 	}
1788       if (TMR_INDEX (base)
1789 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1790 	{
1791 	  civ = get_iv (data, TMR_INDEX (base));
1792 	  if (!civ)
1793 	    goto fail;
1794 
1795 	  TMR_INDEX (base) = civ->base;
1796 	  astep = civ->step;
1797 
1798 	  if (astep)
1799 	    {
1800 	      if (TMR_STEP (base))
1801 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1802 
1803 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
1804 	    }
1805 	}
1806 
1807       if (integer_zerop (step))
1808 	goto fail;
1809       base = tree_mem_ref_addr (type, base);
1810     }
1811   else
1812     {
1813       ifs_ivopts_data.ivopts_data = data;
1814       ifs_ivopts_data.stmt = stmt;
1815       ifs_ivopts_data.step = size_zero_node;
1816       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1817 	  || integer_zerop (ifs_ivopts_data.step))
1818 	goto fail;
1819       step = ifs_ivopts_data.step;
1820 
1821       /* Check that the base expression is addressable.  This needs
1822 	 to be done after substituting bases of IVs into it.  */
1823       if (may_be_nonaddressable_p (base))
1824 	goto fail;
1825 
1826       /* Moreover, on strict alignment platforms, check that it is
1827 	 sufficiently aligned.  */
1828       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1829 	goto fail;
1830 
1831       base = build_fold_addr_expr (base);
1832 
1833       /* Substituting bases of IVs into the base expression might
1834 	 have caused folding opportunities.  */
1835       if (TREE_CODE (base) == ADDR_EXPR)
1836 	{
1837 	  tree *ref = &TREE_OPERAND (base, 0);
1838 	  while (handled_component_p (*ref))
1839 	    ref = &TREE_OPERAND (*ref, 0);
1840 	  if (TREE_CODE (*ref) == MEM_REF)
1841 	    {
1842 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1843 				      TREE_OPERAND (*ref, 0),
1844 				      TREE_OPERAND (*ref, 1));
1845 	      if (tem)
1846 		*ref = tem;
1847 	    }
1848 	}
1849     }
1850 
1851   civ = alloc_iv (base, step);
1852   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1853   return;
1854 
1855 fail:
1856   for_each_index (op_p, idx_record_use, data);
1857 }
1858 
1859 /* Finds and records invariants used in STMT.  */
1860 
1861 static void
1862 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1863 {
1864   ssa_op_iter iter;
1865   use_operand_p use_p;
1866   tree op;
1867 
1868   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1869     {
1870       op = USE_FROM_PTR (use_p);
1871       record_invariant (data, op, false);
1872     }
1873 }
1874 
1875 /* Finds interesting uses of induction variables in the statement STMT.  */
1876 
1877 static void
1878 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1879 {
1880   struct iv *iv;
1881   tree op, *lhs, *rhs;
1882   ssa_op_iter iter;
1883   use_operand_p use_p;
1884   enum tree_code code;
1885 
1886   find_invariants_stmt (data, stmt);
1887 
1888   if (gimple_code (stmt) == GIMPLE_COND)
1889     {
1890       find_interesting_uses_cond (data, stmt);
1891       return;
1892     }
1893 
1894   if (is_gimple_assign (stmt))
1895     {
1896       lhs = gimple_assign_lhs_ptr (stmt);
1897       rhs = gimple_assign_rhs1_ptr (stmt);
1898 
1899       if (TREE_CODE (*lhs) == SSA_NAME)
1900 	{
1901 	  /* If the statement defines an induction variable, the uses are not
1902 	     interesting by themselves.  */
1903 
1904 	  iv = get_iv (data, *lhs);
1905 
1906 	  if (iv && !integer_zerop (iv->step))
1907 	    return;
1908 	}
1909 
1910       code = gimple_assign_rhs_code (stmt);
1911       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1912 	  && (REFERENCE_CLASS_P (*rhs)
1913 	      || is_gimple_val (*rhs)))
1914 	{
1915 	  if (REFERENCE_CLASS_P (*rhs))
1916 	    find_interesting_uses_address (data, stmt, rhs);
1917 	  else
1918 	    find_interesting_uses_op (data, *rhs);
1919 
1920 	  if (REFERENCE_CLASS_P (*lhs))
1921 	    find_interesting_uses_address (data, stmt, lhs);
1922 	  return;
1923 	}
1924       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1925 	{
1926 	  find_interesting_uses_cond (data, stmt);
1927 	  return;
1928 	}
1929 
1930       /* TODO -- we should also handle address uses of type
1931 
1932 	 memory = call (whatever);
1933 
1934 	 and
1935 
1936 	 call (memory).  */
1937     }
1938 
1939   if (gimple_code (stmt) == GIMPLE_PHI
1940       && gimple_bb (stmt) == data->current_loop->header)
1941     {
1942       iv = get_iv (data, PHI_RESULT (stmt));
1943 
1944       if (iv && !integer_zerop (iv->step))
1945 	return;
1946     }
1947 
1948   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1949     {
1950       op = USE_FROM_PTR (use_p);
1951 
1952       if (TREE_CODE (op) != SSA_NAME)
1953 	continue;
1954 
1955       iv = get_iv (data, op);
1956       if (!iv)
1957 	continue;
1958 
1959       find_interesting_uses_op (data, op);
1960     }
1961 }
1962 
1963 /* Finds interesting uses of induction variables outside of loops
1964    on loop exit edge EXIT.  */
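/* For example (illustrative), if the final value of an iv is used after
   the loop, the block at the destination of the exit edge contains a PHI
   node such as i_2 = PHI <i_1 (exit)>, and the argument i_1 is recorded
   here as an interesting use.  */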
1965 
1966 static void
1967 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1968 {
1969   gimple phi;
1970   gimple_stmt_iterator psi;
1971   tree def;
1972 
1973   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1974     {
1975       phi = gsi_stmt (psi);
1976       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1977       if (!virtual_operand_p (def))
1978         find_interesting_uses_op (data, def);
1979     }
1980 }
1981 
1982 /* Finds uses of the induction variables that are interesting.  */
1983 
1984 static void
1985 find_interesting_uses (struct ivopts_data *data)
1986 {
1987   basic_block bb;
1988   gimple_stmt_iterator bsi;
1989   basic_block *body = get_loop_body (data->current_loop);
1990   unsigned i;
1991   struct version_info *info;
1992   edge e;
1993 
1994   if (dump_file && (dump_flags & TDF_DETAILS))
1995     fprintf (dump_file, "Uses:\n\n");
1996 
1997   for (i = 0; i < data->current_loop->num_nodes; i++)
1998     {
1999       edge_iterator ei;
2000       bb = body[i];
2001 
2002       FOR_EACH_EDGE (e, ei, bb->succs)
2003 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2004 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2005 	  find_interesting_uses_outside (data, e);
2006 
2007       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2008 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2009       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2010 	if (!is_gimple_debug (gsi_stmt (bsi)))
2011 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2012     }
2013 
2014   if (dump_file && (dump_flags & TDF_DETAILS))
2015     {
2016       bitmap_iterator bi;
2017 
2018       fprintf (dump_file, "\n");
2019 
2020       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2021 	{
2022 	  info = ver_info (data, i);
2023 	  if (info->inv_id)
2024 	    {
2025 	      fprintf (dump_file, "  ");
2026 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
2027 	      fprintf (dump_file, " is invariant (%d)%s\n",
2028 		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2029 	    }
2030 	}
2031 
2032       fprintf (dump_file, "\n");
2033     }
2034 
2035   free (body);
2036 }
2037 
2038 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2039    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2040    we are at the top-level of the processed address.  */
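/* As a hypothetical example, for EXPR = &a[i + 1] with 4-byte array
   elements, the function returns &a[i] and stores 4 to *OFFSET; for
   EXPR = p + 6 (a POINTER_PLUS_EXPR with a constant operand), it returns
   p and stores 6 to *OFFSET.  */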
2041 
2042 static tree
2043 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2044 		HOST_WIDE_INT *offset)
2045 {
2046   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2047   enum tree_code code;
2048   tree type, orig_type = TREE_TYPE (expr);
2049   HOST_WIDE_INT off0, off1, st;
2050   tree orig_expr = expr;
2051 
2052   STRIP_NOPS (expr);
2053 
2054   type = TREE_TYPE (expr);
2055   code = TREE_CODE (expr);
2056   *offset = 0;
2057 
2058   switch (code)
2059     {
2060     case INTEGER_CST:
2061       if (!cst_and_fits_in_hwi (expr)
2062 	  || integer_zerop (expr))
2063 	return orig_expr;
2064 
2065       *offset = int_cst_value (expr);
2066       return build_int_cst (orig_type, 0);
2067 
2068     case POINTER_PLUS_EXPR:
2069     case PLUS_EXPR:
2070     case MINUS_EXPR:
2071       op0 = TREE_OPERAND (expr, 0);
2072       op1 = TREE_OPERAND (expr, 1);
2073 
2074       op0 = strip_offset_1 (op0, false, false, &off0);
2075       op1 = strip_offset_1 (op1, false, false, &off1);
2076 
2077       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2078       if (op0 == TREE_OPERAND (expr, 0)
2079 	  && op1 == TREE_OPERAND (expr, 1))
2080 	return orig_expr;
2081 
2082       if (integer_zerop (op1))
2083 	expr = op0;
2084       else if (integer_zerop (op0))
2085 	{
2086 	  if (code == MINUS_EXPR)
2087 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2088 	  else
2089 	    expr = op1;
2090 	}
2091       else
2092 	expr = fold_build2 (code, type, op0, op1);
2093 
2094       return fold_convert (orig_type, expr);
2095 
2096     case MULT_EXPR:
2097       op1 = TREE_OPERAND (expr, 1);
2098       if (!cst_and_fits_in_hwi (op1))
2099 	return orig_expr;
2100 
2101       op0 = TREE_OPERAND (expr, 0);
2102       op0 = strip_offset_1 (op0, false, false, &off0);
2103       if (op0 == TREE_OPERAND (expr, 0))
2104 	return orig_expr;
2105 
2106       *offset = off0 * int_cst_value (op1);
2107       if (integer_zerop (op0))
2108 	expr = op0;
2109       else
2110 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2111 
2112       return fold_convert (orig_type, expr);
2113 
2114     case ARRAY_REF:
2115     case ARRAY_RANGE_REF:
2116       if (!inside_addr)
2117 	return orig_expr;
2118 
2119       step = array_ref_element_size (expr);
2120       if (!cst_and_fits_in_hwi (step))
2121 	break;
2122 
2123       st = int_cst_value (step);
2124       op1 = TREE_OPERAND (expr, 1);
2125       op1 = strip_offset_1 (op1, false, false, &off1);
2126       *offset = off1 * st;
2127 
2128       if (top_compref
2129 	  && integer_zerop (op1))
2130 	{
2131 	  /* Strip the component reference completely.  */
2132 	  op0 = TREE_OPERAND (expr, 0);
2133 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2134 	  *offset += off0;
2135 	  return op0;
2136 	}
2137       break;
2138 
2139     case COMPONENT_REF:
2140       {
2141 	tree field;
2142 
2143 	if (!inside_addr)
2144 	  return orig_expr;
2145 
2146 	tmp = component_ref_field_offset (expr);
2147 	field = TREE_OPERAND (expr, 1);
2148 	if (top_compref
2149 	    && cst_and_fits_in_hwi (tmp)
2150 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2151 	  {
2152 	    HOST_WIDE_INT boffset, abs_off;
2153 
2154 	    /* Strip the component reference completely.  */
2155 	    op0 = TREE_OPERAND (expr, 0);
2156 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2157 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2158 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2159 	    if (boffset < 0)
2160 	      abs_off = -abs_off;
2161 
2162 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2163 	    return op0;
2164 	  }
2165       }
2166       break;
2167 
2168     case ADDR_EXPR:
2169       op0 = TREE_OPERAND (expr, 0);
2170       op0 = strip_offset_1 (op0, true, true, &off0);
2171       *offset += off0;
2172 
2173       if (op0 == TREE_OPERAND (expr, 0))
2174 	return orig_expr;
2175 
2176       expr = build_fold_addr_expr (op0);
2177       return fold_convert (orig_type, expr);
2178 
2179     case MEM_REF:
2180       /* ???  Offset operand?  */
2181       inside_addr = false;
2182       break;
2183 
2184     default:
2185       return orig_expr;
2186     }
2187 
2188   /* Default handling of expressions for which we want to recurse into
2189      the first operand.  */
2190   op0 = TREE_OPERAND (expr, 0);
2191   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2192   *offset += off0;
2193 
2194   if (op0 == TREE_OPERAND (expr, 0)
2195       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2196     return orig_expr;
2197 
2198   expr = copy_node (expr);
2199   TREE_OPERAND (expr, 0) = op0;
2200   if (op1)
2201     TREE_OPERAND (expr, 1) = op1;
2202 
2203   /* Inside an address, we might strip the top-level component references,
2204      thus changing the type of the expression.  Handling of ADDR_EXPR
2205      will fix that.  */
2206   expr = fold_convert (orig_type, expr);
2207 
2208   return expr;
2209 }
2210 
2211 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2212 
2213 static tree
2214 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2215 {
2216   HOST_WIDE_INT off;
2217   tree core = strip_offset_1 (expr, false, false, &off);
2218   *offset = off;
2219   return core;
2220 }
2221 
2222 /* Returns a variant of TYPE that can be used as a base for different uses.
2223    We return an unsigned type with the same precision, which avoids problems
2224    with overflows.  */
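/* E.g. (an illustrative case) for the type int this returns unsigned int,
   and for a pointer type such as int * it returns an unsigned integer
   type of the same precision, so that candidate arithmetic wraps instead
   of invoking undefined behavior on overflow.  */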
2225 
2226 static tree
2227 generic_type_for (tree type)
2228 {
2229   if (POINTER_TYPE_P (type))
2230     return unsigned_type_for (type);
2231 
2232   if (TYPE_UNSIGNED (type))
2233     return type;
2234 
2235   return unsigned_type_for (type);
2236 }
2237 
2238 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2239    the bitmap in which we should store them.  */
2240 
2241 static struct ivopts_data *fd_ivopts_data;
2242 static tree
2243 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2244 {
2245   bitmap *depends_on = (bitmap *) data;
2246   struct version_info *info;
2247 
2248   if (TREE_CODE (*expr_p) != SSA_NAME)
2249     return NULL_TREE;
2250   info = name_info (fd_ivopts_data, *expr_p);
2251 
2252   if (!info->inv_id || info->has_nonlin_use)
2253     return NULL_TREE;
2254 
2255   if (!*depends_on)
2256     *depends_on = BITMAP_ALLOC (NULL);
2257   bitmap_set_bit (*depends_on, info->inv_id);
2258 
2259   return NULL_TREE;
2260 }
2261 
2262 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2263    position to POS.  If USE is not NULL, the candidate is set as related to
2264    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2265    replacement of the final value of the iv by a direct computation.  */
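/* For instance (a hypothetical case), for a loop walking an int array,
   both the candidate {base = &a[0], step = 4} and its zero-based variant
   {base = 0, step = 4} may be added; the search loop below coalesces
   candidates that already exist with the same base, step and position.  */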
2266 
2267 static struct iv_cand *
2268 add_candidate_1 (struct ivopts_data *data,
2269 		 tree base, tree step, bool important, enum iv_position pos,
2270 		 struct iv_use *use, gimple incremented_at)
2271 {
2272   unsigned i;
2273   struct iv_cand *cand = NULL;
2274   tree type, orig_type;
2275 
2276   /* For non-original variables, make sure their values are computed in a type
2277      that does not invoke undefined behavior on overflows (since in general,
2278      we cannot prove that these induction variables are non-wrapping).  */
2279   if (pos != IP_ORIGINAL)
2280     {
2281       orig_type = TREE_TYPE (base);
2282       type = generic_type_for (orig_type);
2283       if (type != orig_type)
2284 	{
2285 	  base = fold_convert (type, base);
2286 	  step = fold_convert (type, step);
2287 	}
2288     }
2289 
2290   for (i = 0; i < n_iv_cands (data); i++)
2291     {
2292       cand = iv_cand (data, i);
2293 
2294       if (cand->pos != pos)
2295 	continue;
2296 
2297       if (cand->incremented_at != incremented_at
2298 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2299 	      && cand->ainc_use != use))
2300 	continue;
2301 
2302       if (!cand->iv)
2303 	{
2304 	  if (!base && !step)
2305 	    break;
2306 
2307 	  continue;
2308 	}
2309 
2310       if (!base && !step)
2311 	continue;
2312 
2313       if (operand_equal_p (base, cand->iv->base, 0)
2314 	  && operand_equal_p (step, cand->iv->step, 0)
2315           && (TYPE_PRECISION (TREE_TYPE (base))
2316               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2317 	break;
2318     }
2319 
2320   if (i == n_iv_cands (data))
2321     {
2322       cand = XCNEW (struct iv_cand);
2323       cand->id = i;
2324 
2325       if (!base && !step)
2326 	cand->iv = NULL;
2327       else
2328 	cand->iv = alloc_iv (base, step);
2329 
2330       cand->pos = pos;
2331       if (pos != IP_ORIGINAL && cand->iv)
2332 	{
2333 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2334 	  cand->var_after = cand->var_before;
2335 	}
2336       cand->important = important;
2337       cand->incremented_at = incremented_at;
2338       data->iv_candidates.safe_push (cand);
2339 
2340       if (step
2341 	  && TREE_CODE (step) != INTEGER_CST)
2342 	{
2343 	  fd_ivopts_data = data;
2344 	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2345 	}
2346 
2347       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2348 	cand->ainc_use = use;
2349       else
2350 	cand->ainc_use = NULL;
2351 
2352       if (dump_file && (dump_flags & TDF_DETAILS))
2353 	dump_cand (dump_file, cand);
2354     }
2355 
2356   if (important && !cand->important)
2357     {
2358       cand->important = true;
2359       if (dump_file && (dump_flags & TDF_DETAILS))
2360 	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2361     }
2362 
2363   if (use)
2364     {
2365       bitmap_set_bit (use->related_cands, i);
2366       if (dump_file && (dump_flags & TDF_DETAILS))
2367 	fprintf (dump_file, "Candidate %d is related to use %d\n",
2368 		 cand->id, use->id);
2369     }
2370 
2371   return cand;
2372 }
2373 
2374 /* Returns true if incrementing the induction variable at the end of the LOOP
2375    is allowed.
2376 
2377    The purpose is to avoid splitting the latch edge with a biv increment, thus
2378    creating a jump, possibly confusing other optimization passes and leaving
2379    less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2380    is not available (so we do not have a better alternative), or if the latch
2381    edge is already nonempty.  */
2382 
2383 static bool
2384 allow_ip_end_pos_p (struct loop *loop)
2385 {
2386   if (!ip_normal_pos (loop))
2387     return true;
2388 
2389   if (!empty_block_p (ip_end_pos (loop)))
2390     return true;
2391 
2392   return false;
2393 }
2394 
2395 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2396    Important field is set to IMPORTANT.  */
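/* As an illustrative sketch, on a target with post-increment addressing,
   a USE_ADDRESS use *p whose step equals the access size (say 4 for
   SImode) gives rise to an IP_AFTER_USE candidate, so that the memory
   access can later be turned into the equivalent of *p++.  */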
2397 
2398 static void
2399 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2400 			bool important, struct iv_use *use)
2401 {
2402   basic_block use_bb = gimple_bb (use->stmt);
2403   enum machine_mode mem_mode;
2404   unsigned HOST_WIDE_INT cstepi;
2405 
2406   /* If we insert the increment in any position other than the standard
2407      ones, we must ensure that it is incremented once per iteration.
2408      It must not be in an inner nested loop, or one side of an if
2409      statement.  */
2410   if (use_bb->loop_father != data->current_loop
2411       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2412       || stmt_could_throw_p (use->stmt)
2413       || !cst_and_fits_in_hwi (step))
2414     return;
2415 
2416   cstepi = int_cst_value (step);
2417 
2418   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2419   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2420 	|| USE_STORE_PRE_INCREMENT (mem_mode))
2421        && GET_MODE_SIZE (mem_mode) == cstepi)
2422       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2423 	   || USE_STORE_PRE_DECREMENT (mem_mode))
2424 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2425     {
2426       enum tree_code code = MINUS_EXPR;
2427       tree new_base;
2428       tree new_step = step;
2429 
2430       if (POINTER_TYPE_P (TREE_TYPE (base)))
2431 	{
2432 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2433 	  code = POINTER_PLUS_EXPR;
2434 	}
2435       else
2436 	new_step = fold_convert (TREE_TYPE (base), new_step);
2437       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2438       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2439 		       use->stmt);
2440     }
2441   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2442 	|| USE_STORE_POST_INCREMENT (mem_mode))
2443        && GET_MODE_SIZE (mem_mode) == cstepi)
2444       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2445 	   || USE_STORE_POST_DECREMENT (mem_mode))
2446 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2447     {
2448       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2449 		       use->stmt);
2450     }
2451 }
2452 
2453 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2454    position to POS.  If USE is not NULL, the candidate is set as related to
2455    it.  The candidate computation is scheduled on all available positions.  */
2456 
2457 static void
2458 add_candidate (struct ivopts_data *data,
2459 	       tree base, tree step, bool important, struct iv_use *use)
2460 {
2461   if (ip_normal_pos (data->current_loop))
2462     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2463   if (ip_end_pos (data->current_loop)
2464       && allow_ip_end_pos_p (data->current_loop))
2465     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2466 
2467   if (use != NULL && use->type == USE_ADDRESS)
2468     add_autoinc_candidates (data, base, step, important, use);
2469 }
2470 
2471 /* Adds standard iv candidates.  */
2472 
2473 static void
2474 add_standard_iv_candidates (struct ivopts_data *data)
2475 {
2476   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2477 
2478   /* The same for the long type if it is still fast enough.  */
2479   if (TYPE_PRECISION
2480         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2481       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2482     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2483 		   build_int_cst (long_integer_type_node, 1), true, NULL);
2484 
2485   /* The same for the long long type if it is still fast enough.  */
2486   if (TYPE_PRECISION
2487         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2488       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2489     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2490 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
2491 }
2492 
2493 
2494 /* Adds candidates based on the old induction variable IV.  */
2495 
2496 static void
2497 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2498 {
2499   gimple phi;
2500   tree def;
2501   struct iv_cand *cand;
2502 
2503   add_candidate (data, iv->base, iv->step, true, NULL);
2504 
2505   /* The same, but with initial value zero.  */
2506   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2507     add_candidate (data, size_int (0), iv->step, true, NULL);
2508   else
2509     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2510 		   iv->step, true, NULL);
2511 
2512   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2513   if (gimple_code (phi) == GIMPLE_PHI)
2514     {
2515       /* Additionally record the possibility of leaving the original iv
2516 	 untouched.  */
2517       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2518       /* Don't add candidate if it's from another PHI node because
2519 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
2520       phi = SSA_NAME_DEF_STMT (def);
2521       if (gimple_code (phi) != GIMPLE_PHI)
2522 	{
2523 	  cand = add_candidate_1 (data,
2524 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
2525 				  SSA_NAME_DEF_STMT (def));
2526 	  cand->var_before = iv->ssa_name;
2527 	  cand->var_after = def;
2528 	}
2529       else
2530 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
2531     }
2532 }
2533 
2534 /* Adds candidates based on the old induction variables.  */
2535 
2536 static void
2537 add_old_ivs_candidates (struct ivopts_data *data)
2538 {
2539   unsigned i;
2540   struct iv *iv;
2541   bitmap_iterator bi;
2542 
2543   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2544     {
2545       iv = ver_info (data, i)->iv;
2546       if (iv && iv->biv_p && !integer_zerop (iv->step))
2547 	add_old_iv_candidates (data, iv);
2548     }
2549 }
2550 
2551 /* Adds candidates based on the value of the induction variable IV and USE.  */
2552 
2553 static void
2554 add_iv_value_candidates (struct ivopts_data *data,
2555 			 struct iv *iv, struct iv_use *use)
2556 {
2557   unsigned HOST_WIDE_INT offset;
2558   tree base;
2559   tree basetype;
2560 
2561   add_candidate (data, iv->base, iv->step, false, use);
2562 
2563   /* The same, but with initial value zero.  Make such a variable important,
2564      since it is generic enough that possibly many uses may be based
2565      on it.  */
2566   basetype = TREE_TYPE (iv->base);
2567   if (POINTER_TYPE_P (basetype))
2568     basetype = sizetype;
2569   add_candidate (data, build_int_cst (basetype, 0),
2570 		 iv->step, true, use);
2571 
2572   /* Third, try removing the constant offset.  Make sure to add a
2573      candidate even for &a[0] vs. (T *)&a.  */
2574   base = strip_offset (iv->base, &offset);
2575   if (offset
2576       || base != iv->base)
2577     add_candidate (data, base, iv->step, false, use);
2578 }
2579 
2580 /* Adds candidates based on the uses.  */
2581 
2582 static void
2583 add_derived_ivs_candidates (struct ivopts_data *data)
2584 {
2585   unsigned i;
2586 
2587   for (i = 0; i < n_iv_uses (data); i++)
2588     {
2589       struct iv_use *use = iv_use (data, i);
2590 
2591       if (!use)
2592 	continue;
2593 
2594       switch (use->type)
2595 	{
2596 	case USE_NONLINEAR_EXPR:
2597 	case USE_COMPARE:
2598 	case USE_ADDRESS:
2599 	  /* Just add the ivs based on the value of the iv used here.  */
2600 	  add_iv_value_candidates (data, use->iv, use);
2601 	  break;
2602 
2603 	default:
2604 	  gcc_unreachable ();
2605 	}
2606     }
2607 }
2608 
2609 /* Record important candidates and add them to related_cands bitmaps
2610    if needed.  */
2611 
2612 static void
2613 record_important_candidates (struct ivopts_data *data)
2614 {
2615   unsigned i;
2616   struct iv_use *use;
2617 
2618   for (i = 0; i < n_iv_cands (data); i++)
2619     {
2620       struct iv_cand *cand = iv_cand (data, i);
2621 
2622       if (cand->important)
2623 	bitmap_set_bit (data->important_candidates, i);
2624     }
2625 
2626   data->consider_all_candidates = (n_iv_cands (data)
2627 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2628 
2629   if (data->consider_all_candidates)
2630     {
2631       /* We will not need "related_cands" bitmaps in this case,
2632 	 so release them to decrease peak memory consumption.  */
2633       for (i = 0; i < n_iv_uses (data); i++)
2634 	{
2635 	  use = iv_use (data, i);
2636 	  BITMAP_FREE (use->related_cands);
2637 	}
2638     }
2639   else
2640     {
2641       /* Add important candidates to the related_cands bitmaps.  */
2642       for (i = 0; i < n_iv_uses (data); i++)
2643 	bitmap_ior_into (iv_use (data, i)->related_cands,
2644 			 data->important_candidates);
2645     }
2646 }
2647 
2648 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2649    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2650    we allocate a simple list for every use.  */
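/* For example (hypothetical numbers), a use related to 5 candidates gets
   a cost map of size 8; set_use_iv_cost below can then reduce a candidate
   id modulo the size with a cheap mask:

     s = cand->id & (use->n_map_members - 1);  */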
2651 
2652 static void
2653 alloc_use_cost_map (struct ivopts_data *data)
2654 {
2655   unsigned i, size, s;
2656 
2657   for (i = 0; i < n_iv_uses (data); i++)
2658     {
2659       struct iv_use *use = iv_use (data, i);
2660 
2661       if (data->consider_all_candidates)
2662 	size = n_iv_cands (data);
2663       else
2664 	{
2665 	  s = bitmap_count_bits (use->related_cands);
2666 
2667 	  /* Round up to a power of two, so that taking the modulo by it is fast.  */
2668 	  size = s ? (1 << ceil_log2 (s)) : 1;
2669 	}
2670 
2671       use->n_map_members = size;
2672       use->cost_map = XCNEWVEC (struct cost_pair, size);
2673     }
2674 }
2675 
2676 /* Returns description of computation cost of expression whose runtime
2677    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2678 
2679 static comp_cost
2680 new_cost (unsigned runtime, unsigned complexity)
2681 {
2682   comp_cost cost;
2683 
2684   cost.cost = runtime;
2685   cost.complexity = complexity;
2686 
2687   return cost;
2688 }
2689 
2690 /* Adds costs COST1 and COST2.  */
2691 
2692 static comp_cost
2693 add_costs (comp_cost cost1, comp_cost cost2)
2694 {
2695   cost1.cost += cost2.cost;
2696   cost1.complexity += cost2.complexity;
2697 
2698   return cost1;
2699 }

2700 /* Subtracts costs COST1 and COST2.  */
2701 
2702 static comp_cost
2703 sub_costs (comp_cost cost1, comp_cost cost2)
2704 {
2705   cost1.cost -= cost2.cost;
2706   cost1.complexity -= cost2.complexity;
2707 
2708   return cost1;
2709 }
2710 
2711 /* Returns a negative number if COST1 < COST2, a positive number if
2712    COST1 > COST2, and 0 if COST1 = COST2.  */
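/* E.g. (illustrative values) {cost = 4, complexity = 1} compares less
   than {cost = 4, complexity = 2}: the runtime cost is compared first
   and the complexity only breaks ties.  */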
2713 
2714 static int
2715 compare_costs (comp_cost cost1, comp_cost cost2)
2716 {
2717   if (cost1.cost == cost2.cost)
2718     return cost1.complexity - cost2.complexity;
2719 
2720   return cost1.cost - cost2.cost;
2721 }
2722 
2723 /* Returns true if COST is infinite.  */
2724 
2725 static bool
2726 infinite_cost_p (comp_cost cost)
2727 {
2728   return cost.cost == INFTY;
2729 }
2730 
2731 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2732    on invariants DEPENDS_ON and that the value used in expressing it
2733    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2734 
2735 static void
2736 set_use_iv_cost (struct ivopts_data *data,
2737 		 struct iv_use *use, struct iv_cand *cand,
2738 		 comp_cost cost, bitmap depends_on, tree value,
2739 		 enum tree_code comp, int inv_expr_id)
2740 {
2741   unsigned i, s;
2742 
2743   if (infinite_cost_p (cost))
2744     {
2745       BITMAP_FREE (depends_on);
2746       return;
2747     }
2748 
2749   if (data->consider_all_candidates)
2750     {
2751       use->cost_map[cand->id].cand = cand;
2752       use->cost_map[cand->id].cost = cost;
2753       use->cost_map[cand->id].depends_on = depends_on;
2754       use->cost_map[cand->id].value = value;
2755       use->cost_map[cand->id].comp = comp;
2756       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2757       return;
2758     }
2759 
2760   /* n_map_members is a power of two, so this computes modulo.  */
2761   s = cand->id & (use->n_map_members - 1);
2762   for (i = s; i < use->n_map_members; i++)
2763     if (!use->cost_map[i].cand)
2764       goto found;
2765   for (i = 0; i < s; i++)
2766     if (!use->cost_map[i].cand)
2767       goto found;
2768 
2769   gcc_unreachable ();
2770 
2771 found:
2772   use->cost_map[i].cand = cand;
2773   use->cost_map[i].cost = cost;
2774   use->cost_map[i].depends_on = depends_on;
2775   use->cost_map[i].value = value;
2776   use->cost_map[i].comp = comp;
2777   use->cost_map[i].inv_expr_id = inv_expr_id;
2778 }
2779 
2780 /* Gets cost of (USE, CANDIDATE) pair.  */
2781 
2782 static struct cost_pair *
2783 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2784 		 struct iv_cand *cand)
2785 {
2786   unsigned i, s;
2787   struct cost_pair *ret;
2788 
2789   if (!cand)
2790     return NULL;
2791 
2792   if (data->consider_all_candidates)
2793     {
2794       ret = use->cost_map + cand->id;
2795       if (!ret->cand)
2796 	return NULL;
2797 
2798       return ret;
2799     }
2800 
2801   /* n_map_members is a power of two, so this computes modulo.  */
2802   s = cand->id & (use->n_map_members - 1);
2803   for (i = s; i < use->n_map_members; i++)
2804     if (use->cost_map[i].cand == cand)
2805       return use->cost_map + i;
2806     else if (use->cost_map[i].cand == NULL)
2807       return NULL;
2808   for (i = 0; i < s; i++)
2809     if (use->cost_map[i].cand == cand)
2810       return use->cost_map + i;
2811     else if (use->cost_map[i].cand == NULL)
2812       return NULL;
2813 
2814   return NULL;
2815 }
2816 
2817 /* Returns an estimate of the cost of computing SEQ.  */
2818 
2819 static unsigned
2820 seq_cost (rtx seq, bool speed)
2821 {
2822   unsigned cost = 0;
2823   rtx set;
2824 
2825   for (; seq; seq = NEXT_INSN (seq))
2826     {
2827       set = single_set (seq);
2828       if (set)
2829 	cost += set_src_cost (SET_SRC (set), speed);
2830       else
2831 	cost++;
2832     }
2833 
2834   return cost;
2835 }
2836 
2837 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2838 static rtx
2839 produce_memory_decl_rtl (tree obj, int *regno)
2840 {
2841   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2842   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2843   rtx x;
2844 
2845   gcc_assert (obj);
2846   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2847     {
2848       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2849       x = gen_rtx_SYMBOL_REF (address_mode, name);
2850       SET_SYMBOL_REF_DECL (x, obj);
2851       x = gen_rtx_MEM (DECL_MODE (obj), x);
2852       set_mem_addr_space (x, as);
2853       targetm.encode_section_info (obj, x, true);
2854     }
2855   else
2856     {
2857       x = gen_raw_REG (address_mode, (*regno)++);
2858       x = gen_rtx_MEM (DECL_MODE (obj), x);
2859       set_mem_addr_space (x, as);
2860     }
2861 
2862   return x;
2863 }
2864 
2865 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2866    walk_tree.  DATA contains the actual fake register number.  */
2867 
2868 static tree
2869 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2870 {
2871   tree obj = NULL_TREE;
2872   rtx x = NULL_RTX;
2873   int *regno = (int *) data;
2874 
2875   switch (TREE_CODE (*expr_p))
2876     {
2877     case ADDR_EXPR:
2878       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2879 	   handled_component_p (*expr_p);
2880 	   expr_p = &TREE_OPERAND (*expr_p, 0))
2881 	continue;
2882       obj = *expr_p;
2883       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2884         x = produce_memory_decl_rtl (obj, regno);
2885       break;
2886 
2887     case SSA_NAME:
2888       *ws = 0;
2889       obj = SSA_NAME_VAR (*expr_p);
2890       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2891       if (!obj)
2892 	return NULL_TREE;
2893       if (!DECL_RTL_SET_P (obj))
2894 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2895       break;
2896 
2897     case VAR_DECL:
2898     case PARM_DECL:
2899     case RESULT_DECL:
2900       *ws = 0;
2901       obj = *expr_p;
2902 
2903       if (DECL_RTL_SET_P (obj))
2904 	break;
2905 
2906       if (DECL_MODE (obj) == BLKmode)
2907 	x = produce_memory_decl_rtl (obj, regno);
2908       else
2909 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2910 
2911       break;
2912 
2913     default:
2914       break;
2915     }
2916 
2917   if (x)
2918     {
2919       decl_rtl_to_reset.safe_push (obj);
2920       SET_DECL_RTL (obj, x);
2921     }
2922 
2923   return NULL_TREE;
2924 }
2925 
2926 /* Determines cost of the computation of EXPR.  */
2927 
2928 static unsigned
2929 computation_cost (tree expr, bool speed)
2930 {
2931   rtx seq, rslt;
2932   tree type = TREE_TYPE (expr);
2933   unsigned cost;
2934   /* Avoid using hard regs in ways which may be unsupported.  */
2935   int regno = LAST_VIRTUAL_REGISTER + 1;
2936   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2937   enum node_frequency real_frequency = node->frequency;
2938 
2939   node->frequency = NODE_FREQUENCY_NORMAL;
2940   crtl->maybe_hot_insn_p = speed;
2941   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2942   start_sequence ();
2943   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2944   seq = get_insns ();
2945   end_sequence ();
2946   default_rtl_profile ();
2947   node->frequency = real_frequency;
2948 
2949   cost = seq_cost (seq, speed);
2950   if (MEM_P (rslt))
2951     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2952 			  TYPE_ADDR_SPACE (type), speed);
2953   else if (!REG_P (rslt))
2954     cost += set_src_cost (rslt, speed);
2955 
2956   return cost;
2957 }
2958 
2959 /* Returns variable containing the value of candidate CAND at statement AT.  */
2960 
2961 static tree
2962 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2963 {
2964   if (stmt_after_increment (loop, cand, stmt))
2965     return cand->var_after;
2966   else
2967     return cand->var_before;
2968 }
2969 
2970 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2971    same precision that is at least as wide as the precision of TYPE, stores
2972    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2973    type of A and B.  */
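/* As an illustrative case, if *A = (int) la and *B = (int) lb where LA
   and LB have type long (wider than int), *A becomes LA, *B becomes LB
   and long is returned; otherwise A and B are left untouched and the
   type of *A is returned.  */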
2974 
2975 static tree
2976 determine_common_wider_type (tree *a, tree *b)
2977 {
2978   tree wider_type = NULL;
2979   tree suba, subb;
2980   tree atype = TREE_TYPE (*a);
2981 
2982   if (CONVERT_EXPR_P (*a))
2983     {
2984       suba = TREE_OPERAND (*a, 0);
2985       wider_type = TREE_TYPE (suba);
2986       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2987 	return atype;
2988     }
2989   else
2990     return atype;
2991 
2992   if (CONVERT_EXPR_P (*b))
2993     {
2994       subb = TREE_OPERAND (*b, 0);
2995       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2996 	return atype;
2997     }
2998   else
2999     return atype;
3000 
3001   *a = suba;
3002   *b = subb;
3003   return wider_type;
3004 }
3005 
3006 /* Determines the expression by which USE is expressed from induction variable
3007    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3008    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
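/* For instance (hypothetical values), for a use {base = 0, step = 4} and
   a candidate {base = 0, step = 1} the ratio is 4, so the affine form of
   the use value is simply 4 * var, following the scheme

     use = ubase - ratio * cbase + ratio * var

   used below.  */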
3009 
3010 static bool
3011 get_computation_aff (struct loop *loop,
3012 		     struct iv_use *use, struct iv_cand *cand, gimple at,
3013 		     struct aff_tree *aff)
3014 {
3015   tree ubase = use->iv->base;
3016   tree ustep = use->iv->step;
3017   tree cbase = cand->iv->base;
3018   tree cstep = cand->iv->step, cstep_common;
3019   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3020   tree common_type, var;
3021   tree uutype;
3022   aff_tree cbase_aff, var_aff;
3023   double_int rat;
3024 
3025   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3026     {
3027       /* We do not have enough precision to express the values of USE.  */
3028       return false;
3029     }
3030 
3031   var = var_at_stmt (loop, cand, at);
3032   uutype = unsigned_type_for (utype);
3033 
3034   /* If the conversion is not noop, perform it.  */
3035   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3036     {
3037       cstep = fold_convert (uutype, cstep);
3038       cbase = fold_convert (uutype, cbase);
3039       var = fold_convert (uutype, var);
3040     }
3041 
3042   if (!constant_multiple_of (ustep, cstep, &rat))
3043     return false;
3044 
3045   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3046      type, we achieve better folding by computing their difference in this
3047      wider type, and cast the result to UUTYPE.  We do not need to worry about
3048      overflows, as all the arithmetic will in the end be performed in UUTYPE
3049      anyway.  */
3050   common_type = determine_common_wider_type (&ubase, &cbase);
3051 
3052   /* use = ubase - ratio * cbase + ratio * var.  */
3053   tree_to_aff_combination (ubase, common_type, aff);
3054   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3055   tree_to_aff_combination (var, uutype, &var_aff);
3056 
3057   /* We need to shift the value if we are after the increment.  */
3058   if (stmt_after_increment (loop, cand, at))
3059     {
3060       aff_tree cstep_aff;
3061 
3062       if (common_type != uutype)
3063 	cstep_common = fold_convert (common_type, cstep);
3064       else
3065 	cstep_common = cstep;
3066 
3067       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3068       aff_combination_add (&cbase_aff, &cstep_aff);
3069     }
3070 
3071   aff_combination_scale (&cbase_aff, -rat);
3072   aff_combination_add (aff, &cbase_aff);
3073   if (common_type != uutype)
3074     aff_combination_convert (aff, uutype);
3075 
3076   aff_combination_scale (&var_aff, rat);
3077   aff_combination_add (aff, &var_aff);
3078 
3079   return true;
3080 }
3081 
3082 /* Return the type of USE.  */
3083 
3084 static tree
3085 get_use_type (struct iv_use *use)
3086 {
3087   tree base_type = TREE_TYPE (use->iv->base);
3088   tree type;
3089 
3090   if (use->type == USE_ADDRESS)
3091     {
3092       /* The base_type may be a void pointer.  Create a pointer type based on
3093 	 the mem_ref instead.  */
3094       type = build_pointer_type (TREE_TYPE (*use->op_p));
3095       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3096 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3097     }
3098   else
3099     type = base_type;
3100 
3101   return type;
3102 }
3103 
3104 /* Determines the expression by which USE is expressed from induction variable
3105    CAND at statement AT in LOOP.  The computation is unshared.  */
3106 
3107 static tree
3108 get_computation_at (struct loop *loop,
3109 		    struct iv_use *use, struct iv_cand *cand, gimple at)
3110 {
3111   aff_tree aff;
3112   tree type = get_use_type (use);
3113 
3114   if (!get_computation_aff (loop, use, cand, at, &aff))
3115     return NULL_TREE;
3116   unshare_aff_combination (&aff);
3117   return fold_convert (type, aff_combination_to_tree (&aff));
3118 }
3119 
3120 /* Determines the expression by which USE is expressed from induction variable
3121    CAND in LOOP.  The computation is unshared.  */
3122 
3123 static tree
3124 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3125 {
3126   return get_computation_at (loop, use, cand, use->stmt);
3127 }
3128 
3129 /* Adjust the cost COST for being in loop setup rather than loop body.
3130    If we're optimizing for space, the loop setup overhead is constant;
3131    if we're optimizing for speed, amortize it over the per-iteration cost.  */
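/* E.g. (illustrative numbers) a setup cost of 10 in a loop that is
   expected to run 5 iterations on average contributes only 10 / 5 = 2
   when optimizing for speed.  */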
3132 static unsigned
3133 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3134 {
3135   if (cost == INFTY)
3136     return cost;
3137   else if (optimize_loop_for_speed_p (data->current_loop))
3138     return cost / avg_loop_niter (data->current_loop);
3139   else
3140     return cost;
3141 }
3142 
3143 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3144    validity for a memory reference accessing memory of mode MODE in
3145    address space AS.  */
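/* On a target with x86-like scaled addressing (an illustrative
   assumption), this would typically hold for RATIO values 1, 2, 4 and 8
   with the common integer modes, and fail for, say, 3.  */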
3146 
3147 
3148 bool
3149 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3150 				 addr_space_t as)
3151 {
3152 #define MAX_RATIO 128
3153   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3154   static vec<sbitmap> valid_mult_list;
3155   sbitmap valid_mult;
3156 
3157   if (data_index >= valid_mult_list.length ())
3158     valid_mult_list.safe_grow_cleared (data_index + 1);
3159 
3160   valid_mult = valid_mult_list[data_index];
3161   if (!valid_mult)
3162     {
3163       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3164       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3165       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3166       rtx addr, scaled;
3167       HOST_WIDE_INT i;
3168 
3169       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3170       bitmap_clear (valid_mult);
3171       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3172       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3173       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3174 	{
3175 	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3176 	  if (memory_address_addr_space_p (mode, addr, as)
3177 	      || memory_address_addr_space_p (mode, scaled, as))
3178 	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
3179 	}
3180 
3181       if (dump_file && (dump_flags & TDF_DETAILS))
3182 	{
3183 	  fprintf (dump_file, "  allowed multipliers:");
3184 	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3185 	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3186 	      fprintf (dump_file, " %d", (int) i);
3187 	  fprintf (dump_file, "\n");
3188 	  fprintf (dump_file, "\n");
3189 	}
3190 
3191       valid_mult_list[data_index] = valid_mult;
3192     }
3193 
3194   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3195     return false;
3196 
3197   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3198 }
3199 
3200 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3201    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3202    variable is omitted.  Compute the cost for a memory reference that accesses
3203    a memory location of mode MEM_MODE in address space AS.
3204 
3205    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3206    size of MEM_MODE / RATIO) is available.  To make this determination, we
3207    look at the size of the increment to be made, which is given in CSTEP.
3208    CSTEP may be zero if the step is unknown.
3209    STMT_AFTER_INC is true iff the statement we're looking at is after the
3210    increment of the original biv.
3211 
3212    TODO -- there must be some better way.  All of this is quite crude.  */
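/* As an illustrative example, the address &sym + x + 16 + 4 * i has
   SYMBOL_PRESENT = true, VAR_PRESENT = true, OFFSET = 16 and RATIO = 4;
   the tables precomputed below yield its cost on the current target.  */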
3213 
3214 enum ainc_type
3215 {
3216   AINC_PRE_INC,		/* Pre increment.  */
3217   AINC_PRE_DEC,		/* Pre decrement.  */
3218   AINC_POST_INC,	/* Post increment.  */
3219   AINC_POST_DEC,	/* Post decrement.  */
3220   AINC_NONE		/* Also the number of auto increment types.  */
3221 };
3222 
3223 typedef struct address_cost_data_s
3224 {
3225   HOST_WIDE_INT min_offset, max_offset;
3226   unsigned costs[2][2][2][2];
3227   unsigned ainc_costs[AINC_NONE];
3228 } *address_cost_data;
3229 
3230 
3231 static comp_cost
3232 get_address_cost (bool symbol_present, bool var_present,
3233 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3234 		  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3235 		  addr_space_t as, bool speed,
3236 		  bool stmt_after_inc, bool *may_autoinc)
3237 {
3238   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3239   static vec<address_cost_data> address_cost_data_list;
3240   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3241   address_cost_data data;
3242   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3243   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3244   unsigned cost, acost, complexity;
3245   enum ainc_type autoinc_type;
3246   bool offset_p, ratio_p, autoinc;
3247   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3248   unsigned HOST_WIDE_INT mask;
3249   unsigned bits;
3250 
3251   if (data_index >= address_cost_data_list.length ())
3252     address_cost_data_list.safe_grow_cleared (data_index + 1);
3253 
3254   data = address_cost_data_list[data_index];
3255   if (!data)
3256     {
3257       HOST_WIDE_INT i;
3258       HOST_WIDE_INT rat, off = 0;
3259       int old_cse_not_expected, width;
3260       unsigned sym_p, var_p, off_p, rat_p, add_c;
3261       rtx seq, addr, base;
3262       rtx reg0, reg1;
3263 
3264       data = (address_cost_data) xcalloc (1, sizeof (*data));
3265 
3266       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3267 
3268       width = GET_MODE_BITSIZE (address_mode) - 1;
3269       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3270 	width = HOST_BITS_PER_WIDE_INT - 1;
3271       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3272 
3273       for (i = width; i >= 0; i--)
3274 	{
3275 	  off = -((unsigned HOST_WIDE_INT) 1 << i);
3276 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3277 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3278 	    break;
3279 	}
3280       data->min_offset = (i == -1? 0 : off);
3281 
3282       for (i = width; i >= 0; i--)
3283 	{
3284 	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3285 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3286 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3287 	    break;
3288 	}
3289       if (i == -1)
3290         off = 0;
3291       data->max_offset = off;
3292 
3293       if (dump_file && (dump_flags & TDF_DETAILS))
3294 	{
3295 	  fprintf (dump_file, "get_address_cost:\n");
3296 	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3297 		   GET_MODE_NAME (mem_mode),
3298 		   data->min_offset);
3299 	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3300 		   GET_MODE_NAME (mem_mode),
3301 		   data->max_offset);
3302 	}
3303 
3304       rat = 1;
3305       for (i = 2; i <= MAX_RATIO; i++)
3306 	if (multiplier_allowed_in_address_p (i, mem_mode, as))
3307 	  {
3308 	    rat = i;
3309 	    break;
3310 	  }
3311 
3312       /* Compute the cost of various addressing modes.  */
3313       acost = 0;
3314       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3315       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3316 
3317       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3318 	  || USE_STORE_PRE_DECREMENT (mem_mode))
3319 	{
3320 	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
3321 	  has_predec[mem_mode]
3322 	    = memory_address_addr_space_p (mem_mode, addr, as);
3323 
3324 	  if (has_predec[mem_mode])
3325 	    data->ainc_costs[AINC_PRE_DEC]
3326 	      = address_cost (addr, mem_mode, as, speed);
3327 	}
3328       if (USE_LOAD_POST_DECREMENT (mem_mode)
3329 	  || USE_STORE_POST_DECREMENT (mem_mode))
3330 	{
3331 	  addr = gen_rtx_POST_DEC (address_mode, reg0);
3332 	  has_postdec[mem_mode]
3333 	    = memory_address_addr_space_p (mem_mode, addr, as);
3334 
3335 	  if (has_postdec[mem_mode])
3336 	    data->ainc_costs[AINC_POST_DEC]
3337 	      = address_cost (addr, mem_mode, as, speed);
3338 	}
3339       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3340 	  || USE_STORE_PRE_INCREMENT (mem_mode))
3341 	{
3342 	  addr = gen_rtx_PRE_INC (address_mode, reg0);
3343 	  has_preinc[mem_mode]
3344 	    = memory_address_addr_space_p (mem_mode, addr, as);
3345 
3346 	  if (has_preinc[mem_mode])
3347 	    data->ainc_costs[AINC_PRE_INC]
3348 	      = address_cost (addr, mem_mode, as, speed);
3349 	}
3350       if (USE_LOAD_POST_INCREMENT (mem_mode)
3351 	  || USE_STORE_POST_INCREMENT (mem_mode))
3352 	{
3353 	  addr = gen_rtx_POST_INC (address_mode, reg0);
3354 	  has_postinc[mem_mode]
3355 	    = memory_address_addr_space_p (mem_mode, addr, as);
3356 
3357 	  if (has_postinc[mem_mode])
3358 	    data->ainc_costs[AINC_POST_INC]
3359 	      = address_cost (addr, mem_mode, as, speed);
3360 	}
3361       for (i = 0; i < 16; i++)
3362 	{
3363 	  sym_p = i & 1;
3364 	  var_p = (i >> 1) & 1;
3365 	  off_p = (i >> 2) & 1;
3366 	  rat_p = (i >> 3) & 1;
3367 
3368 	  addr = reg0;
3369 	  if (rat_p)
3370 	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3371 				   gen_int_mode (rat, address_mode));
3372 
3373 	  if (var_p)
3374 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3375 
3376 	  if (sym_p)
3377 	    {
3378 	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3379 	      /* ??? We can run into trouble with some backends by presenting
3380 		 it with symbols which haven't been properly passed through
3381 		 targetm.encode_section_info.  By setting the local bit, we
3382 		 enhance the probability of things working.  */
3383 	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3384 
3385 	      if (off_p)
3386 		base = gen_rtx_fmt_e (CONST, address_mode,
3387 				      gen_rtx_fmt_ee
3388 					(PLUS, address_mode, base,
3389 					 gen_int_mode (off, address_mode)));
3390 	    }
3391 	  else if (off_p)
3392 	    base = gen_int_mode (off, address_mode);
3393 	  else
3394 	    base = NULL_RTX;
3395 
3396 	  if (base)
3397 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3398 
3399 	  start_sequence ();
3400 	  /* To avoid splitting addressing modes, pretend that no cse will
3401 	     follow.  */
3402 	  old_cse_not_expected = cse_not_expected;
3403 	  cse_not_expected = true;
3404 	  addr = memory_address_addr_space (mem_mode, addr, as);
3405 	  cse_not_expected = old_cse_not_expected;
3406 	  seq = get_insns ();
3407 	  end_sequence ();
3408 
3409 	  acost = seq_cost (seq, speed);
3410 	  acost += address_cost (addr, mem_mode, as, speed);
3411 
3412 	  if (!acost)
3413 	    acost = 1;
3414 	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
3415 	}
3416 
3417       /* On some targets, it is quite expensive to load symbol to a register,
3418 	 which makes addresses that contain symbols look much more expensive.
3419 	 However, the symbol will have to be loaded in any case before the
3420 	 loop (and quite likely we have it in register already), so it does not
3421 	 make much sense to penalize them too heavily.  So make some final
3422          tweaks for the SYMBOL_PRESENT modes:
3423 
3424          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3425 	 var is cheaper, use this mode with small penalty.
3426 	 If VAR_PRESENT is true, try whether the mode with
3427 	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3428 	 if this is the case, use it.  */
3429       add_c = add_cost (speed, address_mode);
3430       for (i = 0; i < 8; i++)
3431 	{
3432 	  var_p = i & 1;
3433 	  off_p = (i >> 1) & 1;
3434 	  rat_p = (i >> 2) & 1;
3435 
3436 	  acost = data->costs[0][1][off_p][rat_p] + 1;
3437 	  if (var_p)
3438 	    acost += add_c;
3439 
3440 	  if (acost < data->costs[1][var_p][off_p][rat_p])
3441 	    data->costs[1][var_p][off_p][rat_p] = acost;
3442 	}
3443 
3444       if (dump_file && (dump_flags & TDF_DETAILS))
3445 	{
3446 	  fprintf (dump_file, "Address costs:\n");
3447 
3448 	  for (i = 0; i < 16; i++)
3449 	    {
3450 	      sym_p = i & 1;
3451 	      var_p = (i >> 1) & 1;
3452 	      off_p = (i >> 2) & 1;
3453 	      rat_p = (i >> 3) & 1;
3454 
3455 	      fprintf (dump_file, "  ");
3456 	      if (sym_p)
3457 		fprintf (dump_file, "sym + ");
3458 	      if (var_p)
3459 		fprintf (dump_file, "var + ");
3460 	      if (off_p)
3461 		fprintf (dump_file, "cst + ");
3462 	      if (rat_p)
3463 		fprintf (dump_file, "rat * ");
3464 
3465 	      acost = data->costs[sym_p][var_p][off_p][rat_p];
3466 	      fprintf (dump_file, "index costs %d\n", acost);
3467 	    }
3468 	  if (has_predec[mem_mode] || has_postdec[mem_mode]
3469 	      || has_preinc[mem_mode] || has_postinc[mem_mode])
3470 	    fprintf (dump_file, "  May include autoinc/dec\n");
3471 	  fprintf (dump_file, "\n");
3472 	}
3473 
3474       address_cost_data_list[data_index] = data;
3475     }
3476 
3477   bits = GET_MODE_BITSIZE (address_mode);
3478   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3479   offset &= mask;
3480   if ((offset >> (bits - 1) & 1))
3481     offset |= ~mask;
3482   s_offset = offset;
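
  /* A worked illustration (editorial note, not from the original source):
     with a 32-bit address_mode, BITS is 32 and MASK covers the low 32 bits,
     so an incoming OFFSET of 0xfffffff0 keeps its top bit after masking and
     the "offset |= ~mask" step sign-extends it, yielding S_OFFSET == -16.  */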
3483 
3484   autoinc = false;
3485   autoinc_type = AINC_NONE;
3486   msize = GET_MODE_SIZE (mem_mode);
3487   autoinc_offset = offset;
3488   if (stmt_after_inc)
3489     autoinc_offset += ratio * cstep;
3490   if (symbol_present || var_present || ratio != 1)
3491     autoinc = false;
3492   else
3493     {
3494       if (has_postinc[mem_mode] && autoinc_offset == 0
3495 	  && msize == cstep)
3496 	autoinc_type = AINC_POST_INC;
3497       else if (has_postdec[mem_mode] && autoinc_offset == 0
3498 	       && msize == -cstep)
3499 	autoinc_type = AINC_POST_DEC;
3500       else if (has_preinc[mem_mode] && autoinc_offset == msize
3501 	       && msize == cstep)
3502 	autoinc_type = AINC_PRE_INC;
3503       else if (has_predec[mem_mode] && autoinc_offset == -msize
3504 	       && msize == -cstep)
3505 	autoinc_type = AINC_PRE_DEC;
3506 
3507       if (autoinc_type != AINC_NONE)
3508 	autoinc = true;
3509     }
3510 
3511   cost = 0;
3512   offset_p = (s_offset != 0
3513 	      && data->min_offset <= s_offset
3514 	      && s_offset <= data->max_offset);
3515   ratio_p = (ratio != 1
3516 	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3517 
3518   if (ratio != 1 && !ratio_p)
3519     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3520 
3521   if (s_offset && !offset_p && !symbol_present)
3522     cost += add_cost (speed, address_mode);
3523 
3524   if (may_autoinc)
3525     *may_autoinc = autoinc;
3526   if (autoinc)
3527     acost = data->ainc_costs[autoinc_type];
3528   else
3529     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3530   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3531   return new_cost (cost + acost, complexity);
3532 }
3533 
3534  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
3535     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3536     calculating the operands of EXPR.  Returns true if successful, and returns
3537     the cost in COST.  */
3538 
3539 static bool
3540 get_shiftadd_cost (tree expr, enum machine_mode mode,
3541                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3542 {
3543   comp_cost res;
3544   tree op1 = TREE_OPERAND (expr, 1);
3545   tree cst = TREE_OPERAND (mult, 1);
3546   tree multop = TREE_OPERAND (mult, 0);
3547   int m = exact_log2 (int_cst_value (cst));
3548   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3549   int sa_cost;
3550   bool equal_p = false;
3551 
3552   if (!(m >= 0 && m < maxm))
3553     return false;
3554 
3555   if (operand_equal_p (op1, mult, 0))
3556     equal_p = true;
3557 
3558   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3559              ? shiftadd_cost (speed, mode, m)
3560              : (equal_p
3561                 ? shiftsub1_cost (speed, mode, m)
3562                 : shiftsub0_cost (speed, mode, m)));
3563   res = new_cost (sa_cost, 0);
3564   res = add_costs (res, equal_p ? cost0 : cost1);
3565 
3566   STRIP_NOPS (multop);
3567   if (!is_gimple_val (multop))
3568     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3569 
3570   *cost = res;
3571   return true;
3572 }
3573 
3574 /* Estimates cost of forcing expression EXPR into a variable.  */
3575 
3576 static comp_cost
3577 force_expr_to_var_cost (tree expr, bool speed)
3578 {
3579   static bool costs_initialized = false;
3580   static unsigned integer_cost [2];
3581   static unsigned symbol_cost [2];
3582   static unsigned address_cost [2];
3583   tree op0, op1;
3584   comp_cost cost0, cost1, cost;
3585   enum machine_mode mode;
3586 
3587   if (!costs_initialized)
3588     {
3589       tree type = build_pointer_type (integer_type_node);
3590       tree var, addr;
3591       rtx x;
3592       int i;
3593 
3594       var = create_tmp_var_raw (integer_type_node, "test_var");
3595       TREE_STATIC (var) = 1;
3596       x = produce_memory_decl_rtl (var, NULL);
3597       SET_DECL_RTL (var, x);
3598 
3599       addr = build1 (ADDR_EXPR, type, var);
3600 
3602       for (i = 0; i < 2; i++)
3603 	{
3604 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3605 							     2000), i);
3606 
3607 	  symbol_cost[i] = computation_cost (addr, i) + 1;
3608 
3609 	  address_cost[i]
3610 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3611 	  if (dump_file && (dump_flags & TDF_DETAILS))
3612 	    {
3613 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3614 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3615 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3616 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3617 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3618 	      fprintf (dump_file, "\n");
3619 	    }
3620 	}
3621 
3622       costs_initialized = true;
3623     }
3624 
3625   STRIP_NOPS (expr);
3626 
3627   if (SSA_VAR_P (expr))
3628     return no_cost;
3629 
3630   if (is_gimple_min_invariant (expr))
3631     {
3632       if (TREE_CODE (expr) == INTEGER_CST)
3633 	return new_cost (integer_cost [speed], 0);
3634 
3635       if (TREE_CODE (expr) == ADDR_EXPR)
3636 	{
3637 	  tree obj = TREE_OPERAND (expr, 0);
3638 
3639 	  if (TREE_CODE (obj) == VAR_DECL
3640 	      || TREE_CODE (obj) == PARM_DECL
3641 	      || TREE_CODE (obj) == RESULT_DECL)
3642 	    return new_cost (symbol_cost [speed], 0);
3643 	}
3644 
3645       return new_cost (address_cost [speed], 0);
3646     }
3647 
3648   switch (TREE_CODE (expr))
3649     {
3650     case POINTER_PLUS_EXPR:
3651     case PLUS_EXPR:
3652     case MINUS_EXPR:
3653     case MULT_EXPR:
3654       op0 = TREE_OPERAND (expr, 0);
3655       op1 = TREE_OPERAND (expr, 1);
3656       STRIP_NOPS (op0);
3657       STRIP_NOPS (op1);
3658       break;
3659 
3660     CASE_CONVERT:
3661     case NEGATE_EXPR:
3662       op0 = TREE_OPERAND (expr, 0);
3663       STRIP_NOPS (op0);
3664       op1 = NULL_TREE;
3665       break;
3666 
3667     default:
3668       /* Just an arbitrary value, FIXME.  */
3669       return new_cost (target_spill_cost[speed], 0);
3670     }
3671 
3672   if (op0 == NULL_TREE
3673       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3674     cost0 = no_cost;
3675   else
3676     cost0 = force_expr_to_var_cost (op0, speed);
3677 
3678   if (op1 == NULL_TREE
3679       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3680     cost1 = no_cost;
3681   else
3682     cost1 = force_expr_to_var_cost (op1, speed);
3683 
3684   mode = TYPE_MODE (TREE_TYPE (expr));
3685   switch (TREE_CODE (expr))
3686     {
3687     case POINTER_PLUS_EXPR:
3688     case PLUS_EXPR:
3689     case MINUS_EXPR:
3690     case NEGATE_EXPR:
3691       cost = new_cost (add_cost (speed, mode), 0);
3692       if (TREE_CODE (expr) != NEGATE_EXPR)
3693         {
3694           tree mult = NULL_TREE;
3695           comp_cost sa_cost;
3696           if (TREE_CODE (op1) == MULT_EXPR)
3697             mult = op1;
3698           else if (TREE_CODE (op0) == MULT_EXPR)
3699             mult = op0;
3700 
3701           if (mult != NULL_TREE
3702               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3703               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3704                                     speed, &sa_cost))
3705             return sa_cost;
3706         }
3707       break;
3708 
3709     CASE_CONVERT:
3710       {
3711 	tree inner_mode, outer_mode;
3712 	outer_mode = TREE_TYPE (expr);
3713 	inner_mode = TREE_TYPE (op0);
3714 	cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3715 				       TYPE_MODE (inner_mode), speed), 0);
3716       }
3717       break;
3718 
3719     case MULT_EXPR:
3720       if (cst_and_fits_in_hwi (op0))
3721 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3722 					     mode, speed), 0);
3723       else if (cst_and_fits_in_hwi (op1))
3724 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3725 					     mode, speed), 0);
3726       else
3727 	return new_cost (target_spill_cost [speed], 0);
3728       break;
3729 
3730     default:
3731       gcc_unreachable ();
3732     }
3733 
3734   cost = add_costs (cost, cost0);
3735   cost = add_costs (cost, cost1);
3736 
3737   /* Bound the cost by target_spill_cost.  The parts of complicated
3738      computations often are either loop invariant or at least can
3739      be shared between several iv uses, so letting this grow without
3740      limits would not give reasonable results.  */
3741   if (cost.cost > (int) target_spill_cost [speed])
3742     cost.cost = target_spill_cost [speed];
3743 
3744   return cost;
3745 }
3746 
3747 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3748    invariants the computation depends on.  */
3749 
3750 static comp_cost
3751 force_var_cost (struct ivopts_data *data,
3752 		tree expr, bitmap *depends_on)
3753 {
3754   if (depends_on)
3755     {
3756       fd_ivopts_data = data;
3757       walk_tree (&expr, find_depends, depends_on, NULL);
3758     }
3759 
3760   return force_expr_to_var_cost (expr, data->speed);
3761 }
3762 
3763 /* Estimates cost of expressing address ADDR as var + symbol + offset.  The
3764    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3765    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3766    invariants the computation depends on.  */
3767 
3768 static comp_cost
3769 split_address_cost (struct ivopts_data *data,
3770 		    tree addr, bool *symbol_present, bool *var_present,
3771 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3772 {
3773   tree core;
3774   HOST_WIDE_INT bitsize;
3775   HOST_WIDE_INT bitpos;
3776   tree toffset;
3777   enum machine_mode mode;
3778   int unsignedp, volatilep;
3779 
3780   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3781 			      &unsignedp, &volatilep, false);
3782 
3783   if (toffset != 0
3784       || bitpos % BITS_PER_UNIT != 0
3785       || TREE_CODE (core) != VAR_DECL)
3786     {
3787       *symbol_present = false;
3788       *var_present = true;
3789       fd_ivopts_data = data;
3790       walk_tree (&addr, find_depends, depends_on, NULL);
3791       return new_cost (target_spill_cost[data->speed], 0);
3792     }
3793 
3794   *offset += bitpos / BITS_PER_UNIT;
3795   if (TREE_STATIC (core)
3796       || DECL_EXTERNAL (core))
3797     {
3798       *symbol_present = true;
3799       *var_present = false;
3800       return no_cost;
3801     }
3802 
3803   *symbol_present = false;
3804   *var_present = true;
3805   return no_cost;
3806 }
3807 
3808 /* Estimates cost of expressing difference of addresses E1 - E2 as
3809    var + symbol + offset.  The value of offset is added to OFFSET,
3810    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3811    part is missing.  DEPENDS_ON is a set of the invariants the computation
3812    depends on.  */
3813 
3814 static comp_cost
3815 ptr_difference_cost (struct ivopts_data *data,
3816 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3817 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3818 {
3819   HOST_WIDE_INT diff = 0;
3820   aff_tree aff_e1, aff_e2;
3821   tree type;
3822 
3823   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3824 
3825   if (ptr_difference_const (e1, e2, &diff))
3826     {
3827       *offset += diff;
3828       *symbol_present = false;
3829       *var_present = false;
3830       return no_cost;
3831     }
3832 
3833   if (integer_zerop (e2))
3834     return split_address_cost (data, TREE_OPERAND (e1, 0),
3835 			       symbol_present, var_present, offset, depends_on);
3836 
3837   *symbol_present = false;
3838   *var_present = true;
3839 
3840   type = signed_type_for (TREE_TYPE (e1));
3841   tree_to_aff_combination (e1, type, &aff_e1);
3842   tree_to_aff_combination (e2, type, &aff_e2);
3843   aff_combination_scale (&aff_e2, double_int_minus_one);
3844   aff_combination_add (&aff_e1, &aff_e2);
3845 
3846   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3847 }
3848 
3849 /* Estimates cost of expressing difference E1 - E2 as
3850    var + symbol + offset.  The value of offset is added to OFFSET,
3851    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3852    part is missing.  DEPENDS_ON is a set of the invariants the computation
3853    depends on.  */
3854 
3855 static comp_cost
3856 difference_cost (struct ivopts_data *data,
3857 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3858 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3859 {
3860   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3861   unsigned HOST_WIDE_INT off1, off2;
3862   aff_tree aff_e1, aff_e2;
3863   tree type;
3864 
3865   e1 = strip_offset (e1, &off1);
3866   e2 = strip_offset (e2, &off2);
3867   *offset += off1 - off2;
3868 
3869   STRIP_NOPS (e1);
3870   STRIP_NOPS (e2);
3871 
3872   if (TREE_CODE (e1) == ADDR_EXPR)
3873     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3874 				offset, depends_on);
3875   *symbol_present = false;
3876 
3877   if (operand_equal_p (e1, e2, 0))
3878     {
3879       *var_present = false;
3880       return no_cost;
3881     }
3882 
3883   *var_present = true;
3884 
3885   if (integer_zerop (e2))
3886     return force_var_cost (data, e1, depends_on);
3887 
3888   if (integer_zerop (e1))
3889     {
3890       comp_cost cost = force_var_cost (data, e2, depends_on);
3891       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3892       return cost;
3893     }
3894 
3895   type = signed_type_for (TREE_TYPE (e1));
3896   tree_to_aff_combination (e1, type, &aff_e1);
3897   tree_to_aff_combination (e2, type, &aff_e2);
3898   aff_combination_scale (&aff_e2, double_int_minus_one);
3899   aff_combination_add (&aff_e1, &aff_e2);
3900 
3901   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3902 }
3903 
3904 /* Returns true if AFF1 and AFF2 are identical.  */
3905 
3906 static bool
3907 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3908 {
3909   unsigned i;
3910 
3911   if (aff1->n != aff2->n)
3912     return false;
3913 
3914   for (i = 0; i < aff1->n; i++)
3915     {
3916       if (aff1->elts[i].coef != aff2->elts[i].coef)
3917         return false;
3918 
3919       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3920         return false;
3921     }
3922   return true;
3923 }
3924 
3925 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3926 
3927 static int
3928 get_expr_id (struct ivopts_data *data, tree expr)
3929 {
3930   struct iv_inv_expr_ent ent;
3931   struct iv_inv_expr_ent **slot;
3932 
3933   ent.expr = expr;
3934   ent.hash = iterative_hash_expr (expr, 0);
3935   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3936   if (*slot)
3937     return (*slot)->id;
3938 
3939   *slot = XNEW (struct iv_inv_expr_ent);
3940   (*slot)->expr = expr;
3941   (*slot)->hash = ent.hash;
3942   (*slot)->id = data->inv_expr_id++;
3943   return (*slot)->id;
3944 }
3945 
3946 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3947    requires a new compiler-generated temporary.  Returns -1 otherwise.
3948    ADDRESS_P is a flag indicating if the expression is for address
3949    computation.  */
3950 
3951 static int
3952 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3953                             tree cbase, HOST_WIDE_INT ratio,
3954                             bool address_p)
3955 {
3956   aff_tree ubase_aff, cbase_aff;
3957   tree expr, ub, cb;
3958 
3959   STRIP_NOPS (ubase);
3960   STRIP_NOPS (cbase);
3961   ub = ubase;
3962   cb = cbase;
3963 
3964   if ((TREE_CODE (ubase) == INTEGER_CST)
3965       && (TREE_CODE (cbase) == INTEGER_CST))
3966     return -1;
3967 
3968   /* Strips the constant part.  */
3969   if (TREE_CODE (ubase) == PLUS_EXPR
3970       || TREE_CODE (ubase) == MINUS_EXPR
3971       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3972     {
3973       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3974         ubase = TREE_OPERAND (ubase, 0);
3975     }
3976 
3977   /* Strips the constant part.  */
3978   if (TREE_CODE (cbase) == PLUS_EXPR
3979       || TREE_CODE (cbase) == MINUS_EXPR
3980       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3981     {
3982       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3983         cbase = TREE_OPERAND (cbase, 0);
3984     }
3985 
3986   if (address_p)
3987     {
3988       if (((TREE_CODE (ubase) == SSA_NAME)
3989            || (TREE_CODE (ubase) == ADDR_EXPR
3990                && is_gimple_min_invariant (ubase)))
3991           && (TREE_CODE (cbase) == INTEGER_CST))
3992         return -1;
3993 
3994       if (((TREE_CODE (cbase) == SSA_NAME)
3995            || (TREE_CODE (cbase) == ADDR_EXPR
3996                && is_gimple_min_invariant (cbase)))
3997           && (TREE_CODE (ubase) == INTEGER_CST))
3998         return -1;
3999     }
4000 
4001   if (ratio == 1)
4002     {
4003       if (operand_equal_p (ubase, cbase, 0))
4004         return -1;
4005 
4006       if (TREE_CODE (ubase) == ADDR_EXPR
4007           && TREE_CODE (cbase) == ADDR_EXPR)
4008         {
4009           tree usym, csym;
4010 
4011           usym = TREE_OPERAND (ubase, 0);
4012           csym = TREE_OPERAND (cbase, 0);
4013           if (TREE_CODE (usym) == ARRAY_REF)
4014             {
4015               tree ind = TREE_OPERAND (usym, 1);
4016               if (TREE_CODE (ind) == INTEGER_CST
4017                   && tree_fits_shwi_p (ind)
4018                   && tree_to_shwi (ind) == 0)
4019                 usym = TREE_OPERAND (usym, 0);
4020             }
4021           if (TREE_CODE (csym) == ARRAY_REF)
4022             {
4023               tree ind = TREE_OPERAND (csym, 1);
4024               if (TREE_CODE (ind) == INTEGER_CST
4025                   && tree_fits_shwi_p (ind)
4026                   && tree_to_shwi (ind) == 0)
4027                 csym = TREE_OPERAND (csym, 0);
4028             }
4029           if (operand_equal_p (usym, csym, 0))
4030             return -1;
4031         }
4032       /* Now do a more complex comparison.  */
4033       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4034       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4035       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4036         return -1;
4037     }
4038 
4039   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4040   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4041 
4042   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4043   aff_combination_add (&ubase_aff, &cbase_aff);
4044   expr = aff_combination_to_tree (&ubase_aff);
4045   return get_expr_id (data, expr);
4046 }
4047 
4048 
4049 
4050 /* Determines the cost of the computation by which USE is expressed
4051    from induction variable CAND.  If ADDRESS_P is true, we just need
4052    to create an address from it, otherwise we want to get it into a
4053    register.  A set of invariants we depend on is stored in
4054    DEPENDS_ON.  AT is the statement at which the value is computed.
4055    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4056    addressing is likely.  */
4057 
4058 static comp_cost
4059 get_computation_cost_at (struct ivopts_data *data,
4060 			 struct iv_use *use, struct iv_cand *cand,
4061 			 bool address_p, bitmap *depends_on, gimple at,
4062 			 bool *can_autoinc,
4063                          int *inv_expr_id)
4064 {
4065   tree ubase = use->iv->base, ustep = use->iv->step;
4066   tree cbase, cstep;
4067   tree utype = TREE_TYPE (ubase), ctype;
4068   unsigned HOST_WIDE_INT cstepi, offset = 0;
4069   HOST_WIDE_INT ratio, aratio;
4070   bool var_present, symbol_present, stmt_is_after_inc;
4071   comp_cost cost;
4072   double_int rat;
4073   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4074   enum machine_mode mem_mode = (address_p
4075 				? TYPE_MODE (TREE_TYPE (*use->op_p))
4076 				: VOIDmode);
4077 
4078   *depends_on = NULL;
4079 
4080   /* Only consider real candidates.  */
4081   if (!cand->iv)
4082     return infinite_cost;
4083 
4084   cbase = cand->iv->base;
4085   cstep = cand->iv->step;
4086   ctype = TREE_TYPE (cbase);
4087 
4088   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4089     {
4090       /* We do not have enough precision to express the values of USE.  */
4091       return infinite_cost;
4092     }
4093 
4094   if (address_p
4095       || (use->iv->base_object
4096 	  && cand->iv->base_object
4097 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4098 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4099     {
4100       /* Do not try to express address of an object with computation based
4101 	 on address of a different object.  This may cause problems in rtl
4102 	 level alias analysis (that does not expect this to be happening,
4103 	 as this is illegal in C), and would be unlikely to be useful
4104 	 anyway.  */
4105       if (use->iv->base_object
4106 	  && cand->iv->base_object
4107 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4108 	return infinite_cost;
4109     }
4110 
4111   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4112     {
4113       /* TODO -- add direct handling of this case.  */
4114       goto fallback;
4115     }
4116 
4117   /* CSTEPI is removed from the offset in case the statement is after the
4118      increment.  If the step is not constant, we use zero instead.
4119      This is a bit imprecise (there is the extra addition), but
4120      redundancy elimination is likely to transform the code so that
4121      it uses the value of the variable before the increment anyway,
4122      so it is not that unrealistic.  */
4123   if (cst_and_fits_in_hwi (cstep))
4124     cstepi = int_cst_value (cstep);
4125   else
4126     cstepi = 0;
4127 
4128   if (!constant_multiple_of (ustep, cstep, &rat))
4129     return infinite_cost;
4130 
4131   if (rat.fits_shwi ())
4132     ratio = rat.to_shwi ();
4133   else
4134     return infinite_cost;
4135 
4136   STRIP_NOPS (cbase);
4137   ctype = TREE_TYPE (cbase);
4138 
4139   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4140 
4141   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4142      or ratio == 1, it is better to handle this like
4143 
4144      ubase - ratio * cbase + ratio * var
4145 
4146      (this also holds in the case ratio == -1, TODO).  */
4147 
4148   if (cst_and_fits_in_hwi (cbase))
4149     {
4150       offset = - ratio * int_cst_value (cbase);
4151       cost = difference_cost (data,
4152 			      ubase, build_int_cst (utype, 0),
4153 			      &symbol_present, &var_present, &offset,
4154 			      depends_on);
4155       cost.cost /= avg_loop_niter (data->current_loop);
4156     }
4157   else if (ratio == 1)
4158     {
4159       tree real_cbase = cbase;
4160 
4161       /* Check to see if any adjustment is needed.  */
4162       if (cstepi == 0 && stmt_is_after_inc)
4163         {
4164           aff_tree real_cbase_aff;
4165           aff_tree cstep_aff;
4166 
4167           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4168                                    &real_cbase_aff);
4169           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4170 
4171           aff_combination_add (&real_cbase_aff, &cstep_aff);
4172           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4173         }
4174 
4175       cost = difference_cost (data,
4176 			      ubase, real_cbase,
4177 			      &symbol_present, &var_present, &offset,
4178 			      depends_on);
4179       cost.cost /= avg_loop_niter (data->current_loop);
4180     }
4181   else if (address_p
4182 	   && !POINTER_TYPE_P (ctype)
4183 	   && multiplier_allowed_in_address_p
4184 		(ratio, mem_mode,
4185 			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4186     {
4187       cbase
4188 	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4189       cost = difference_cost (data,
4190 			      ubase, cbase,
4191 			      &symbol_present, &var_present, &offset,
4192 			      depends_on);
4193       cost.cost /= avg_loop_niter (data->current_loop);
4194     }
4195   else
4196     {
4197       cost = force_var_cost (data, cbase, depends_on);
4198       cost = add_costs (cost,
4199 			difference_cost (data,
4200 					 ubase, build_int_cst (utype, 0),
4201 					 &symbol_present, &var_present,
4202 					 &offset, depends_on));
4203       cost.cost /= avg_loop_niter (data->current_loop);
4204       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4205     }
4206 
4207   if (inv_expr_id)
4208     {
4209       *inv_expr_id =
4210           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4211       /* Clear depends on.  */
4212       if (*inv_expr_id != -1 && depends_on && *depends_on)
4213         bitmap_clear (*depends_on);
4214     }
4215 
4216   /* If we are after the increment, the value of the candidate is higher by
4217      one iteration.  */
4218   if (stmt_is_after_inc)
4219     offset -= ratio * cstepi;
4220 
4221   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4222      (symbol/var1/const parts may be omitted).  If we are looking for an
4223      address, find the cost of addressing this.  */
4224   if (address_p)
4225     return add_costs (cost,
4226 		      get_address_cost (symbol_present, var_present,
4227 					offset, ratio, cstepi,
4228 					mem_mode,
4229 					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4230 					speed, stmt_is_after_inc,
4231 					can_autoinc));
4232 
4233   /* Otherwise estimate the costs for computing the expression.  */
4234   if (!symbol_present && !var_present && !offset)
4235     {
4236       if (ratio != 1)
4237 	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4238       return cost;
4239     }
4240 
4241   /* Symbol + offset should be compile-time computable so consider that they
4242       are added once to the variable, if present.  */
4243   if (var_present && (symbol_present || offset))
4244     cost.cost += adjust_setup_cost (data,
4245 				    add_cost (speed, TYPE_MODE (ctype)));
4246 
4247   /* Having offset does not affect runtime cost in case it is added to
4248      symbol, but it increases complexity.  */
4249   if (offset)
4250     cost.complexity++;
4251 
4252   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4253 
4254   aratio = ratio > 0 ? ratio : -ratio;
4255   if (aratio != 1)
4256     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4257   return cost;
4258 
4259 fallback:
4260   if (can_autoinc)
4261     *can_autoinc = false;
4262 
4263   {
4264     /* Just get the expression, expand it and measure the cost.  */
4265     tree comp = get_computation_at (data->current_loop, use, cand, at);
4266 
4267     if (!comp)
4268       return infinite_cost;
4269 
4270     if (address_p)
4271       comp = build_simple_mem_ref (comp);
4272 
4273     return new_cost (computation_cost (comp, speed), 0);
4274   }
4275 }
4276 
4277 /* Determines the cost of the computation by which USE is expressed
4278    from induction variable CAND.  If ADDRESS_P is true, we just need
4279    to create an address from it, otherwise we want to get it into a
4280    register.  A set of invariants we depend on is stored in
4281    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4282    autoinc addressing is likely.  */
4283 
4284 static comp_cost
4285 get_computation_cost (struct ivopts_data *data,
4286 		      struct iv_use *use, struct iv_cand *cand,
4287 		      bool address_p, bitmap *depends_on,
4288                       bool *can_autoinc, int *inv_expr_id)
4289 {
4290   return get_computation_cost_at (data,
4291 				  use, cand, address_p, depends_on, use->stmt,
4292 				  can_autoinc, inv_expr_id);
4293 }
4294 
4295 /* Determines cost of basing replacement of USE on CAND in a generic
4296    expression.  */
4297 
4298 static bool
4299 determine_use_iv_cost_generic (struct ivopts_data *data,
4300 			       struct iv_use *use, struct iv_cand *cand)
4301 {
4302   bitmap depends_on;
4303   comp_cost cost;
4304   int inv_expr_id = -1;
4305 
4306   /* The simple case first -- if we need to express value of the preserved
4307      original biv, the cost is 0.  This also prevents us from counting the
4308      cost of increment twice -- once at this use and once in the cost of
4309      the candidate.  */
4310   if (cand->pos == IP_ORIGINAL
4311       && cand->incremented_at == use->stmt)
4312     {
4313       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4314                        ERROR_MARK, -1);
4315       return true;
4316     }
4317 
4318   cost = get_computation_cost (data, use, cand, false, &depends_on,
4319                                NULL, &inv_expr_id);
4320 
4321   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4322                    inv_expr_id);
4323 
4324   return !infinite_cost_p (cost);
4325 }
4326 
4327 /* Determines cost of basing replacement of USE on CAND in an address.  */
4328 
4329 static bool
4330 determine_use_iv_cost_address (struct ivopts_data *data,
4331 			       struct iv_use *use, struct iv_cand *cand)
4332 {
4333   bitmap depends_on;
4334   bool can_autoinc;
4335   int inv_expr_id = -1;
4336   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4337 					 &can_autoinc, &inv_expr_id);
4338 
4339   if (cand->ainc_use == use)
4340     {
4341       if (can_autoinc)
4342 	cost.cost -= cand->cost_step;
4343       /* If we generated the candidate solely for exploiting autoincrement
4344 	 opportunities, and it turns out it can't be used, set the cost to
4345 	 infinity to make sure we ignore it.  */
4346       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4347 	cost = infinite_cost;
4348     }
4349   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4350                    inv_expr_id);
4351 
4352   return !infinite_cost_p (cost);
4353 }
4354 
4355 /* Computes value of candidate CAND at position AT in iteration NITER, and
4356    stores it to VAL.  */
4357 
4358 static void
4359 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4360 	       aff_tree *val)
4361 {
4362   aff_tree step, delta, nit;
4363   struct iv *iv = cand->iv;
4364   tree type = TREE_TYPE (iv->base);
4365   /* Compute in an unsigned type so that the arithmetic wraps rather than
4366      invoking undefined overflow; unsigned_type_for also maps pointer
4367      types to an unsigned integer type of the same precision.  */
4368   tree steptype = unsigned_type_for (type);
4369 
4370   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4371   aff_combination_convert (&step, steptype);
4372   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4373   aff_combination_convert (&nit, steptype);
4374   aff_combination_mult (&nit, &step, &delta);
4375   if (stmt_after_increment (loop, cand, at))
4376     aff_combination_add (&delta, &step);
4377 
4378   tree_to_aff_combination (iv->base, type, val);
4379   if (!POINTER_TYPE_P (type))
4380     aff_combination_convert (val, steptype);
4381   aff_combination_add (val, &delta);
4382 }
4383 
4384 /* Returns period of induction variable iv.  */
4385 
4386 static tree
4387 iv_period (struct iv *iv)
4388 {
4389   tree step = iv->step, period, type;
4390   tree pow2div;
4391 
4392   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4393 
4394   type = unsigned_type_for (TREE_TYPE (step));
4395   /* The period of the iv is lcm (step, type_range)/step - 1,
4396      i.e., N*type_range/step - 1.  Since the type range is a power
4397      of two, N == (step >> num_of_ending_zeros_binary (step)),
4398      so the final result is
4399 
4400        (type_range >> num_of_ending_zeros_binary (step)) - 1
4401 
4402   */
4403   pow2div = num_ending_zeros (step);
4404 
4405   period = build_low_bits_mask (type,
4406                                 (TYPE_PRECISION (type)
4407                                  - tree_to_uhwi (pow2div)));
4408 
4409   return period;
4410 }
4411 
4412 /* Returns the comparison operator used when eliminating the iv USE.  */
4413 
4414 static enum tree_code
4415 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4416 {
4417   struct loop *loop = data->current_loop;
4418   basic_block ex_bb;
4419   edge exit;
4420 
4421   ex_bb = gimple_bb (use->stmt);
4422   exit = EDGE_SUCC (ex_bb, 0);
4423   if (flow_bb_inside_loop_p (loop, exit->dest))
4424     exit = EDGE_SUCC (ex_bb, 1);
4425 
4426   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4427 }
4428 
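/* Strips conversions from EXP that are useless for SSA purposes and do
   not change whether the type can wrap, returning the stripped
   expression.  */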
4429 static tree
4430 strip_wrap_conserving_type_conversions (tree exp)
4431 {
4432   while (tree_ssa_useless_type_conversion (exp)
4433 	 && (nowrap_type_p (TREE_TYPE (exp))
4434 	     == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4435     exp = TREE_OPERAND (exp, 0);
4436   return exp;
4437 }
4438 
4439 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4440    check for an exact match.  */
4441 
4442 static bool
4443 expr_equal_p (tree e, tree what)
4444 {
4445   gimple stmt;
4446   enum tree_code code;
4447 
4448   e = strip_wrap_conserving_type_conversions (e);
4449   what = strip_wrap_conserving_type_conversions (what);
4450 
4451   code = TREE_CODE (what);
4452   if (TREE_TYPE (e) != TREE_TYPE (what))
4453     return false;
4454 
4455   if (operand_equal_p (e, what, 0))
4456     return true;
4457 
4458   if (TREE_CODE (e) != SSA_NAME)
4459     return false;
4460 
4461   stmt = SSA_NAME_DEF_STMT (e);
4462   if (gimple_code (stmt) != GIMPLE_ASSIGN
4463       || gimple_assign_rhs_code (stmt) != code)
4464     return false;
4465 
4466   switch (get_gimple_rhs_class (code))
4467     {
4468     case GIMPLE_BINARY_RHS:
4469       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4470 	return false;
4471       /* Fallthru.  */
4472 
4473     case GIMPLE_UNARY_RHS:
4474     case GIMPLE_SINGLE_RHS:
4475       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4476     default:
4477       return false;
4478     }
4479 }
4480 
4481 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4482    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4483    calculation is performed in non-wrapping type.
4484 
4485    TODO: More generally, we could test for the situation that
4486 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4487 	 This would require knowing the sign of OFFSET.
4488 
4489 	 Also, we only look for the first addition in the computation of BASE.
4490 	 More complex analysis would be better, but introducing it just for
4491 	 this optimization seems like overkill.  */
4492 
4493 static bool
4494 difference_cannot_overflow_p (tree base, tree offset)
4495 {
4496   enum tree_code code;
4497   tree e1, e2;
4498 
4499   if (!nowrap_type_p (TREE_TYPE (base)))
4500     return false;
4501 
4502   base = expand_simple_operations (base);
4503 
4504   if (TREE_CODE (base) == SSA_NAME)
4505     {
4506       gimple stmt = SSA_NAME_DEF_STMT (base);
4507 
4508       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4509 	return false;
4510 
4511       code = gimple_assign_rhs_code (stmt);
4512       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4513 	return false;
4514 
4515       e1 = gimple_assign_rhs1 (stmt);
4516       e2 = gimple_assign_rhs2 (stmt);
4517     }
4518   else
4519     {
4520       code = TREE_CODE (base);
4521       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4522 	return false;
4523       e1 = TREE_OPERAND (base, 0);
4524       e2 = TREE_OPERAND (base, 1);
4525     }
4526 
4527   /* TODO: deeper inspection may be necessary to prove the equality.  */
4528   switch (code)
4529     {
4530     case PLUS_EXPR:
4531       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4532     case POINTER_PLUS_EXPR:
4533       return expr_equal_p (e2, offset);
4534 
4535     default:
4536       return false;
4537     }
4538 }
4539 
4540 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4541    comparison with CAND.  NITER describes the number of iterations of
4542    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4543 
4544    We aim to handle the following situation:
4545 
4546    sometype *base, *p;
4547    int a, b, i;
4548 
4549    i = a;
4550    p = p_0 = base + a;
4551 
4552    do
4553      {
4554        bla (*p);
4555        p++;
4556        i++;
4557      }
4558    while (i < b);
4559 
4560    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4561    We aim to optimize this to
4562 
4563    p = p_0 = base + a;
4564    do
4565      {
4566        bla (*p);
4567        p++;
4568      }
4569    while (p < p_0 - a + b);
4570 
4571    This preserves the correctness, since the pointer arithmetics does not
4572    overflow.  More precisely:
4573 
4574    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4575       overflow in computing it or the values of p.
4576    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4577       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4578 
4579 static bool
4580 iv_elimination_compare_lt (struct ivopts_data *data,
4581                            struct iv_cand *cand, enum tree_code *comp_p,
4582 			   struct tree_niter_desc *niter)
4583 {
4584   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4585   struct aff_tree nit, tmpa, tmpb;
4586   enum tree_code comp;
4587   HOST_WIDE_INT step;
4588 
4589   /* We need to know that the candidate induction variable does not overflow.
4590      While more complex analysis may be used to prove this, for now just
4591      check that the variable appears in the original program and that it
4592      is computed in a type that guarantees no overflows.  */
4593   cand_type = TREE_TYPE (cand->iv->base);
4594   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4595     return false;
4596 
4597   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4598      the calculation of the BOUND could overflow, making the comparison
4599      invalid.  */
4600   if (!data->loop_single_exit_p)
4601     return false;
4602 
4603   /* We need to be able to decide whether candidate is increasing or decreasing
4604      in order to choose the right comparison operator.  */
4605   if (!cst_and_fits_in_hwi (cand->iv->step))
4606     return false;
4607   step = int_cst_value (cand->iv->step);
4608 
4609   /* Check that the number of iterations matches the expected pattern:
4610      a + 1 > b ? 0 : b - a - 1.  */
4611   mbz = niter->may_be_zero;
4612   if (TREE_CODE (mbz) == GT_EXPR)
4613     {
4614       /* Handle a + 1 > b.  */
4615       tree op0 = TREE_OPERAND (mbz, 0);
4616       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4617 	{
4618 	  a = TREE_OPERAND (op0, 0);
4619 	  b = TREE_OPERAND (mbz, 1);
4620 	}
4621       else
4622 	return false;
4623     }
4624   else if (TREE_CODE (mbz) == LT_EXPR)
4625     {
4626       tree op1 = TREE_OPERAND (mbz, 1);
4627 
4628       /* Handle b < a + 1.  */
4629       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4630         {
4631           a = TREE_OPERAND (op1, 0);
4632           b = TREE_OPERAND (mbz, 0);
4633         }
4634       else
4635 	return false;
4636     }
4637   else
4638     return false;
4639 
4640   /* Expected number of iterations is B - A - 1.  Check that it matches
4641      the actual number, i.e., that B - A - NITER = 1.  */
4642   tree_to_aff_combination (niter->niter, nit_type, &nit);
4643   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4644   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4645   aff_combination_scale (&nit, double_int_minus_one);
4646   aff_combination_scale (&tmpa, double_int_minus_one);
4647   aff_combination_add (&tmpb, &tmpa);
4648   aff_combination_add (&tmpb, &nit);
4649   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4650     return false;
4651 
4652   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4653      overflow.  */
4654   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4655 			cand->iv->step,
4656 			fold_convert (TREE_TYPE (cand->iv->step), a));
4657   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4658     return false;
4659 
4660   /* Determine the new comparison operator.  */
4661   comp = step < 0 ? GT_EXPR : LT_EXPR;
4662   if (*comp_p == NE_EXPR)
4663     *comp_p = comp;
4664   else if (*comp_p == EQ_EXPR)
4665     *comp_p = invert_tree_comparison (comp, false);
4666   else
4667     gcc_unreachable ();
4668 
4669   return true;
4670 }
4671 
4672 /* Check whether it is possible to express the condition in USE by comparison
4673    of candidate CAND.  If so, store the value compared with to BOUND, and the
4674    comparison operator to COMP.  */
4675 
4676 static bool
4677 may_eliminate_iv (struct ivopts_data *data,
4678 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
4679 		  enum tree_code *comp)
4680 {
4681   basic_block ex_bb;
4682   edge exit;
4683   tree period;
4684   struct loop *loop = data->current_loop;
4685   aff_tree bnd;
4686   struct tree_niter_desc *desc = NULL;
4687 
4688   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4689     return false;
4690 
4691   /* For now works only for exits that dominate the loop latch.
4692      TODO: extend to other conditions inside loop body.  */
4693   ex_bb = gimple_bb (use->stmt);
4694   if (use->stmt != last_stmt (ex_bb)
4695       || gimple_code (use->stmt) != GIMPLE_COND
4696       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4697     return false;
4698 
4699   exit = EDGE_SUCC (ex_bb, 0);
4700   if (flow_bb_inside_loop_p (loop, exit->dest))
4701     exit = EDGE_SUCC (ex_bb, 1);
4702   if (flow_bb_inside_loop_p (loop, exit->dest))
4703     return false;
4704 
4705   desc = niter_for_exit (data, exit);
4706   if (!desc)
4707     return false;
4708 
4709   /* Determine whether we can use the variable to test the exit condition.
4710      This is the case iff the period of the induction variable is greater
4711      than the number of iterations for which the exit condition is true.  */
4712   period = iv_period (cand->iv);
4713 
4714   /* If the number of iterations is constant, compare against it directly.  */
4715   if (TREE_CODE (desc->niter) == INTEGER_CST)
4716     {
4717       /* See cand_value_at.  */
4718       if (stmt_after_increment (loop, cand, use->stmt))
4719         {
4720           if (!tree_int_cst_lt (desc->niter, period))
4721             return false;
4722         }
4723       else
4724         {
4725           if (tree_int_cst_lt (period, desc->niter))
4726             return false;
4727         }
4728     }
4729 
4730   /* If not, and if this is the only possible exit of the loop, see whether
4731      we can get a conservative estimate on the number of iterations of the
4732      entire loop and compare against that instead.  */
4733   else
4734     {
4735       double_int period_value, max_niter;
4736 
4737       max_niter = desc->max;
4738       if (stmt_after_increment (loop, cand, use->stmt))
4739         max_niter += double_int_one;
4740       period_value = tree_to_double_int (period);
4741       if (max_niter.ugt (period_value))
4742         {
4743           /* See if we can take advantage of inferred loop bound information.  */
4744           if (data->loop_single_exit_p)
4745             {
4746               if (!max_loop_iterations (loop, &max_niter))
4747                 return false;
4748               /* The loop bound is already adjusted by adding 1.  */
4749               if (max_niter.ugt (period_value))
4750                 return false;
4751             }
4752           else
4753             return false;
4754         }
4755     }
4756 
4757   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4758 
4759   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4760 			 aff_combination_to_tree (&bnd));
4761   *comp = iv_elimination_compare (data, use);
4762 
4763   /* It is unlikely that computing the number of iterations using division
4764      would be more profitable than keeping the original induction variable.  */
4765   if (expression_expensive_p (*bound))
4766     return false;
4767 
4768   /* Sometimes, it is possible to handle the situation that the number of
4769      iterations may be zero unless additional assumptions hold, by using <
4770      instead of != in the exit condition.
4771 
4772      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4773 	   base the exit condition on it.  However, that is often too
4774 	   expensive.  */
4775   if (!integer_zerop (desc->may_be_zero))
4776     return iv_elimination_compare_lt (data, cand, comp, desc);
4777 
4778   return true;
4779 }
4780 
4781  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4782     be copied, if it is used in the loop body and DATA->body_includes_call.  */
4783 
4784 static int
4785 parm_decl_cost (struct ivopts_data *data, tree bound)
4786 {
4787   tree sbound = bound;
4788   STRIP_NOPS (sbound);
4789 
4790   if (TREE_CODE (sbound) == SSA_NAME
4791       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4792       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4793       && data->body_includes_call)
4794     return COSTS_N_INSNS (1);
4795 
4796   return 0;
4797 }
4798 
4799 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4800 
4801 static bool
4802 determine_use_iv_cost_condition (struct ivopts_data *data,
4803 				 struct iv_use *use, struct iv_cand *cand)
4804 {
4805   tree bound = NULL_TREE;
4806   struct iv *cmp_iv;
4807   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4808   comp_cost elim_cost, express_cost, cost, bound_cost;
4809   bool ok;
4810   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4811   tree *control_var, *bound_cst;
4812   enum tree_code comp = ERROR_MARK;
4813 
4814   /* Only consider real candidates.  */
4815   if (!cand->iv)
4816     {
4817       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4818 		       ERROR_MARK, -1);
4819       return false;
4820     }
4821 
4822   /* Try iv elimination.  */
4823   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4824     {
4825       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4826       if (elim_cost.cost == 0)
4827         elim_cost.cost = parm_decl_cost (data, bound);
4828       else if (TREE_CODE (bound) == INTEGER_CST)
4829         elim_cost.cost = 0;
4830       /* If we replace a loop condition 'i < n' with 'p < base + n',
4831 	 depends_on_elim will have 'base' and 'n' set, which implies
4832 	 that both 'base' and 'n' will be live during the loop.	 More likely,
4833 	 'base + n' will be loop invariant, resulting in only one live value
4834 	 during the loop.  So in that case we clear depends_on_elim and set
4835         elim_inv_expr_id instead.  */
4836       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4837 	{
4838 	  elim_inv_expr_id = get_expr_id (data, bound);
4839 	  bitmap_clear (depends_on_elim);
4840 	}
4841       /* The bound is a loop invariant, so it will be only computed
4842 	 once.  */
4843       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4844     }
4845   else
4846     elim_cost = infinite_cost;
4847 
4848   /* Try expressing the original giv.  If it is compared with an invariant,
4849      note that we cannot get rid of it.  */
4850   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4851 			      NULL, &cmp_iv);
4852   gcc_assert (ok);
4853 
4854   /* When the condition is a comparison of the candidate IV against
4855      zero, prefer this IV.
4856 
4857      TODO: The constant that we're subtracting from the cost should
4858      be target-dependent.  This information should be added to the
4859      target costs for each backend.  */
4860   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4861       && integer_zerop (*bound_cst)
4862       && (operand_equal_p (*control_var, cand->var_after, 0)
4863 	  || operand_equal_p (*control_var, cand->var_before, 0)))
4864     elim_cost.cost -= 1;
4865 
4866   express_cost = get_computation_cost (data, use, cand, false,
4867 				       &depends_on_express, NULL,
4868                                        &express_inv_expr_id);
4869   fd_ivopts_data = data;
4870   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4871 
4872   /* Count the cost of the original bound as well.  */
4873   bound_cost = force_var_cost (data, *bound_cst, NULL);
4874   if (bound_cost.cost == 0)
4875     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4876   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4877     bound_cost.cost = 0;
4878   express_cost.cost += bound_cost.cost;
4879 
4880   /* Choose the better approach, preferring the eliminated IV. */
4881   if (compare_costs (elim_cost, express_cost) <= 0)
4882     {
4883       cost = elim_cost;
4884       depends_on = depends_on_elim;
4885       depends_on_elim = NULL;
4886       inv_expr_id = elim_inv_expr_id;
4887     }
4888   else
4889     {
4890       cost = express_cost;
4891       depends_on = depends_on_express;
4892       depends_on_express = NULL;
4893       bound = NULL_TREE;
4894       comp = ERROR_MARK;
4895       inv_expr_id = express_inv_expr_id;
4896     }
4897 
4898   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4899 
4900   if (depends_on_elim)
4901     BITMAP_FREE (depends_on_elim);
4902   if (depends_on_express)
4903     BITMAP_FREE (depends_on_express);
4904 
4905   return !infinite_cost_p (cost);
4906 }
4907 
4908 /* Determines cost of basing replacement of USE on CAND.  Returns false
4909    if USE cannot be based on CAND.  */
4910 
4911 static bool
4912 determine_use_iv_cost (struct ivopts_data *data,
4913 		       struct iv_use *use, struct iv_cand *cand)
4914 {
4915   switch (use->type)
4916     {
4917     case USE_NONLINEAR_EXPR:
4918       return determine_use_iv_cost_generic (data, use, cand);
4919 
4920     case USE_ADDRESS:
4921       return determine_use_iv_cost_address (data, use, cand);
4922 
4923     case USE_COMPARE:
4924       return determine_use_iv_cost_condition (data, use, cand);
4925 
4926     default:
4927       gcc_unreachable ();
4928     }
4929 }
4930 
4931 /* Return true if get_computation_cost indicates that autoincrement is
4932    a possibility for the pair of USE and CAND, false otherwise.  */
4933 
4934 static bool
4935 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4936 			   struct iv_cand *cand)
4937 {
4938   bitmap depends_on;
4939   bool can_autoinc;
4940   comp_cost cost;
4941 
4942   if (use->type != USE_ADDRESS)
4943     return false;
4944 
4945   cost = get_computation_cost (data, use, cand, true, &depends_on,
4946 			       &can_autoinc, NULL);
4947 
4948   BITMAP_FREE (depends_on);
4949 
4950   return !infinite_cost_p (cost) && can_autoinc;
4951 }
4952 
4953 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4954    use that allows autoincrement, and set their AINC_USE if possible.  */
4955 
4956 static void
4957 set_autoinc_for_original_candidates (struct ivopts_data *data)
4958 {
4959   unsigned i, j;
4960 
4961   for (i = 0; i < n_iv_cands (data); i++)
4962     {
4963       struct iv_cand *cand = iv_cand (data, i);
4964       struct iv_use *closest_before = NULL;
4965       struct iv_use *closest_after = NULL;
4966       if (cand->pos != IP_ORIGINAL)
4967 	continue;
4968 
4969       for (j = 0; j < n_iv_uses (data); j++)
4970 	{
4971 	  struct iv_use *use = iv_use (data, j);
4972 	  unsigned uid = gimple_uid (use->stmt);
4973 
4974 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4975 	    continue;
4976 
4977 	  if (uid < gimple_uid (cand->incremented_at)
4978 	      && (closest_before == NULL
4979 		  || uid > gimple_uid (closest_before->stmt)))
4980 	    closest_before = use;
4981 
4982 	  if (uid > gimple_uid (cand->incremented_at)
4983 	      && (closest_after == NULL
4984 		  || uid < gimple_uid (closest_after->stmt)))
4985 	    closest_after = use;
4986 	}
4987 
4988       if (closest_before != NULL
4989 	  && autoinc_possible_for_pair (data, closest_before, cand))
4990 	cand->ainc_use = closest_before;
4991       else if (closest_after != NULL
4992 	       && autoinc_possible_for_pair (data, closest_after, cand))
4993 	cand->ainc_use = closest_after;
4994     }
4995 }
4996 
4997 /* Finds the candidates for the induction variables.  */
4998 
4999 static void
5000 find_iv_candidates (struct ivopts_data *data)
5001 {
5002   /* Add commonly used ivs.  */
5003   add_standard_iv_candidates (data);
5004 
5005   /* Add old induction variables.  */
5006   add_old_ivs_candidates (data);
5007 
5008   /* Add induction variables derived from uses.  */
5009   add_derived_ivs_candidates (data);
5010 
5011   set_autoinc_for_original_candidates (data);
5012 
5013   /* Record the important candidates.  */
5014   record_important_candidates (data);
5015 }
5016 
5017 /* Determines costs of basing the use of the iv on an iv candidate.  */
5018 
5019 static void
5020 determine_use_iv_costs (struct ivopts_data *data)
5021 {
5022   unsigned i, j;
5023   struct iv_use *use;
5024   struct iv_cand *cand;
5025   bitmap to_clear = BITMAP_ALLOC (NULL);
5026 
5027   alloc_use_cost_map (data);
5028 
5029   for (i = 0; i < n_iv_uses (data); i++)
5030     {
5031       use = iv_use (data, i);
5032 
5033       if (data->consider_all_candidates)
5034 	{
5035 	  for (j = 0; j < n_iv_cands (data); j++)
5036 	    {
5037 	      cand = iv_cand (data, j);
5038 	      determine_use_iv_cost (data, use, cand);
5039 	    }
5040 	}
5041       else
5042 	{
5043 	  bitmap_iterator bi;
5044 
5045 	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5046 	    {
5047 	      cand = iv_cand (data, j);
5048 	      if (!determine_use_iv_cost (data, use, cand))
5049 		bitmap_set_bit (to_clear, j);
5050 	    }
5051 
5052 	  /* Remove the candidates for that the cost is infinite from
5053 	     the list of related candidates.  */
5054 	  bitmap_and_compl_into (use->related_cands, to_clear);
5055 	  bitmap_clear (to_clear);
5056 	}
5057     }
5058 
5059   BITMAP_FREE (to_clear);
5060 
5061   if (dump_file && (dump_flags & TDF_DETAILS))
5062     {
5063       fprintf (dump_file, "Use-candidate costs:\n");
5064 
5065       for (i = 0; i < n_iv_uses (data); i++)
5066 	{
5067 	  use = iv_use (data, i);
5068 
5069 	  fprintf (dump_file, "Use %d:\n", i);
5070 	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5071 	  for (j = 0; j < use->n_map_members; j++)
5072 	    {
5073 	      if (!use->cost_map[j].cand
5074 		  || infinite_cost_p (use->cost_map[j].cost))
5075 		continue;
5076 
5077 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5078 		       use->cost_map[j].cand->id,
5079 		       use->cost_map[j].cost.cost,
5080 		       use->cost_map[j].cost.complexity);
5081 	      if (use->cost_map[j].depends_on)
5082 		bitmap_print (dump_file,
5083 			      use->cost_map[j].depends_on, "","");
5084               if (use->cost_map[j].inv_expr_id != -1)
5085                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5086 	      fprintf (dump_file, "\n");
5087 	    }
5088 
5089 	  fprintf (dump_file, "\n");
5090 	}
5091       fprintf (dump_file, "\n");
5092     }
5093 }
5094 
5095 /* Determines cost of the candidate CAND.  */
5096 
5097 static void
determine_iv_cost(struct ivopts_data * data,struct iv_cand * cand)5098 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5099 {
5100   comp_cost cost_base;
5101   unsigned cost, cost_step;
5102   tree base;
5103 
5104   if (!cand->iv)
5105     {
5106       cand->cost = 0;
5107       return;
5108     }
5109 
5110   /* There are two costs associated with the candidate -- its increment
5111      and its initialization.  The second is almost negligible for any loop
5112      that rolls enough, so we take it just very little into account.  */
5113 
5114   base = cand->iv->base;
5115   cost_base = force_var_cost (data, base, NULL);
5116   /* It will be exceptional that the iv register happens to be initialized with
5117      the proper value at no cost.  In general, there will at least be a regcopy
5118      or a const set.  */
5119   if (cost_base.cost == 0)
5120     cost_base.cost = COSTS_N_INSNS (1);
5121   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5122 
5123   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5124 
5125   /* Prefer the original ivs unless we may gain something by replacing it.
5126      The reason is to make debugging simpler; so this is not relevant for
5127      artificial ivs created by other optimization passes.  */
5128   if (cand->pos != IP_ORIGINAL
5129       || !SSA_NAME_VAR (cand->var_before)
5130       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5131     cost++;
5132 
5133   /* Prefer not to insert statements into latch unless there are some
5134      already (so that we do not create unnecessary jumps).  */
5135   if (cand->pos == IP_END
5136       && empty_block_p (ip_end_pos (data->current_loop)))
5137     cost++;
5138 
5139   cand->cost = cost;
5140   cand->cost_step = cost_step;
5141 }
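
/* For instance (schematically), a candidate incremented by "ivtmp += 4"
   costs one addition per iteration (cost_step) plus the cost of computing
   its initial value in the preheader; the latter is discounted by
   adjust_setup_cost, since it is paid only once.  The increments above
   merely break ties in favor of the original ivs and of keeping an empty
   latch empty.  */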

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
					    data->body_includes_call);
}
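
/* Roughly speaking (the exact numbers are target-specific), the penalty
   returned by estimate_reg_pressure_cost grows slowly as long as SIZE plus
   the other registers live in the loop fit into the available hard
   registers, and considerably faster once spilling must be assumed; adding
   SIZE itself biases the choice towards smaller sets of ivs.  */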

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}


/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

/* Remove the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
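
/* Note that reverting works by reversing the list and swapping each
   old_cp/new_cp pair in place (see iv_ca_delta_reverse above), so the same
   loop re-installs the original cost pairs in the opposite order; the list
   is then reversed back so that a later forward commit sees it intact.  */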

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  If MIN_NCAND is true, the function
   tries to find a solution with a minimal number of iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
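
/* The recursion above terminates: a delta is committed only when it
   strictly lowered the cost, and each committed narrowing removes at least
   one candidate from the finite set IVS, so the recursion depth is bounded
   by the number of candidates in the set.  */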

/* Tries to extend the set IVS in the best possible way in order to
   express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
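
/* One improvement step thus either adds a single candidate (possibly
   followed by pruning the candidates it made redundant), or, if no
   extension helps, only prunes; the driver below repeats these steps until
   the cost reaches a local minimum.  */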

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

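/* Finds the optimal set of induction variables, trying both the strategy
   preferring the original ivs and the strategy preferring candidates not
   based on any memory object, and keeping the cheaper of the two
   results.  */
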
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
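
/* With -fdump-tree-ivopts-details, the outcomes of the two strategies
   appear in the dump as the "Original cost" and "Final cost" lines printed
   above, which makes it easy to see which strategy won for a given
   loop.  */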

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
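
/* Schematically, for a candidate with base B and step S, create_iv emits

     # ivtmp_1 = PHI <B (preheader), ivtmp_2 (latch)>
     ...
     ivtmp_2 = ivtmp_1 + S;

   with the increment placed at the position selected above (the SSA names
   here are illustrative, not the ones actually generated).  */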

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) will introduce an overlapping live
   range, thus increasing register pressure.  This peephole transforms it
   into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
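
/* As a schematic example, an access a[i] whose selected candidate advances
   a pointer through the array is rewritten here into a MEM_REF based on
   that candidate, with create_mem_ref choosing among the addressing modes
   the target supports (base, base + offset, base + index * step, ...).  */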

/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
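
/* For example, when the final value of the candidate is cheap to compute,
   an exit test such as "i < n" can be replaced by comparing the candidate
   against a bound evaluated once before the loop (the code above inserts
   that computation on the preheader edge); only when such elimination is
   not possible is the original comparison re-expressed in terms of the
   candidate.  */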

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}