/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
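
/* As a concrete illustration (not part of the pass, just an example of the
   transformation it aims for), consider a loop

     for (i = 0; i < n; i++)
       a[i] = b[i] + c;

   The interesting uses are the addresses &a[i] and &b[i] (USE_ADDRESS) and
   the exit test i < n (USE_COMPARE).  Choosing pointer induction variables
   as candidates may rewrite the loop to the equivalent of

     for (p = a, q = b; p < a + n; p++, q++)
       *p = *q + c;

   which strength-reduces the multiplications i * sizeof (*a) implied by
   the indexing and allows the original variable i to be eliminated.  */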

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier of that use, if HAVE_USE_FOR is true.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For the original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for a
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in the step of
			   the biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
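
/* These bounds map to the --param options iv-consider-all-candidates-bound,
   iv-max-considered-uses and iv-always-prune-cand-set-bound defined in
   params.def, so they can be tuned without recompiling, e.g. (the value
   shown is purely illustrative, not a recommendation):

     gcc -O2 --param iv-max-considered-uses=400 file.c  */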

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists a number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
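
/* A worked instance of the computation above, for illustration: with
   BITS = 4 (arithmetic modulo 16), A = 9 and B = 3, the inverse of B is
   3^(2^3 - 1) = 3^7 = 2187 = 11 (mod 16); indeed 3 * 11 = 33 = 1 (mod 16).
   Then X = 9 * 11 = 99 = 3 (mod 16), and 3 * 3 = 9 as required.  The
   exponent 2^(bits - 1) - 1 works because every odd b satisfies
   b^(2^(bits - 2)) = 1 (mod 2^bits) for bits >= 3, and 2^(bits - 1) is a
   multiple of 2^(bits - 2), so b * b^(2^(bits - 1) - 1) = b^(2^(bits - 1))
   = 1 (mod 2^bits).  */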

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
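
/* As an illustration of the three positions: with IP_NORMAL the increment
   is emitted just before the exit test, as in

     i' = i + 1;
     if (i' < n)
       goto <loop body>;

   so only the exit test and the statements of the latch block are "after"
   the increment.  With IP_END the increment is emitted at the end of the
   latch, after every statement of the body, so nothing in the loop is
   after it; and with IP_ORIGINAL the increment stays wherever the original
   biv was incremented.  */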

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  struct tree_niter_desc desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;

      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && zero_p (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	nfe_desc->niter = desc.niter;
      else
	nfe_desc->niter = NULL_TREE;
    }
  else
    nfe_desc = *slot;

  return nfe_desc->niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
		           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
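
/* Illustrative behavior of the function above (not an exhaustive
   specification): for &a[i] the base object is &a; for &*p, where p is a
   pointer, the recursion through the INDIRECT_REF case yields p itself;
   for &a[i] + 4 the PLUS_EXPR case combines the base objects of the two
   operands, giving &a again; and for an integer constant cast to a
   pointer the result is NULL_TREE, i.e. no object is known.  */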

/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if the caller has not done so already, to avoid
     showing it in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we cannot do any useful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating a condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
	     && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
1782 
1783 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1784    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1785    we are at the top-level of the processed address.  */
1786 
1787 static tree
strip_offset_1(tree expr,bool inside_addr,bool top_compref,unsigned HOST_WIDE_INT * offset)1788 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789 		unsigned HOST_WIDE_INT *offset)
1790 {
1791   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792   enum tree_code code;
1793   tree type, orig_type = TREE_TYPE (expr);
1794   unsigned HOST_WIDE_INT off0, off1, st;
1795   tree orig_expr = expr;
1796 
1797   STRIP_NOPS (expr);
1798 
1799   type = TREE_TYPE (expr);
1800   code = TREE_CODE (expr);
1801   *offset = 0;
1802 
1803   switch (code)
1804     {
1805     case INTEGER_CST:
1806       if (!cst_and_fits_in_hwi (expr)
1807 	  || zero_p (expr))
1808 	return orig_expr;
1809 
1810       *offset = int_cst_value (expr);
1811       return build_int_cst (orig_type, 0);
1812 
1813     case PLUS_EXPR:
1814     case MINUS_EXPR:
1815       op0 = TREE_OPERAND (expr, 0);
1816       op1 = TREE_OPERAND (expr, 1);
1817 
1818       op0 = strip_offset_1 (op0, false, false, &off0);
1819       op1 = strip_offset_1 (op1, false, false, &off1);
1820 
1821       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822       if (op0 == TREE_OPERAND (expr, 0)
1823 	  && op1 == TREE_OPERAND (expr, 1))
1824 	return orig_expr;
1825 
1826       if (zero_p (op1))
1827 	expr = op0;
1828       else if (zero_p (op0))
1829 	{
1830 	  if (code == PLUS_EXPR)
1831 	    expr = op1;
1832 	  else
1833 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
1834 	}
1835       else
1836 	expr = fold_build2 (code, type, op0, op1);
1837 
1838       return fold_convert (orig_type, expr);
1839 
1840     case ARRAY_REF:
1841       if (!inside_addr)
1842 	return orig_expr;
1843 
1844       step = array_ref_element_size (expr);
1845       if (!cst_and_fits_in_hwi (step))
1846 	break;
1847 
1848       st = int_cst_value (step);
1849       op1 = TREE_OPERAND (expr, 1);
1850       op1 = strip_offset_1 (op1, false, false, &off1);
1851       *offset = off1 * st;
1852 
1853       if (top_compref
1854 	  && zero_p (op1))
1855 	{
1856 	  /* Strip the component reference completely.  */
1857 	  op0 = TREE_OPERAND (expr, 0);
1858 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1859 	  *offset += off0;
1860 	  return op0;
1861 	}
1862       break;
1863 
1864     case COMPONENT_REF:
1865       if (!inside_addr)
1866 	return orig_expr;
1867 
1868       tmp = component_ref_field_offset (expr);
1869       if (top_compref
1870 	  && cst_and_fits_in_hwi (tmp))
1871 	{
1872 	  /* Strip the component reference completely.  */
1873 	  op0 = TREE_OPERAND (expr, 0);
1874 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875 	  *offset = off0 + int_cst_value (tmp);
1876 	  return op0;
1877 	}
1878       break;
1879 
1880     case ADDR_EXPR:
1881       op0 = TREE_OPERAND (expr, 0);
1882       op0 = strip_offset_1 (op0, true, true, &off0);
1883       *offset += off0;
1884 
1885       if (op0 == TREE_OPERAND (expr, 0))
1886 	return orig_expr;
1887 
1888       expr = build_fold_addr_expr (op0);
1889       return fold_convert (orig_type, expr);
1890 
1891     case INDIRECT_REF:
1892       inside_addr = false;
1893       break;
1894 
1895     default:
1896       return orig_expr;
1897     }
1898 
1899   /* Default handling of expressions for which we want to recurse into
1900      the first operand.  */
1901   op0 = TREE_OPERAND (expr, 0);
1902   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1903   *offset += off0;
1904 
1905   if (op0 == TREE_OPERAND (expr, 0)
1906       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1907     return orig_expr;
1908 
1909   expr = copy_node (expr);
1910   TREE_OPERAND (expr, 0) = op0;
1911   if (op1)
1912     TREE_OPERAND (expr, 1) = op1;
1913 
1914   /* Inside an address, we might strip the top-level component references,
1915      thus changing the type of the expression.  The handling of ADDR_EXPR
1916      will fix that.  */
1917   expr = fold_convert (orig_type, expr);
1918 
1919   return expr;
1920 }
1921 
1922 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1923 
1924 static tree
1925 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926 {
1927   return strip_offset_1 (expr, false, false, offset);
1928 }
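
/* Editorial example (illustrative, not in the original source): given
   EXPR = &a[i + 3] where a is an array of 4-byte integers, strip_offset
   returns &a[i] and stores 12 to *OFFSET; given EXPR = i + 7 it returns
   i and stores 7.  */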
1929 
1930 /* Returns a variant of TYPE that can be used as a base for different uses.
1931    We return an unsigned type with the same precision, which avoids problems
1932    with overflows.  */
1933 
1934 static tree
1935 generic_type_for (tree type)
1936 {
1937   if (POINTER_TYPE_P (type))
1938     return unsigned_type_for (type);
1939 
1940   if (TYPE_UNSIGNED (type))
1941     return type;
1942 
1943   return unsigned_type_for (type);
1944 }
1945 
1946 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1947    the bitmap in which we should store them.  */
1948 
1949 static struct ivopts_data *fd_ivopts_data;
1950 static tree
1951 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952 {
1953   bitmap *depends_on = data;
1954   struct version_info *info;
1955 
1956   if (TREE_CODE (*expr_p) != SSA_NAME)
1957     return NULL_TREE;
1958   info = name_info (fd_ivopts_data, *expr_p);
1959 
1960   if (!info->inv_id || info->has_nonlin_use)
1961     return NULL_TREE;
1962 
1963   if (!*depends_on)
1964     *depends_on = BITMAP_ALLOC (NULL);
1965   bitmap_set_bit (*depends_on, info->inv_id);
1966 
1967   return NULL_TREE;
1968 }
1969 
1970 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1971    position to POS.  If USE is not NULL, the candidate is set as related to
1972    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1973    replacement of the final value of the iv by a direct computation.  */
1974 
1975 static struct iv_cand *
1976 add_candidate_1 (struct ivopts_data *data,
1977 		 tree base, tree step, bool important, enum iv_position pos,
1978 		 struct iv_use *use, tree incremented_at)
1979 {
1980   unsigned i;
1981   struct iv_cand *cand = NULL;
1982   tree type, orig_type;
1983 
1984   if (base)
1985     {
1986       orig_type = TREE_TYPE (base);
1987       type = generic_type_for (orig_type);
1988       if (type != orig_type)
1989 	{
1990 	  base = fold_convert (type, base);
1991 	  if (step)
1992 	    step = fold_convert (type, step);
1993 	}
1994     }
1995 
1996   for (i = 0; i < n_iv_cands (data); i++)
1997     {
1998       cand = iv_cand (data, i);
1999 
2000       if (cand->pos != pos)
2001 	continue;
2002 
2003       if (cand->incremented_at != incremented_at)
2004 	continue;
2005 
2006       if (!cand->iv)
2007 	{
2008 	  if (!base && !step)
2009 	    break;
2010 
2011 	  continue;
2012 	}
2013 
2014       if (!base && !step)
2015 	continue;
2016 
2017       if (!operand_equal_p (base, cand->iv->base, 0))
2018 	continue;
2019 
2020       if (zero_p (cand->iv->step))
2021 	{
2022 	  if (zero_p (step))
2023 	    break;
2024 	}
2025       else
2026 	{
2027 	  if (step && operand_equal_p (step, cand->iv->step, 0))
2028 	    break;
2029 	}
2030     }
2031 
2032   if (i == n_iv_cands (data))
2033     {
2034       cand = XCNEW (struct iv_cand);
2035       cand->id = i;
2036 
2037       if (!base && !step)
2038 	cand->iv = NULL;
2039       else
2040 	cand->iv = alloc_iv (base, step);
2041 
2042       cand->pos = pos;
2043       if (pos != IP_ORIGINAL && cand->iv)
2044 	{
2045 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046 	  cand->var_after = cand->var_before;
2047 	}
2048       cand->important = important;
2049       cand->incremented_at = incremented_at;
2050       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051 
2052       if (step
2053 	  && TREE_CODE (step) != INTEGER_CST)
2054 	{
2055 	  fd_ivopts_data = data;
2056 	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2057 	}
2058 
2059       if (dump_file && (dump_flags & TDF_DETAILS))
2060 	dump_cand (dump_file, cand);
2061     }
2062 
2063   if (important && !cand->important)
2064     {
2065       cand->important = true;
2066       if (dump_file && (dump_flags & TDF_DETAILS))
2067 	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2068     }
2069 
2070   if (use)
2071     {
2072       bitmap_set_bit (use->related_cands, i);
2073       if (dump_file && (dump_flags & TDF_DETAILS))
2074 	fprintf (dump_file, "Candidate %d is related to use %d\n",
2075 		 cand->id, use->id);
2076     }
2077 
2078   return cand;
2079 }
2080 
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2082    is allowed.
2083 
2084    The purpose is to avoid splitting the latch edge with a biv increment,
2085    thus creating a jump, possibly confusing other optimization passes and
2086    leaving less freedom to the scheduler.  So we allow IP_END_POS only if
2087    IP_NORMAL_POS is not available (so we do not have a better alternative),
2088    or if the latch edge is already nonempty.  */
2089 
2090 static bool
2091 allow_ip_end_pos_p (struct loop *loop)
2092 {
2093   if (!ip_normal_pos (loop))
2094     return true;
2095 
2096   if (!empty_block_p (ip_end_pos (loop)))
2097     return true;
2098 
2099   return false;
2100 }
2101 
2102 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2103    position to POS.  If USE is not NULL, the candidate is set as related to
2104    it.  The candidate computation is scheduled on all available positions.  */
2105 
2106 static void
2107 add_candidate (struct ivopts_data *data,
2108 	       tree base, tree step, bool important, struct iv_use *use)
2109 {
2110   if (ip_normal_pos (data->current_loop))
2111     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112   if (ip_end_pos (data->current_loop)
2113       && allow_ip_end_pos_p (data->current_loop))
2114     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2115 }
2116 
2117 /* Add a standard "0 + 1 * iteration" iv candidate for a
2118    type with SIZE bits.  */
2119 
2120 static void
2121 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2122 				     unsigned int size)
2123 {
2124   tree type = lang_hooks.types.type_for_size (size, true);
2125   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2126 		 true, NULL);
2127 }
2128 
2129 /* Adds standard iv candidates.  */
2130 
2131 static void
2132 add_standard_iv_candidates (struct ivopts_data *data)
2133 {
2134   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135 
2136   /* The same for a double-integer type if it is still fast enough.  */
2137   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139 }
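
/* Editorial note (illustrative): on a target with 32-bit int and 64-bit
   words, the calls above add a "0 + 1 * iteration" candidate in both an
   unsigned 32-bit and an unsigned 64-bit type; on a 32-bit-word target
   only the 32-bit candidate is added.  */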
2140 
2141 
2142 /* Adds candidates based on the old induction variable IV.  */
2143 
2144 static void
2145 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146 {
2147   tree phi, def;
2148   struct iv_cand *cand;
2149 
2150   add_candidate (data, iv->base, iv->step, true, NULL);
2151 
2152   /* The same, but with initial value zero.  */
2153   add_candidate (data,
2154 		 build_int_cst (TREE_TYPE (iv->base), 0),
2155 		 iv->step, true, NULL);
2156 
2157   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158   if (TREE_CODE (phi) == PHI_NODE)
2159     {
2160       /* Additionally record the possibility of leaving the original iv
2161 	 untouched.  */
2162       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163       cand = add_candidate_1 (data,
2164 			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165 			      SSA_NAME_DEF_STMT (def));
2166       cand->var_before = iv->ssa_name;
2167       cand->var_after = def;
2168     }
2169 }
2170 
2171 /* Adds candidates based on the old induction variables.  */
2172 
2173 static void
2174 add_old_ivs_candidates (struct ivopts_data *data)
2175 {
2176   unsigned i;
2177   struct iv *iv;
2178   bitmap_iterator bi;
2179 
2180   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181     {
2182       iv = ver_info (data, i)->iv;
2183       if (iv && iv->biv_p && !zero_p (iv->step))
2184 	add_old_iv_candidates (data, iv);
2185     }
2186 }
2187 
2188 /* Adds candidates based on the value of the induction variable IV and USE.  */
2189 
2190 static void
2191 add_iv_value_candidates (struct ivopts_data *data,
2192 			 struct iv *iv, struct iv_use *use)
2193 {
2194   unsigned HOST_WIDE_INT offset;
2195   tree base;
2196 
2197   add_candidate (data, iv->base, iv->step, false, use);
2198 
2199   /* The same, but with initial value zero.  Make such a variable
2200      important, since it is generic enough that many uses may be based
2201      on it.  */
2202   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203 		 iv->step, true, use);
2204 
2205   /* Third, try removing the constant offset.  */
2206   base = strip_offset (iv->base, &offset);
2207   if (offset)
2208     add_candidate (data, base, iv->step, false, use);
2209 }
2210 
2211 /* Adds candidates based on the uses.  */
2212 
2213 static void
2214 add_derived_ivs_candidates (struct ivopts_data *data)
2215 {
2216   unsigned i;
2217 
2218   for (i = 0; i < n_iv_uses (data); i++)
2219     {
2220       struct iv_use *use = iv_use (data, i);
2221 
2222       if (!use)
2223 	continue;
2224 
2225       switch (use->type)
2226 	{
2227 	case USE_NONLINEAR_EXPR:
2228 	case USE_COMPARE:
2229 	case USE_ADDRESS:
2230 	  /* Just add the ivs based on the value of the iv used here.  */
2231 	  add_iv_value_candidates (data, use->iv, use);
2232 	  break;
2233 
2234 	default:
2235 	  gcc_unreachable ();
2236 	}
2237     }
2238 }
2239 
2240 /* Record important candidates and add them to related_cands bitmaps
2241    if needed.  */
2242 
2243 static void
2244 record_important_candidates (struct ivopts_data *data)
2245 {
2246   unsigned i;
2247   struct iv_use *use;
2248 
2249   for (i = 0; i < n_iv_cands (data); i++)
2250     {
2251       struct iv_cand *cand = iv_cand (data, i);
2252 
2253       if (cand->important)
2254 	bitmap_set_bit (data->important_candidates, i);
2255     }
2256 
2257   data->consider_all_candidates = (n_iv_cands (data)
2258 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2259 
2260   if (data->consider_all_candidates)
2261     {
2262       /* We will not need "related_cands" bitmaps in this case,
2263 	 so release them to decrease peak memory consumption.  */
2264       for (i = 0; i < n_iv_uses (data); i++)
2265 	{
2266 	  use = iv_use (data, i);
2267 	  BITMAP_FREE (use->related_cands);
2268 	}
2269     }
2270   else
2271     {
2272       /* Add important candidates to the related_cands bitmaps.  */
2273       for (i = 0; i < n_iv_uses (data); i++)
2274 	bitmap_ior_into (iv_use (data, i)->related_cands,
2275 			 data->important_candidates);
2276     }
2277 }
2278 
2279 /* Finds the candidates for the induction variables.  */
2280 
2281 static void
2282 find_iv_candidates (struct ivopts_data *data)
2283 {
2284   /* Add commonly used ivs.  */
2285   add_standard_iv_candidates (data);
2286 
2287   /* Add old induction variables.  */
2288   add_old_ivs_candidates (data);
2289 
2290   /* Add induction variables derived from uses.  */
2291   add_derived_ivs_candidates (data);
2292 
2293   /* Record the important candidates.  */
2294   record_important_candidates (data);
2295 }
2296 
2297 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2298    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2299    we allocate a simple list for every use.  */
2300 
2301 static void
2302 alloc_use_cost_map (struct ivopts_data *data)
2303 {
2304   unsigned i, size, s, j;
2305 
2306   for (i = 0; i < n_iv_uses (data); i++)
2307     {
2308       struct iv_use *use = iv_use (data, i);
2309       bitmap_iterator bi;
2310 
2311       if (data->consider_all_candidates)
2312 	size = n_iv_cands (data);
2313       else
2314 	{
2315 	  s = 0;
2316 	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2317 	    {
2318 	      s++;
2319 	    }
2320 
2321 	  /* Round up to a power of two, so that computing the modulus by it is fast.  */
2322 	  for (size = 1; size < s; size <<= 1)
2323 	    continue;
2324 	}
2325 
2326       use->n_map_members = size;
2327       use->cost_map = XCNEWVEC (struct cost_pair, size);
2328     }
2329 }
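
/* Editorial sketch (not part of the pass): the rounding loop above
   computes the least power of two that is >= S -- e.g. s = 5 yields
   size = 8 -- so that set_use_iv_cost can reduce candidate ids with a
   cheap bitwise AND instead of a division.  A standalone version, with a
   hypothetical name:  */

static unsigned
round_up_to_pow2 (unsigned s)
{
  unsigned size;

  /* Doubling SIZE until it reaches S gives the least power of two >= S.  */
  for (size = 1; size < s; size <<= 1)
    continue;
  return size;
}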
2330 
2331 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2332    depends on invariants DEPENDS_ON and that the value used in expressing
2333    it is VALUE.  */
2334 
2335 static void
2336 set_use_iv_cost (struct ivopts_data *data,
2337 		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2338 		 bitmap depends_on, tree value)
2339 {
2340   unsigned i, s;
2341 
2342   if (cost == INFTY)
2343     {
2344       BITMAP_FREE (depends_on);
2345       return;
2346     }
2347 
2348   if (data->consider_all_candidates)
2349     {
2350       use->cost_map[cand->id].cand = cand;
2351       use->cost_map[cand->id].cost = cost;
2352       use->cost_map[cand->id].depends_on = depends_on;
2353       use->cost_map[cand->id].value = value;
2354       return;
2355     }
2356 
2357   /* n_map_members is a power of two, so this computes modulo.  */
2358   s = cand->id & (use->n_map_members - 1);
2359   for (i = s; i < use->n_map_members; i++)
2360     if (!use->cost_map[i].cand)
2361       goto found;
2362   for (i = 0; i < s; i++)
2363     if (!use->cost_map[i].cand)
2364       goto found;
2365 
2366   gcc_unreachable ();
2367 
2368 found:
2369   use->cost_map[i].cand = cand;
2370   use->cost_map[i].cost = cost;
2371   use->cost_map[i].depends_on = depends_on;
2372   use->cost_map[i].value = value;
2373 }
2374 
2375 /* Gets cost of (USE, CANDIDATE) pair.  */
2376 
2377 static struct cost_pair *
2378 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2379 		 struct iv_cand *cand)
2380 {
2381   unsigned i, s;
2382   struct cost_pair *ret;
2383 
2384   if (!cand)
2385     return NULL;
2386 
2387   if (data->consider_all_candidates)
2388     {
2389       ret = use->cost_map + cand->id;
2390       if (!ret->cand)
2391 	return NULL;
2392 
2393       return ret;
2394     }
2395 
2396   /* n_map_members is a power of two, so this computes modulo.  */
2397   s = cand->id & (use->n_map_members - 1);
2398   for (i = s; i < use->n_map_members; i++)
2399     if (use->cost_map[i].cand == cand)
2400       return use->cost_map + i;
2401 
2402   for (i = 0; i < s; i++)
2403     if (use->cost_map[i].cand == cand)
2404       return use->cost_map + i;
2405 
2406   return NULL;
2407 }
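
/* Editorial sketch (not part of the pass): set_use_iv_cost and
   get_use_iv_cost above form a small open-addressed hash table.  Because
   n_map_members is a power of two, "id & (n - 1)" equals "id % n" without
   a division, and a probe scans from the home slot to the end of the
   array and then wraps around to the beginning.  A minimal standalone
   model with hypothetical names:  */

struct cost_slot
{
  int id;	/* An id of 0 marks an empty slot in this model.  */
  int cost;
};

static int
lookup_cost (struct cost_slot *tab, unsigned n, int id)
{
  unsigned i, s = id & (n - 1);	/* N must be a power of two.  */

  for (i = s; i < n; i++)
    if (tab[i].id == id)
      return tab[i].cost;
  for (i = 0; i < s; i++)
    if (tab[i].id == id)
      return tab[i].cost;
  return -1;	/* Not found.  */
}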
2408 
2409 /* Returns an estimate of the cost of computing SEQ.  */
2410 
2411 static unsigned
2412 seq_cost (rtx seq)
2413 {
2414   unsigned cost = 0;
2415   rtx set;
2416 
2417   for (; seq; seq = NEXT_INSN (seq))
2418     {
2419       set = single_set (seq);
2420       if (set)
2421 	cost += rtx_cost (set, SET);
2422       else
2423 	cost++;
2424     }
2425 
2426   return cost;
2427 }
2428 
2429 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2430 static rtx
2431 produce_memory_decl_rtl (tree obj, int *regno)
2432 {
2433   rtx x;
2434 
2435   gcc_assert (obj);
2436   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2437     {
2438       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2439       x = gen_rtx_SYMBOL_REF (Pmode, name);
2440     }
2441   else
2442     x = gen_raw_REG (Pmode, (*regno)++);
2443 
2444   return gen_rtx_MEM (DECL_MODE (obj), x);
2445 }
2446 
2447 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
2448    walk_tree.  DATA contains the actual fake register number.  */
2449 
2450 static tree
2451 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2452 {
2453   tree obj = NULL_TREE;
2454   rtx x = NULL_RTX;
2455   int *regno = data;
2456 
2457   switch (TREE_CODE (*expr_p))
2458     {
2459     case ADDR_EXPR:
2460       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2461 	   handled_component_p (*expr_p);
2462 	   expr_p = &TREE_OPERAND (*expr_p, 0))
2463 	continue;
2464       obj = *expr_p;
2465       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2466         x = produce_memory_decl_rtl (obj, regno);
2467       break;
2468 
2469     case SSA_NAME:
2470       *ws = 0;
2471       obj = SSA_NAME_VAR (*expr_p);
2472       if (!DECL_RTL_SET_P (obj))
2473 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2474       break;
2475 
2476     case VAR_DECL:
2477     case PARM_DECL:
2478     case RESULT_DECL:
2479       *ws = 0;
2480       obj = *expr_p;
2481 
2482       if (DECL_RTL_SET_P (obj))
2483 	break;
2484 
2485       if (DECL_MODE (obj) == BLKmode)
2486 	x = produce_memory_decl_rtl (obj, regno);
2487       else
2488 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2489 
2490       break;
2491 
2492     default:
2493       break;
2494     }
2495 
2496   if (x)
2497     {
2498       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2499       SET_DECL_RTL (obj, x);
2500     }
2501 
2502   return NULL_TREE;
2503 }
2504 
2505 /* Determines cost of the computation of EXPR.  */
2506 
2507 static unsigned
2508 computation_cost (tree expr)
2509 {
2510   rtx seq, rslt;
2511   tree type = TREE_TYPE (expr);
2512   unsigned cost;
2513   /* Avoid using hard regs in ways which may be unsupported.  */
2514   int regno = LAST_VIRTUAL_REGISTER + 1;
2515 
2516   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2517   start_sequence ();
2518   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2519   seq = get_insns ();
2520   end_sequence ();
2521 
2522   cost = seq_cost (seq);
2523   if (MEM_P (rslt))
2524     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2525 
2526   return cost;
2527 }
2528 
2529 /* Returns variable containing the value of candidate CAND at statement AT.  */
2530 
2531 static tree
2532 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2533 {
2534   if (stmt_after_increment (loop, cand, stmt))
2535     return cand->var_after;
2536   else
2537     return cand->var_before;
2538 }
2539 
2540 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2541    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2542 
2543 int
2544 tree_int_cst_sign_bit (tree t)
2545 {
2546   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2547   unsigned HOST_WIDE_INT w;
2548 
2549   if (bitno < HOST_BITS_PER_WIDE_INT)
2550     w = TREE_INT_CST_LOW (t);
2551   else
2552     {
2553       w = TREE_INT_CST_HIGH (t);
2554       bitno -= HOST_BITS_PER_WIDE_INT;
2555     }
2556 
2557   return (w >> bitno) & 1;
2558 }
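
/* Editorial sketch (not part of the pass): the function above selects the
   host word holding bit TYPE_PRECISION - 1 and shifts it down.  For a
   value that fits in a single host word the extraction reduces to the
   following standalone form (hypothetical name):  */

static int
sign_bit_of (unsigned long w, unsigned precision)
{
  /* E.g. sign_bit_of (0x80, 8) == 1 and sign_bit_of (0x7f, 8) == 0.  */
  return (w >> (precision - 1)) & 1;
}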
2559 
2560 /* If we can prove that TOP = cst * BOT for some constant cst,
2561    store cst to MUL and return true.  Otherwise return false.
2562    The returned value is always sign-extended, regardless of the
2563    signedness of TOP and BOT.  */
2564 
2565 static bool
2566 constant_multiple_of (tree top, tree bot, double_int *mul)
2567 {
2568   tree mby;
2569   enum tree_code code;
2570   double_int res, p0, p1;
2571   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2572 
2573   STRIP_NOPS (top);
2574   STRIP_NOPS (bot);
2575 
2576   if (operand_equal_p (top, bot, 0))
2577     {
2578       *mul = double_int_one;
2579       return true;
2580     }
2581 
2582   code = TREE_CODE (top);
2583   switch (code)
2584     {
2585     case MULT_EXPR:
2586       mby = TREE_OPERAND (top, 1);
2587       if (TREE_CODE (mby) != INTEGER_CST)
2588 	return false;
2589 
2590       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2591 	return false;
2592 
2593       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2594 			      precision);
2595       return true;
2596 
2597     case PLUS_EXPR:
2598     case MINUS_EXPR:
2599       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2600 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2601 	return false;
2602 
2603       if (code == MINUS_EXPR)
2604 	p1 = double_int_neg (p1);
2605       *mul = double_int_sext (double_int_add (p0, p1), precision);
2606       return true;
2607 
2608     case INTEGER_CST:
2609       if (TREE_CODE (bot) != INTEGER_CST)
2610 	return false;
2611 
2612       p0 = double_int_sext (tree_to_double_int (top), precision);
2613       p1 = double_int_sext (tree_to_double_int (bot), precision);
2614       if (double_int_zero_p (p1))
2615 	return false;
2616       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2617 			      precision);
2618       return double_int_zero_p (res);
2619 
2620     default:
2621       return false;
2622     }
2623 }
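
/* Editorial example (illustrative): with TOP = (n + 3) * 8 and
   BOT = n + 3, the MULT_EXPR case recurses on n + 3, which equals BOT,
   so *MUL = 8.  With TOP = 12 and BOT = 4, the INTEGER_CST case divides
   and yields *MUL = 3, while TOP = 13 fails because the remainder is
   nonzero.  */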
2624 
2625 /* Sets COMB to CST.  */
2626 
2627 static void
2628 aff_combination_const (struct affine_tree_combination *comb, tree type,
2629 		       unsigned HOST_WIDE_INT cst)
2630 {
2631   unsigned prec = TYPE_PRECISION (type);
2632 
2633   comb->type = type;
2634   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2635 
2636   comb->n = 0;
2637   comb->rest = NULL_TREE;
2638   comb->offset = cst & comb->mask;
2639 }
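
/* Editorial note (illustrative): the mask above is written as
   (2 << (prec - 1)) - 1 rather than (1 << prec) - 1 so that the shift
   count stays valid even when PREC equals the width of HOST_WIDE_INT;
   for prec = 8 both forms give 0xff.  */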
2640 
2641 /* Sets COMB to single element ELT.  */
2642 
2643 static void
2644 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2645 {
2646   unsigned prec = TYPE_PRECISION (type);
2647 
2648   comb->type = type;
2649   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2650 
2651   comb->n = 1;
2652   comb->elts[0] = elt;
2653   comb->coefs[0] = 1;
2654   comb->rest = NULL_TREE;
2655   comb->offset = 0;
2656 }
2657 
2658 /* Scales COMB by SCALE.  */
2659 
2660 static void
2661 aff_combination_scale (struct affine_tree_combination *comb,
2662 		       unsigned HOST_WIDE_INT scale)
2663 {
2664   unsigned i, j;
2665 
2666   if (scale == 1)
2667     return;
2668 
2669   if (scale == 0)
2670     {
2671       aff_combination_const (comb, comb->type, 0);
2672       return;
2673     }
2674 
2675   comb->offset = (scale * comb->offset) & comb->mask;
2676   for (i = 0, j = 0; i < comb->n; i++)
2677     {
2678       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2679       comb->elts[j] = comb->elts[i];
2680       if (comb->coefs[j] != 0)
2681 	j++;
2682     }
2683   comb->n = j;
2684 
2685   if (comb->rest)
2686     {
2687       if (comb->n < MAX_AFF_ELTS)
2688 	{
2689 	  comb->coefs[comb->n] = scale;
2690 	  comb->elts[comb->n] = comb->rest;
2691 	  comb->rest = NULL_TREE;
2692 	  comb->n++;
2693 	}
2694       else
2695 	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2696 				  build_int_cst_type (comb->type, scale));
2697     }
2698 }
2699 
2700 /* Adds ELT * SCALE to COMB.  */
2701 
2702 static void
2703 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2704 			 unsigned HOST_WIDE_INT scale)
2705 {
2706   unsigned i;
2707 
2708   if (scale == 0)
2709     return;
2710 
2711   for (i = 0; i < comb->n; i++)
2712     if (operand_equal_p (comb->elts[i], elt, 0))
2713       {
2714 	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2715 	if (comb->coefs[i])
2716 	  return;
2717 
2718 	comb->n--;
2719 	comb->coefs[i] = comb->coefs[comb->n];
2720 	comb->elts[i] = comb->elts[comb->n];
2721 
2722 	if (comb->rest)
2723 	  {
2724 	    gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2725 	    comb->coefs[comb->n] = 1;
2726 	    comb->elts[comb->n] = comb->rest;
2727 	    comb->rest = NULL_TREE;
2728 	    comb->n++;
2729 	  }
2730 	return;
2731       }
2732   if (comb->n < MAX_AFF_ELTS)
2733     {
2734       comb->coefs[comb->n] = scale;
2735       comb->elts[comb->n] = elt;
2736       comb->n++;
2737       return;
2738     }
2739 
2740   if (scale == 1)
2741     elt = fold_convert (comb->type, elt);
2742   else
2743     elt = fold_build2 (MULT_EXPR, comb->type,
2744 		       fold_convert (comb->type, elt),
2745 		       build_int_cst_type (comb->type, scale));
2746 
2747   if (comb->rest)
2748     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2749   else
2750     comb->rest = elt;
2751 }
2752 
2753 /* Adds COMB2 to COMB1.  */
2754 
2755 static void
2756 aff_combination_add (struct affine_tree_combination *comb1,
2757 		     struct affine_tree_combination *comb2)
2758 {
2759   unsigned i;
2760 
2761   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2762   for (i = 0; i < comb2->n; i++)
2763     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2764   if (comb2->rest)
2765     aff_combination_add_elt (comb1, comb2->rest, 1);
2766 }
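
/* Editorial example (illustrative): in a 32-bit type the combination for
   3*x + 5*y + 7 has mask = 0xffffffff, n = 2, elts = {x, y},
   coefs = {3, 5} and offset = 7; aff_combination_scale by -1 turns it
   into -3*x - 5*y - 7, with the negative values represented modulo
   2^32.  */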
2767 
2768 /* Converts COMB to TYPE.  */
2769 
2770 static void
2771 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2772 {
2773   unsigned prec = TYPE_PRECISION (type);
2774   unsigned i;
2775 
2776   /* If the precision of both types is the same, it suffices to change the type
2777      of the whole combination -- the elements are allowed to have another type
2778      equivalent with respect to STRIP_NOPS.  */
2779   if (prec == TYPE_PRECISION (comb->type))
2780     {
2781       comb->type = type;
2782       return;
2783     }
2784 
2785   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2786   comb->offset = comb->offset & comb->mask;
2787 
2788   /* The type of the elements can be different from comb->type only as
2789      much as what STRIP_NOPS would remove.  We can just directly cast
2790      to TYPE.  */
2791   for (i = 0; i < comb->n; i++)
2792     comb->elts[i] = fold_convert (type, comb->elts[i]);
2793   if (comb->rest)
2794     comb->rest = fold_convert (type, comb->rest);
2795 
2796   comb->type = type;
2797 }
2798 
2799 /* Splits EXPR into an affine combination of parts.  */
2800 
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803 			 struct affine_tree_combination *comb)
2804 {
2805   struct affine_tree_combination tmp;
2806   enum tree_code code;
2807   tree cst, core, toffset;
2808   HOST_WIDE_INT bitpos, bitsize;
2809   enum machine_mode mode;
2810   int unsignedp, volatilep;
2811 
2812   STRIP_NOPS (expr);
2813 
2814   code = TREE_CODE (expr);
2815   switch (code)
2816     {
2817     case INTEGER_CST:
2818       aff_combination_const (comb, type, int_cst_value (expr));
2819       return;
2820 
2821     case PLUS_EXPR:
2822     case MINUS_EXPR:
2823       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825       if (code == MINUS_EXPR)
2826 	aff_combination_scale (&tmp, -1);
2827       aff_combination_add (comb, &tmp);
2828       return;
2829 
2830     case MULT_EXPR:
2831       cst = TREE_OPERAND (expr, 1);
2832       if (TREE_CODE (cst) != INTEGER_CST)
2833 	break;
2834       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835       aff_combination_scale (comb, int_cst_value (cst));
2836       return;
2837 
2838     case NEGATE_EXPR:
2839       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840       aff_combination_scale (comb, -1);
2841       return;
2842 
2843     case ADDR_EXPR:
2844       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845 				  &toffset, &mode, &unsignedp, &volatilep,
2846 				  false);
2847       if (bitpos % BITS_PER_UNIT != 0)
2848 	break;
2849       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850       core = build_fold_addr_expr (core);
2851       if (TREE_CODE (core) == ADDR_EXPR)
2852 	aff_combination_add_elt (comb, core, 1);
2853       else
2854 	{
2855 	  tree_to_aff_combination (core, type, &tmp);
2856 	  aff_combination_add (comb, &tmp);
2857 	}
2858       if (toffset)
2859 	{
2860 	  tree_to_aff_combination (toffset, type, &tmp);
2861 	  aff_combination_add (comb, &tmp);
2862 	}
2863       return;
2864 
2865     default:
2866       break;
2867     }
2868 
2869   aff_combination_elt (comb, type, expr);
2870 }
2871 
2872 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for the width of TYPE.  */
2873 
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876 		 unsigned HOST_WIDE_INT mask)
2877 {
2878   enum tree_code code;
2879 
2880   scale &= mask;
2881   elt = fold_convert (type, elt);
2882 
2883   if (scale == 1)
2884     {
2885       if (!expr)
2886 	return elt;
2887 
2888       return fold_build2 (PLUS_EXPR, type, expr, elt);
2889     }
2890 
2891   if (scale == mask)
2892     {
2893       if (!expr)
2894 	return fold_build1 (NEGATE_EXPR, type, elt);
2895 
2896       return fold_build2 (MINUS_EXPR, type, expr, elt);
2897     }
2898 
2899   if (!expr)
2900     return fold_build2 (MULT_EXPR, type, elt,
2901 			build_int_cst_type (type, scale));
2902 
2903   if ((scale | (mask >> 1)) == mask)
2904     {
2905       /* Scale is negative.  */
2906       code = MINUS_EXPR;
2907       scale = (-scale) & mask;
2908     }
2909   else
2910     code = PLUS_EXPR;
2911 
2912   elt = fold_build2 (MULT_EXPR, type, elt,
2913 		     build_int_cst_type (type, scale));
2914   return fold_build2 (code, type, expr, elt);
2915 }
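
/* Editorial sketch (not part of the pass): the test
   "(scale | (mask >> 1)) == mask" above holds exactly when the top bit of
   SCALE under MASK is set, i.e. when SCALE is the two's-complement
   encoding of a negative number.  Standalone form with a hypothetical
   name:  */

static int
negative_under_mask (unsigned long scale, unsigned long mask)
{
  /* MASK is 2^prec - 1; with mask = 0xff, 0xfe (i.e. -2) is negative
     while 0x7f (i.e. 127) is not.  */
  return (scale | (mask >> 1)) == mask;
}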
2916 
2917 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2918 
2919 static void
2920 unshare_aff_combination (struct affine_tree_combination *comb)
2921 {
2922   unsigned i;
2923 
2924   for (i = 0; i < comb->n; i++)
2925     comb->elts[i] = unshare_expr (comb->elts[i]);
2926   if (comb->rest)
2927     comb->rest = unshare_expr (comb->rest);
2928 }
2929 
2930 /* Makes a tree from the affine combination COMB.  */
2931 
2932 static tree
2933 aff_combination_to_tree (struct affine_tree_combination *comb)
2934 {
2935   tree type = comb->type;
2936   tree expr = comb->rest;
2937   unsigned i;
2938   unsigned HOST_WIDE_INT off, sgn;
2939 
2940   if (comb->n == 0 && comb->offset == 0)
2941     {
2942       if (expr)
2943 	{
2944 	  /* Handle the special case produced by get_computation_aff when
2945 	     the type does not fit in HOST_WIDE_INT.  */
2946 	  return fold_convert (type, expr);
2947 	}
2948       else
2949 	return build_int_cst (type, 0);
2950     }
2951 
2952   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2953 
2954   for (i = 0; i < comb->n; i++)
2955     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2956 			    comb->mask);
2957 
2958   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2959     {
2960       /* Offset is negative.  */
2961       off = (-comb->offset) & comb->mask;
2962       sgn = comb->mask;
2963     }
2964   else
2965     {
2966       off = comb->offset;
2967       sgn = 1;
2968     }
2969   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2970 			  comb->mask);
2971 }
2972 
2973 /* Folds EXPR using the affine expressions framework.  */
2974 
2975 static tree
2976 fold_affine_expr (tree expr)
2977 {
2978   tree type = TREE_TYPE (expr);
2979   struct affine_tree_combination comb;
2980 
2981   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2982     return expr;
2983 
2984   tree_to_aff_combination (expr, type, &comb);
2985   return aff_combination_to_tree (&comb);
2986 }
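
/* Editorial example (illustrative): applied to (x + 4) - (x + 1),
   fold_affine_expr decomposes both operands into affine combinations,
   cancels the x terms and rebuilds the constant 3, a simplification that
   plain fold may miss when the subexpressions are not in a directly
   matching shape.  */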
2987 
2988 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989    same precision that is at least as wide as the precision of TYPE, stores
2990    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2991    type of A and B.  */
2992 
2993 static tree
2994 determine_common_wider_type (tree *a, tree *b)
2995 {
2996   tree wider_type = NULL;
2997   tree suba, subb;
2998   tree atype = TREE_TYPE (*a);
2999 
3000   if ((TREE_CODE (*a) == NOP_EXPR
3001        || TREE_CODE (*a) == CONVERT_EXPR))
3002     {
3003       suba = TREE_OPERAND (*a, 0);
3004       wider_type = TREE_TYPE (suba);
3005       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3006 	return atype;
3007     }
3008   else
3009     return atype;
3010 
3011   if ((TREE_CODE (*b) == NOP_EXPR
3012        || TREE_CODE (*b) == CONVERT_EXPR))
3013     {
3014       subb = TREE_OPERAND (*b, 0);
3015       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3016 	return atype;
3017     }
3018   else
3019     return atype;
3020 
3021   *a = suba;
3022   *b = subb;
3023   return wider_type;
3024 }
3025 
3026 /* Determines the expression by which USE is expressed from induction variable
3027    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3028    form in AFF.  Returns false if USE cannot be expressed using CAND.  */
3029 
3030 static bool
3031 get_computation_aff (struct loop *loop,
3032 		     struct iv_use *use, struct iv_cand *cand, tree at,
3033 		     struct affine_tree_combination *aff)
3034 {
3035   tree ubase = use->iv->base;
3036   tree ustep = use->iv->step;
3037   tree cbase = cand->iv->base;
3038   tree cstep = cand->iv->step;
3039   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3040   tree common_type;
3041   tree uutype;
3042   tree expr, delta;
3043   tree ratio;
3044   unsigned HOST_WIDE_INT ustepi, cstepi;
3045   HOST_WIDE_INT ratioi;
3046   struct affine_tree_combination cbase_aff, expr_aff;
3047   tree cstep_orig = cstep, ustep_orig = ustep;
3048   double_int rat;
3049 
3050   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3051     {
3052       /* We do not have enough precision to express the values of the use.  */
3053       return false;
3054     }
3055 
3056   expr = var_at_stmt (loop, cand, at);
3057 
3058   if (TREE_TYPE (expr) != ctype)
3059     {
3060       /* This may happen with the original ivs.  */
3061       expr = fold_convert (ctype, expr);
3062     }
3063 
3064   if (TYPE_UNSIGNED (utype))
3065     uutype = utype;
3066   else
3067     {
3068       uutype = unsigned_type_for (utype);
3069       ubase = fold_convert (uutype, ubase);
3070       ustep = fold_convert (uutype, ustep);
3071     }
3072 
3073   if (uutype != ctype)
3074     {
3075       expr = fold_convert (uutype, expr);
3076       cbase = fold_convert (uutype, cbase);
3077       cstep = fold_convert (uutype, cstep);
3078 
3079       /* If the conversion is not a noop, we must take it into account when
3080 	 considering the value of the step.  */
3081       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3082 	cstep_orig = cstep;
3083     }
3084 
3085   if (cst_and_fits_in_hwi (cstep_orig)
3086       && cst_and_fits_in_hwi (ustep_orig))
3087     {
3088       ustepi = int_cst_value (ustep_orig);
3089       cstepi = int_cst_value (cstep_orig);
3090 
3091       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3092 	{
3093 	  /* TODO maybe consider case when ustep divides cstep and the ratio is
3094 	     a power of 2 (so that the division is fast to execute)?  We would
3095 	     need to be much more careful with overflows etc. then.  */
3096 	  return false;
3097 	}
3098 
3099       ratio = build_int_cst_type (uutype, ratioi);
3100     }
3101   else
3102     {
3103       if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3104 	return false;
3105       ratio = double_int_to_tree (uutype, rat);
3106 
3107       /* Ratioi is only used to detect special cases when the multiplicative
3108 	 factor is 1 or -1, so if rat does not fit in a HOST_WIDE_INT, we may
3109 	 set it to 0.  */
3110       if (double_int_fits_in_shwi_p (rat))
3111 	ratioi = double_int_to_shwi (rat);
3112       else
3113 	ratioi = 0;
3114     }
3115 
3116   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3117      type, we achieve better folding by computing their difference in this
3118      wider type, and casting the result to UUTYPE.  We do not need to worry
3119      about overflows, as all the arithmetic will in the end be performed in
3120      UUTYPE anyway.  */
3121   common_type = determine_common_wider_type (&ubase, &cbase);
3122 
3123   /* We may need to shift the value if we are after the increment.  */
3124   if (stmt_after_increment (loop, cand, at))
3125     {
3126       if (uutype != common_type)
3127 	cstep = fold_convert (common_type, cstep);
3128       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3129     }
3130 
3131   /* use = ubase - ratio * cbase + ratio * var.
3132 
3133      In the general case, ubase + ratio * (var - cbase) could be better (one
3134      less multiplication), but often it is possible to eliminate redundant
3135      parts of the computation from the (ubase - ratio * cbase) term, and if
3136      that does not happen, fold is able to apply the distributive law to
3137      obtain this form anyway.  */
3138 
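  /* Editorial example (illustrative): for a use a[i] with 4-byte elements
     (ubase = &a, ustep = 4) based on the candidate i itself (cbase = 0,
     cstep = 1), we get ratio = 4 and use = &a - 4 * 0 + 4 * var,
     i.e. &a + 4 * var.  */
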
3139   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3140     {
3141       /* Let's compute in trees and just return the result in AFF.  This case
3142 	 should not be very common, and fold itself is not that bad either,
3143 	 so making the aff. functions more complicated to handle this case
3144 	 is not that urgent.  */
3145       if (ratioi == 1)
3146 	{
3147 	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3148 	  if (uutype != common_type)
3149 	    delta = fold_convert (uutype, delta);
3150 	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3151 	}
3152       else if (ratioi == -1)
3153 	{
3154 	  delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3155 	  if (uutype != common_type)
3156 	    delta = fold_convert (uutype, delta);
3157 	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3158 	}
3159       else
3160 	{
3161 	  delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3162 	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3163 	  if (uutype != common_type)
3164 	    delta = fold_convert (uutype, delta);
3165 	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3166 	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3167 	}
3168 
3169       aff->type = uutype;
3170       aff->n = 0;
3171       aff->offset = 0;
3172       aff->mask = 0;
3173       aff->rest = expr;
3174       return true;
3175     }
3176 
3177   /* If we got here, the type fits in a HOST_WIDE_INT, thus it must have
3178      been possible to compute ratioi.  */
3179   gcc_assert (ratioi);
3180 
3181   tree_to_aff_combination (ubase, common_type, aff);
3182   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3183   tree_to_aff_combination (expr, uutype, &expr_aff);
3184   aff_combination_scale (&cbase_aff, -ratioi);
3185   aff_combination_scale (&expr_aff, ratioi);
3186   aff_combination_add (aff, &cbase_aff);
3187   if (common_type != uutype)
3188     aff_combination_convert (uutype, aff);
3189   aff_combination_add (aff, &expr_aff);
3190 
3191   return true;
3192 }
3193 
3194 /* Determines the expression by which USE is expressed from induction variable
3195    CAND at statement AT in LOOP.  The computation is unshared.  */
3196 
3197 static tree
3198 get_computation_at (struct loop *loop,
3199 		    struct iv_use *use, struct iv_cand *cand, tree at)
3200 {
3201   struct affine_tree_combination aff;
3202   tree type = TREE_TYPE (use->iv->base);
3203 
3204   if (!get_computation_aff (loop, use, cand, at, &aff))
3205     return NULL_TREE;
3206   unshare_aff_combination (&aff);
3207   return fold_convert (type, aff_combination_to_tree (&aff));
3208 }
3209 
3210 /* Determines the expression by which USE is expressed from induction variable
3211    CAND in LOOP.  The computation is unshared.  */
3212 
3213 static tree
3214 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3215 {
3216   return get_computation_at (loop, use, cand, use->stmt);
3217 }
3218 
3219 /* Returns cost of addition in MODE.  */
3220 
3221 static unsigned
3222 add_cost (enum machine_mode mode)
3223 {
3224   static unsigned costs[NUM_MACHINE_MODES];
3225   rtx seq;
3226   unsigned cost;
3227 
3228   if (costs[mode])
3229     return costs[mode];
3230 
3231   start_sequence ();
3232   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3233 				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3234 				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3235 		 NULL_RTX);
3236   seq = get_insns ();
3237   end_sequence ();
3238 
3239   cost = seq_cost (seq);
3240   if (!cost)
3241     cost = 1;
3242 
3243   costs[mode] = cost;
3244 
3245   if (dump_file && (dump_flags & TDF_DETAILS))
3246     fprintf (dump_file, "Addition in %s costs %d\n",
3247 	     GET_MODE_NAME (mode), cost);
3248   return cost;
3249 }
3250 
3251 /* Entry in a hashtable of already known costs for multiplication.  */
3252 struct mbc_entry
3253 {
3254   HOST_WIDE_INT cst;		/* The constant to multiply by.  */
3255   enum machine_mode mode;	/* In mode.  */
3256   unsigned cost;		/* The cost.  */
3257 };
3258 
3259 /* Computes the hash value for ENTRY.  */
3260 
3261 static hashval_t
3262 mbc_entry_hash (const void *entry)
3263 {
3264   const struct mbc_entry *e = entry;
3265 
3266   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3267 }
3268 
3269 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3270 
3271 static int
3272 mbc_entry_eq (const void *entry1, const void *entry2)
3273 {
3274   const struct mbc_entry *e1 = entry1;
3275   const struct mbc_entry *e2 = entry2;
3276 
3277   return (e1->mode == e2->mode
3278 	  && e1->cst == e2->cst);
3279 }
3280 
3281 /* Returns cost of multiplication by constant CST in MODE.  */
3282 
3283 unsigned
3284 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3285 {
3286   static htab_t costs;
3287   struct mbc_entry **cached, act;
3288   rtx seq;
3289   unsigned cost;
3290 
3291   if (!costs)
3292     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3293 
3294   act.mode = mode;
3295   act.cst = cst;
3296   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3297   if (*cached)
3298     return (*cached)->cost;
3299 
3300   *cached = XNEW (struct mbc_entry);
3301   (*cached)->mode = mode;
3302   (*cached)->cst = cst;
3303 
3304   start_sequence ();
3305   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3306 	       gen_int_mode (cst, mode), NULL_RTX, 0);
3307   seq = get_insns ();
3308   end_sequence ();
3309 
3310   cost = seq_cost (seq);
3311 
3312   if (dump_file && (dump_flags & TDF_DETAILS))
3313     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3314 	     (int) cst, GET_MODE_NAME (mode), cost);
3315 
3316   (*cached)->cost = cost;
3317 
3318   return cost;
3319 }
3320 
3321 /* Returns true if multiplying by RATIO is allowed in an address.  */
3322 
3323 bool
3324 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3325 {
3326 #define MAX_RATIO 128
3327   static sbitmap valid_mult;
3328 
3329   if (!valid_mult)
3330     {
3331       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3332       rtx addr;
3333       HOST_WIDE_INT i;
3334 
3335       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3336       sbitmap_zero (valid_mult);
3337       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3338       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3339 	{
3340 	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
3341 	  if (memory_address_p (Pmode, addr))
3342 	    SET_BIT (valid_mult, i + MAX_RATIO);
3343 	}
3344 
3345       if (dump_file && (dump_flags & TDF_DETAILS))
3346 	{
3347 	  fprintf (dump_file, "  allowed multipliers:");
3348 	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3349 	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
3350 	      fprintf (dump_file, " %d", (int) i);
3351 	  fprintf (dump_file, "\n");
3352 	  fprintf (dump_file, "\n");
3353 	}
3354     }
3355 
3356   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3357     return false;
3358 
3359   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3360 }
3361 
3362 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3363    If SYMBOL_PRESENT is false, the symbol is omitted.  If VAR_PRESENT is
3364    false, the variable is omitted.
3365 
3366    TODO -- there must be some better way.  This all is quite crude.  */
3367 
3368 static unsigned
3369 get_address_cost (bool symbol_present, bool var_present,
3370 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3371 {
3372   static bool initialized = false;
3373   static HOST_WIDE_INT rat, off;
3374   static HOST_WIDE_INT min_offset, max_offset;
3375   static unsigned costs[2][2][2][2];
3376   unsigned cost, acost;
3377   bool offset_p, ratio_p;
3378   HOST_WIDE_INT s_offset;
3379   unsigned HOST_WIDE_INT mask;
3380   unsigned bits;
3381 
3382   if (!initialized)
3383     {
3384       HOST_WIDE_INT i;
3385       int old_cse_not_expected;
3386       unsigned sym_p, var_p, off_p, rat_p, add_c;
3387       rtx seq, addr, base;
3388       rtx reg0, reg1;
3389 
3390       initialized = true;
3391 
3392       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3393 
3394       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3395       for (i = 1; i <= 1 << 20; i <<= 1)
3396 	{
3397 	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
3398 	  if (!memory_address_p (Pmode, addr))
3399 	    break;
3400 	}
3401       max_offset = i >> 1;
3402       off = max_offset;
3403 
3404       for (i = 1; i <= 1 << 20; i <<= 1)
3405 	{
3406 	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3407 	  if (!memory_address_p (Pmode, addr))
3408 	    break;
3409 	}
3410       min_offset = -(i >> 1);
3411 
3412       if (dump_file && (dump_flags & TDF_DETAILS))
3413 	{
3414 	  fprintf (dump_file, "get_address_cost:\n");
3415 	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3416 	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3417 	}
3418 
3419       rat = 1;
3420       for (i = 2; i <= MAX_RATIO; i++)
3421 	if (multiplier_allowed_in_address_p (i))
3422 	  {
3423 	    rat = i;
3424 	    break;
3425 	  }
3426 
3427       /* Compute the cost of various addressing modes.  */
3428       acost = 0;
3429       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3430       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3431 
3432       for (i = 0; i < 16; i++)
3433 	{
3434 	  sym_p = i & 1;
3435 	  var_p = (i >> 1) & 1;
3436 	  off_p = (i >> 2) & 1;
3437 	  rat_p = (i >> 3) & 1;
3438 
3439 	  addr = reg0;
3440 	  if (rat_p)
3441 	    addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3442 
3443 	  if (var_p)
3444 	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3445 
3446 	  if (sym_p)
3447 	    {
3448 	      base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3449 	      if (off_p)
3450 		base = gen_rtx_fmt_e (CONST, Pmode,
3451 				      gen_rtx_fmt_ee (PLUS, Pmode,
3452 						      base,
3453 						      gen_int_mode (off, Pmode)));
3454 	    }
3455 	  else if (off_p)
3456 	    base = gen_int_mode (off, Pmode);
3457 	  else
3458 	    base = NULL_RTX;
3459 
3460 	  if (base)
3461 	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3462 
3463 	  start_sequence ();
3464 	  /* To avoid splitting addressing modes, pretend that no cse will
3465 	     follow.  */
3466 	  old_cse_not_expected = cse_not_expected;
3467 	  cse_not_expected = true;
3468 	  addr = memory_address (Pmode, addr);
3469 	  cse_not_expected = old_cse_not_expected;
3470 	  seq = get_insns ();
3471 	  end_sequence ();
3472 
3473 	  acost = seq_cost (seq);
3474 	  acost += address_cost (addr, Pmode);
3475 
3476 	  if (!acost)
3477 	    acost = 1;
3478 	  costs[sym_p][var_p][off_p][rat_p] = acost;
3479 	}
3480 
3481       /* On some targets, it is quite expensive to load a symbol into a
3482 	 register, which makes addresses that contain symbols look much more
3483 	 expensive.  However, the symbol will have to be loaded in any case
3484 	 before the loop (and quite likely we have it in a register already),
3485 	 so it does not make much sense to penalize them too heavily.  So make
3486          some final tweaks for the SYMBOL_PRESENT modes:
3487 
3488          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3489 	 var is cheaper, use this mode with a small penalty.
3490 	 If VAR_PRESENT is true, try whether the mode with
3491 	 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3492 	 and if this is the case, use it.  */
3493       add_c = add_cost (Pmode);
3494       for (i = 0; i < 8; i++)
3495 	{
3496 	  var_p = i & 1;
3497 	  off_p = (i >> 1) & 1;
3498 	  rat_p = (i >> 2) & 1;
3499 
3500 	  acost = costs[0][1][off_p][rat_p] + 1;
3501 	  if (var_p)
3502 	    acost += add_c;
3503 
3504 	  if (acost < costs[1][var_p][off_p][rat_p])
3505 	    costs[1][var_p][off_p][rat_p] = acost;
3506 	}
3507 
3508       if (dump_file && (dump_flags & TDF_DETAILS))
3509 	{
3510 	  fprintf (dump_file, "Address costs:\n");
3511 
3512 	  for (i = 0; i < 16; i++)
3513 	    {
3514 	      sym_p = i & 1;
3515 	      var_p = (i >> 1) & 1;
3516 	      off_p = (i >> 2) & 1;
3517 	      rat_p = (i >> 3) & 1;
3518 
3519 	      fprintf (dump_file, "  ");
3520 	      if (sym_p)
3521 		fprintf (dump_file, "sym + ");
3522 	      if (var_p)
3523 		fprintf (dump_file, "var + ");
3524 	      if (off_p)
3525 		fprintf (dump_file, "cst + ");
3526 	      if (rat_p)
3527 		fprintf (dump_file, "rat * ");
3528 
3529 	      acost = costs[sym_p][var_p][off_p][rat_p];
3530 	      fprintf (dump_file, "index costs %d\n", acost);
3531 	    }
3532 	  fprintf (dump_file, "\n");
3533 	}
3534     }
3535 
3536   bits = GET_MODE_BITSIZE (Pmode);
3537   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3538   offset &= mask;
3539   if ((offset >> (bits - 1) & 1))
3540     offset |= ~mask;
3541   s_offset = offset;
3542 
3543   cost = 0;
3544   offset_p = (s_offset != 0
3545 	      && min_offset <= s_offset && s_offset <= max_offset);
3546   ratio_p = (ratio != 1
3547 	     && multiplier_allowed_in_address_p (ratio));
3548 
3549   if (ratio != 1 && !ratio_p)
3550     cost += multiply_by_cost (ratio, Pmode);
3551 
3552   if (s_offset && !offset_p && !symbol_present)
3553     {
3554       cost += add_cost (Pmode);
3555       var_present = true;
3556     }
3557 
3558   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3559   return cost + acost;
3560 }
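
/* Editorial sketch (not part of the pass): the mask manipulation near the
   end of get_address_cost sign-extends OFFSET from the width of Pmode to
   the width of the host integer.  The same idiom for an 8-bit value, with
   a hypothetical name:  */

static long
sign_extend_from_8_bits (unsigned long offset)
{
  unsigned long mask = 0xff;

  offset &= mask;
  if ((offset >> 7) & 1)
    offset |= ~mask;	/* E.g. 0xfe becomes -2.  */
  return (long) offset;
}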
3561 
3562 /* Estimates cost of forcing expression EXPR into a variable.  */
3563 
3564 unsigned
3565 force_expr_to_var_cost (tree expr)
3566 {
3567   static bool costs_initialized = false;
3568   static unsigned integer_cost;
3569   static unsigned symbol_cost;
3570   static unsigned address_cost;
3571   tree op0, op1;
3572   unsigned cost0, cost1, cost;
3573   enum machine_mode mode;
3574 
3575   if (!costs_initialized)
3576     {
3577       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3578       rtx x = gen_rtx_MEM (DECL_MODE (var),
3579 			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3580       tree addr;
3581       tree type = build_pointer_type (integer_type_node);
3582 
3583       integer_cost = computation_cost (build_int_cst (integer_type_node,
3584 						      2000));
3585 
3586       SET_DECL_RTL (var, x);
3587       TREE_STATIC (var) = 1;
3588       addr = build1 (ADDR_EXPR, type, var);
3589       symbol_cost = computation_cost (addr) + 1;
3590 
3591       address_cost
3592 	= computation_cost (build2 (PLUS_EXPR, type,
3593 				    addr,
3594 				    build_int_cst (type, 2000))) + 1;
3595       if (dump_file && (dump_flags & TDF_DETAILS))
3596 	{
3597 	  fprintf (dump_file, "force_expr_to_var_cost:\n");
3598 	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3599 	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3600 	  fprintf (dump_file, "  address %d\n", (int) address_cost);
3601 	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3602 	  fprintf (dump_file, "\n");
3603 	}
3604 
3605       costs_initialized = true;
3606     }
3607 
3608   STRIP_NOPS (expr);
3609 
3610   if (SSA_VAR_P (expr))
3611     return 0;
3612 
3613   if (TREE_INVARIANT (expr))
3614     {
3615       if (TREE_CODE (expr) == INTEGER_CST)
3616 	return integer_cost;
3617 
3618       if (TREE_CODE (expr) == ADDR_EXPR)
3619 	{
3620 	  tree obj = TREE_OPERAND (expr, 0);
3621 
3622 	  if (TREE_CODE (obj) == VAR_DECL
3623 	      || TREE_CODE (obj) == PARM_DECL
3624 	      || TREE_CODE (obj) == RESULT_DECL)
3625 	    return symbol_cost;
3626 	}
3627 
3628       return address_cost;
3629     }
3630 
3631   switch (TREE_CODE (expr))
3632     {
3633     case PLUS_EXPR:
3634     case MINUS_EXPR:
3635     case MULT_EXPR:
3636       op0 = TREE_OPERAND (expr, 0);
3637       op1 = TREE_OPERAND (expr, 1);
3638       STRIP_NOPS (op0);
3639       STRIP_NOPS (op1);
3640 
3641       if (is_gimple_val (op0))
3642 	cost0 = 0;
3643       else
3644 	cost0 = force_expr_to_var_cost (op0);
3645 
3646       if (is_gimple_val (op1))
3647 	cost1 = 0;
3648       else
3649 	cost1 = force_expr_to_var_cost (op1);
3650 
3651       break;
3652 
3653     default:
3654       /* Just an arbitrary value, FIXME.  */
3655       return target_spill_cost;
3656     }
3657 
3658   mode = TYPE_MODE (TREE_TYPE (expr));
3659   switch (TREE_CODE (expr))
3660     {
3661     case PLUS_EXPR:
3662     case MINUS_EXPR:
3663       cost = add_cost (mode);
3664       break;
3665 
3666     case MULT_EXPR:
3667       if (cst_and_fits_in_hwi (op0))
3668 	cost = multiply_by_cost (int_cst_value (op0), mode);
3669       else if (cst_and_fits_in_hwi (op1))
3670 	cost = multiply_by_cost (int_cst_value (op1), mode);
3671       else
3672 	return target_spill_cost;
3673       break;
3674 
3675     default:
3676       gcc_unreachable ();
3677     }
3678 
3679   cost += cost0;
3680   cost += cost1;
3681 
3682   /* Bound the cost by target_spill_cost.  The parts of complicated
3683      computations often are either loop invariant or at least can
3684      be shared between several iv uses, so letting this grow without
3685      limits would not give reasonable results.  */
3686   return cost < target_spill_cost ? cost : target_spill_cost;
3687 }
3688 
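/* Worked example (illustrative): for the expression a + 4 * b, where a and
   b are gimple values, the inner MULT_EXPR costs multiply_by_cost (4, mode)
   and the outer PLUS_EXPR costs add_cost (mode), while both leaves cost 0;
   the total is their sum, capped at target_spill_cost.  */
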
3689 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3690    invariants the computation depends on.  */
3691 
3692 static unsigned
3693 force_var_cost (struct ivopts_data *data,
3694 		tree expr, bitmap *depends_on)
3695 {
3696   if (depends_on)
3697     {
3698       fd_ivopts_data = data;
3699       walk_tree (&expr, find_depends, depends_on, NULL);
3700     }
3701 
3702   return force_expr_to_var_cost (expr);
3703 }
3704 
3705 /* Estimates the cost of expressing address ADDR as var + symbol + offset.
3706    The value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT
3707    are set to false if the corresponding part is missing.  DEPENDS_ON is a
3708    set of the invariants the computation depends on.  */
3709 
3710 static unsigned
3711 split_address_cost (struct ivopts_data *data,
3712 		    tree addr, bool *symbol_present, bool *var_present,
3713 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3714 {
3715   tree core;
3716   HOST_WIDE_INT bitsize;
3717   HOST_WIDE_INT bitpos;
3718   tree toffset;
3719   enum machine_mode mode;
3720   int unsignedp, volatilep;
3721 
3722   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3723 			      &unsignedp, &volatilep, false);
3724 
3725   if (toffset != 0
3726       || bitpos % BITS_PER_UNIT != 0
3727       || TREE_CODE (core) != VAR_DECL)
3728     {
3729       *symbol_present = false;
3730       *var_present = true;
3731       fd_ivopts_data = data;
3732       walk_tree (&addr, find_depends, depends_on, NULL);
3733       return target_spill_cost;
3734     }
3735 
3736   *offset += bitpos / BITS_PER_UNIT;
3737   if (TREE_STATIC (core)
3738       || DECL_EXTERNAL (core))
3739     {
3740       *symbol_present = true;
3741       *var_present = false;
3742       return 0;
3743     }
3744 
3745   *symbol_present = false;
3746   *var_present = true;
3747   return 0;
3748 }
3749 
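/* For example (illustrative): for the element "arr[1]" of a file-scope
   array with 4-byte elements, get_inner_reference returns the VAR_DECL
   "arr" with bitpos 32, so *OFFSET grows by 4 and, "arr" being
   TREE_STATIC, *SYMBOL_PRESENT is set and the cost is 0.  */
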
3750 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
3751    var + symbol + offset.  The value of the offset is added to OFFSET;
3752    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3753    part is missing.  DEPENDS_ON is a set of the invariants the computation
3754    depends on.  */
3755 
3756 static unsigned
3757 ptr_difference_cost (struct ivopts_data *data,
3758 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3759 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3760 {
3761   HOST_WIDE_INT diff = 0;
3762   unsigned cost;
3763 
3764   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3765 
3766   if (ptr_difference_const (e1, e2, &diff))
3767     {
3768       *offset += diff;
3769       *symbol_present = false;
3770       *var_present = false;
3771       return 0;
3772     }
3773 
3774   if (e2 == integer_zero_node)
3775     return split_address_cost (data, TREE_OPERAND (e1, 0),
3776 			       symbol_present, var_present, offset, depends_on);
3777 
3778   *symbol_present = false;
3779   *var_present = true;
3780 
3781   cost = force_var_cost (data, e1, depends_on);
3782   cost += force_var_cost (data, e2, depends_on);
3783   cost += add_cost (Pmode);
3784 
3785   return cost;
3786 }
3787 
3788 /* Estimates the cost of expressing the difference E1 - E2 as
3789    var + symbol + offset.  The value of the offset is added to OFFSET;
3790    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3791    part is missing.  DEPENDS_ON is a set of the invariants the computation
3792    depends on.  */
3793 
3794 static unsigned
3795 difference_cost (struct ivopts_data *data,
3796 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3797 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3798 {
3799   unsigned cost;
3800   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3801   unsigned HOST_WIDE_INT off1, off2;
3802 
3803   e1 = strip_offset (e1, &off1);
3804   e2 = strip_offset (e2, &off2);
3805   *offset += off1 - off2;
3806 
3807   STRIP_NOPS (e1);
3808   STRIP_NOPS (e2);
3809 
3810   if (TREE_CODE (e1) == ADDR_EXPR)
3811     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3812 				depends_on);
3813   *symbol_present = false;
3814 
3815   if (operand_equal_p (e1, e2, 0))
3816     {
3817       *var_present = false;
3818       return 0;
3819     }
3820   *var_present = true;
3821   if (zero_p (e2))
3822     return force_var_cost (data, e1, depends_on);
3823 
3824   if (zero_p (e1))
3825     {
3826       cost = force_var_cost (data, e2, depends_on);
3827       cost += multiply_by_cost (-1, mode);
3828 
3829       return cost;
3830     }
3831 
3832   cost = force_var_cost (data, e1, depends_on);
3833   cost += force_var_cost (data, e2, depends_on);
3834   cost += add_cost (mode);
3835 
3836   return cost;
3837 }
3838 
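/* Worked example (illustrative): for E1 = n + 8 and E2 = n, strip_offset
   leaves both as n with off1 = 8 and off2 = 0, so *OFFSET grows by 8,
   *VAR_PRESENT becomes false and the cost is 0.  For unrelated E1 = n and
   E2 = m, both are forced into variables and one subtraction is needed:
   force_var_cost (n) + force_var_cost (m) + add_cost (mode).  */
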
3839 /* Determines the cost of the computation by which USE is expressed
3840    from induction variable CAND.  If ADDRESS_P is true, we just need
3841    to create an address from it, otherwise we want to get it into a
3842    register.  A set of invariants we depend on is stored in
3843    DEPENDS_ON.  AT is the statement at which the value is computed.  */
3844 
3845 static unsigned
3846 get_computation_cost_at (struct ivopts_data *data,
3847 			 struct iv_use *use, struct iv_cand *cand,
3848 			 bool address_p, bitmap *depends_on, tree at)
3849 {
3850   tree ubase = use->iv->base, ustep = use->iv->step;
3851   tree cbase, cstep;
3852   tree utype = TREE_TYPE (ubase), ctype;
3853   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3854   HOST_WIDE_INT ratio, aratio;
3855   bool var_present, symbol_present;
3856   unsigned cost = 0, n_sums;
3857 
3858   *depends_on = NULL;
3859 
3860   /* Only consider real candidates.  */
3861   if (!cand->iv)
3862     return INFTY;
3863 
3864   cbase = cand->iv->base;
3865   cstep = cand->iv->step;
3866   ctype = TREE_TYPE (cbase);
3867 
3868   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3869     {
3870       /* We do not have enough precision to express the values of the use.  */
3871       return INFTY;
3872     }
3873 
3874   if (address_p)
3875     {
3876       /* Do not try to express the address of an object with a computation
3877	 based on the address of a different object.  This may cause problems
3878	 in rtl-level alias analysis (which does not expect this to happen,
3879	 as it is illegal in C), and would be unlikely to be useful
3880	 anyway.  */
3881       if (use->iv->base_object
3882 	  && cand->iv->base_object
3883 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3884 	return INFTY;
3885     }
3886 
3887   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3888     {
3889       /* TODO -- add direct handling of this case.  */
3890       goto fallback;
3891     }
3892 
3893   /* CSTEPI is removed from the offset in case the statement is after the
3894      increment.  If the step is not constant, we use zero instead.
3895      This is a bit imprecise (there is the extra addition), but
3896      redundancy elimination is likely to transform the code so that
3897      it uses the value of the variable before the increment anyway,
3898      so this is not all that unrealistic.  */
3899   if (cst_and_fits_in_hwi (cstep))
3900     cstepi = int_cst_value (cstep);
3901   else
3902     cstepi = 0;
3903 
3904   if (cst_and_fits_in_hwi (ustep)
3905       && cst_and_fits_in_hwi (cstep))
3906     {
3907       ustepi = int_cst_value (ustep);
3908 
3909       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3910 	return INFTY;
3911     }
3912   else
3913     {
3914       double_int rat;
3915 
3916       if (!constant_multiple_of (ustep, cstep, &rat))
3917 	return INFTY;
3918 
3919       if (double_int_fits_in_shwi_p (rat))
3920 	ratio = double_int_to_shwi (rat);
3921       else
3922 	return INFTY;
3923     }
3924 
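  /* For instance (illustrative), a use stepping by 8 based on a candidate
     stepping by 2 yields ratio = 4, so below the use is expressed as
     ubase + 4 * (var - cbase); when no such constant ratio exists, we have
     already returned INFTY above.  */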
3925   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3926      or ratio == 1, it is better to handle this like
3927 
3928      ubase - ratio * cbase + ratio * var
3929 
3930      (this also holds in the case ratio == -1, TODO).  */
3931 
3932   if (cst_and_fits_in_hwi (cbase))
3933     {
3934       offset = - ratio * int_cst_value (cbase);
3935       cost += difference_cost (data,
3936 			       ubase, integer_zero_node,
3937 			       &symbol_present, &var_present, &offset,
3938 			       depends_on);
3939     }
3940   else if (ratio == 1)
3941     {
3942       cost += difference_cost (data,
3943 			       ubase, cbase,
3944 			       &symbol_present, &var_present, &offset,
3945 			       depends_on);
3946     }
3947   else
3948     {
3949       cost += force_var_cost (data, cbase, depends_on);
3950       cost += add_cost (TYPE_MODE (ctype));
3951       cost += difference_cost (data,
3952 			       ubase, integer_zero_node,
3953 			       &symbol_present, &var_present, &offset,
3954 			       depends_on);
3955     }
3956 
3957   /* If we are after the increment, the value of the candidate is higher by
3958      one iteration.  */
3959   if (stmt_after_increment (data->current_loop, cand, at))
3960     offset -= ratio * cstepi;
3961 
3962   /* Now the computation has the shape symbol + var1 + const + ratio * var2
3963      (the symbol/var/const parts may be omitted).  If we are looking for an
3964      address, find the cost of addressing this.  */
3965   if (address_p)
3966     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3967 
3968   /* Otherwise estimate the costs for computing the expression.  */
3969   aratio = ratio > 0 ? ratio : -ratio;
3970   if (!symbol_present && !var_present && !offset)
3971     {
3972       if (ratio != 1)
3973 	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3974 
3975       return cost;
3976     }
3977 
3978   if (aratio != 1)
3979     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3980 
3981   n_sums = 1;
3982   if (var_present
3983       /* Symbol + offset should be compile-time computable.  */
3984       && (symbol_present || offset))
3985     n_sums++;
3986 
3987   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3988 
3989 fallback:
3990   {
3991     /* Just get the expression, expand it and measure the cost.  */
3992     tree comp = get_computation_at (data->current_loop, use, cand, at);
3993 
3994     if (!comp)
3995       return INFTY;
3996 
3997     if (address_p)
3998       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3999 
4000     return computation_cost (comp);
4001   }
4002 }
4003 
4004 /* Determines the cost of the computation by which USE is expressed
4005    from induction variable CAND.  If ADDRESS_P is true, we just need
4006    to create an address from it, otherwise we want to get it into a
4007    register.  A set of invariants we depend on is stored in
4008    DEPENDS_ON.  */
4009 
4010 static unsigned
4011 get_computation_cost (struct ivopts_data *data,
4012 		      struct iv_use *use, struct iv_cand *cand,
4013 		      bool address_p, bitmap *depends_on)
4014 {
4015   return get_computation_cost_at (data,
4016 				  use, cand, address_p, depends_on, use->stmt);
4017 }
4018 
4019 /* Determines cost of basing replacement of USE on CAND in a generic
4020    expression.  */
4021 
4022 static bool
4023 determine_use_iv_cost_generic (struct ivopts_data *data,
4024 			       struct iv_use *use, struct iv_cand *cand)
4025 {
4026   bitmap depends_on;
4027   unsigned cost;
4028 
4029   /* The simple case first -- if we need to express the value of the
4030      preserved original biv, the cost is 0.  This also prevents us from
4031      counting the cost of the increment twice -- once at this use and once
4032      in the cost of the candidate.  */
4033   if (cand->pos == IP_ORIGINAL
4034       && cand->incremented_at == use->stmt)
4035     {
4036       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4037       return true;
4038     }
4039 
4040   cost = get_computation_cost (data, use, cand, false, &depends_on);
4041   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4042 
4043   return cost != INFTY;
4044 }
4045 
4046 /* Determines cost of basing replacement of USE on CAND in an address.  */
4047 
4048 static bool
4049 determine_use_iv_cost_address (struct ivopts_data *data,
4050 			       struct iv_use *use, struct iv_cand *cand)
4051 {
4052   bitmap depends_on;
4053   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4054 
4055   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4056 
4057   return cost != INFTY;
4058 }
4059 
4060 /* Computes value of induction variable IV in iteration NITER.  */
4061 
4062 static tree
4063 iv_value (struct iv *iv, tree niter)
4064 {
4065   tree val;
4066   tree type = TREE_TYPE (iv->base);
4067 
4068   niter = fold_convert (type, niter);
4069   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4070 
4071   return fold_build2 (PLUS_EXPR, type, iv->base, val);
4072 }
4073 
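/* For example, an iv with base 16 and step 4 has the value 16 + 4 * NITER,
   i.e. 56 in iteration 10.  */
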
4074 /* Computes value of candidate CAND at position AT in iteration NITER.  */
4075 
4076 static tree
4077 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4078 {
4079   tree val = iv_value (cand->iv, niter);
4080   tree type = TREE_TYPE (cand->iv->base);
4081 
4082   if (stmt_after_increment (loop, cand, at))
4083     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4084 
4085   return val;
4086 }
4087 
4088 /* Returns the period of induction variable IV.  */
4089 
4090 static tree
4091 iv_period (struct iv *iv)
4092 {
4093   tree step = iv->step, period, type;
4094   tree pow2div;
4095 
4096   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4097 
4098   /* Period of the iv is gcd (step, type range).  Since type range is power
4099      of two, it suffices to determine the maximum power of two that divides
4100      step.  */
4101   pow2div = num_ending_zeros (step);
4102   type = unsigned_type_for (TREE_TYPE (step));
4103 
4104   period = build_low_bits_mask (type,
4105 				(TYPE_PRECISION (type)
4106 				 - tree_low_cst (pow2div, 1)));
4107 
4108   return period;
4109 }
4110 
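/* Worked example (illustrative): for a 32-bit iv with step 4,
   num_ending_zeros is 2 and the period is a mask of 32 - 2 = 30 low bits,
   i.e. 0x3fffffff -- the number of steps the iv makes before its value
   repeats modulo 2^32.  */
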
4111 /* Returns the comparison operator used when eliminating the iv USE.  */
4112 
4113 static enum tree_code
4114 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4115 {
4116   struct loop *loop = data->current_loop;
4117   basic_block ex_bb;
4118   edge exit;
4119 
4120   ex_bb = bb_for_stmt (use->stmt);
4121   exit = EDGE_SUCC (ex_bb, 0);
4122   if (flow_bb_inside_loop_p (loop, exit->dest))
4123     exit = EDGE_SUCC (ex_bb, 1);
4124 
4125   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4126 }
4127 
4128 /* Check whether it is possible to express the condition in USE by comparison
4129    of candidate CAND.  If so, store the value compared with to BOUND.  */
4130 
4131 static bool
4132 may_eliminate_iv (struct ivopts_data *data,
4133 		  struct iv_use *use, struct iv_cand *cand, tree *bound)
4134 {
4135   basic_block ex_bb;
4136   edge exit;
4137   tree nit, nit_type;
4138   tree wider_type, period, per_type;
4139   struct loop *loop = data->current_loop;
4140 
4141   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4142     return false;
4143 
4144   /* For now this works only for exits that dominate the loop latch.  TODO --
4145      extend for other conditions inside the loop body.  */
4146   ex_bb = bb_for_stmt (use->stmt);
4147   if (use->stmt != last_stmt (ex_bb)
4148       || TREE_CODE (use->stmt) != COND_EXPR)
4149     return false;
4150   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4151     return false;
4152 
4153   exit = EDGE_SUCC (ex_bb, 0);
4154   if (flow_bb_inside_loop_p (loop, exit->dest))
4155     exit = EDGE_SUCC (ex_bb, 1);
4156   if (flow_bb_inside_loop_p (loop, exit->dest))
4157     return false;
4158 
4159   nit = niter_for_exit (data, exit);
4160   if (!nit)
4161     return false;
4162 
4163   nit_type = TREE_TYPE (nit);
4164 
4165   /* Determine whether we may use the variable to test whether niter iterations
4166      at least the number of iterations.  */
4167      greater than the number of iterations.  */
4168   period = iv_period (cand->iv);
4169   if (!period)
4170     return false;
4171   per_type = TREE_TYPE (period);
4172 
4173   /* Compare the period with the number of iterations in the wider type.  */
4174   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4175     wider_type = per_type;
4176   else
4177     wider_type = nit_type;
4178 
4179   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4180 				      fold_convert (wider_type, period),
4181 				      fold_convert (wider_type, nit))))
4182     return false;
4183 
4184   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4185   return true;
4186 }
4187 
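/* For example (illustrative): in a loop that is known to exit after n
   iterations, the condition "i < n" on a biv may be replaced by a
   comparison of a candidate p against cand_value_at -- e.g.
   "p != base + 4 * n" for a pointer candidate stepping by 4 -- provided
   the period of p covers n.  */
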
4188 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4189 
4190 static bool
4191 determine_use_iv_cost_condition (struct ivopts_data *data,
4192 				 struct iv_use *use, struct iv_cand *cand)
4193 {
4194   tree bound = NULL_TREE, op, cond;
4195   bitmap depends_on = NULL;
4196   unsigned cost;
4197 
4198   /* Only consider real candidates.  */
4199   if (!cand->iv)
4200     {
4201       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4202       return false;
4203     }
4204 
4205   if (may_eliminate_iv (data, use, cand, &bound))
4206     {
4207       cost = force_var_cost (data, bound, &depends_on);
4208 
4209       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4210       return cost != INFTY;
4211     }
4212 
4213   /* The induction variable elimination failed; just express the original
4214      giv.  If it is compared with an invariant, note that we cannot get
4215      rid of it.  */
4216   cost = get_computation_cost (data, use, cand, false, &depends_on);
4217 
4218   cond = *use->op_p;
4219   if (TREE_CODE (cond) != SSA_NAME)
4220     {
4221       op = TREE_OPERAND (cond, 0);
4222       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4223 	op = TREE_OPERAND (cond, 1);
4224       if (TREE_CODE (op) == SSA_NAME)
4225 	{
4226 	  op = get_iv (data, op)->base;
4227 	  fd_ivopts_data = data;
4228 	  walk_tree (&op, find_depends, &depends_on, NULL);
4229 	}
4230     }
4231 
4232   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4233   return cost != INFTY;
4234 }
4235 
4236 /* Determines cost of basing replacement of USE on CAND.  Returns false
4237    if USE cannot be based on CAND.  */
4238 
4239 static bool
4240 determine_use_iv_cost (struct ivopts_data *data,
4241 		       struct iv_use *use, struct iv_cand *cand)
4242 {
4243   switch (use->type)
4244     {
4245     case USE_NONLINEAR_EXPR:
4246       return determine_use_iv_cost_generic (data, use, cand);
4247 
4248     case USE_ADDRESS:
4249       return determine_use_iv_cost_address (data, use, cand);
4250 
4251     case USE_COMPARE:
4252       return determine_use_iv_cost_condition (data, use, cand);
4253 
4254     default:
4255       gcc_unreachable ();
4256     }
4257 }
4258 
4259 /* Determines costs of basing the use of the iv on an iv candidate.  */
4260 
4261 static void
4262 determine_use_iv_costs (struct ivopts_data *data)
4263 {
4264   unsigned i, j;
4265   struct iv_use *use;
4266   struct iv_cand *cand;
4267   bitmap to_clear = BITMAP_ALLOC (NULL);
4268 
4269   alloc_use_cost_map (data);
4270 
4271   for (i = 0; i < n_iv_uses (data); i++)
4272     {
4273       use = iv_use (data, i);
4274 
4275       if (data->consider_all_candidates)
4276 	{
4277 	  for (j = 0; j < n_iv_cands (data); j++)
4278 	    {
4279 	      cand = iv_cand (data, j);
4280 	      determine_use_iv_cost (data, use, cand);
4281 	    }
4282 	}
4283       else
4284 	{
4285 	  bitmap_iterator bi;
4286 
4287 	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4288 	    {
4289 	      cand = iv_cand (data, j);
4290 	      if (!determine_use_iv_cost (data, use, cand))
4291 		bitmap_set_bit (to_clear, j);
4292 	    }
4293 
4294	  /* Remove the candidates for which the cost is infinite from
4295	     the list of related candidates.  */
4296 	  bitmap_and_compl_into (use->related_cands, to_clear);
4297 	  bitmap_clear (to_clear);
4298 	}
4299     }
4300 
4301   BITMAP_FREE (to_clear);
4302 
4303   if (dump_file && (dump_flags & TDF_DETAILS))
4304     {
4305       fprintf (dump_file, "Use-candidate costs:\n");
4306 
4307       for (i = 0; i < n_iv_uses (data); i++)
4308 	{
4309 	  use = iv_use (data, i);
4310 
4311 	  fprintf (dump_file, "Use %d:\n", i);
4312 	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
4313 	  for (j = 0; j < use->n_map_members; j++)
4314 	    {
4315 	      if (!use->cost_map[j].cand
4316 		  || use->cost_map[j].cost == INFTY)
4317 		continue;
4318 
4319 	      fprintf (dump_file, "  %d\t%d\t",
4320 		       use->cost_map[j].cand->id,
4321 		       use->cost_map[j].cost);
4322 	      if (use->cost_map[j].depends_on)
4323 		bitmap_print (dump_file,
4324 			      use->cost_map[j].depends_on, "","");
4325 	      fprintf (dump_file, "\n");
4326 	    }
4327 
4328 	  fprintf (dump_file, "\n");
4329 	}
4330       fprintf (dump_file, "\n");
4331     }
4332 }
4333 
4334 /* Determines cost of the candidate CAND.  */
4335 
4336 static void
4337 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4338 {
4339   unsigned cost_base, cost_step;
4340   tree base;
4341 
4342   if (!cand->iv)
4343     {
4344       cand->cost = 0;
4345       return;
4346     }
4347 
4348   /* There are two costs associated with the candidate -- its increment
4349      and its initialization.  The second is almost negligible for any loop
4350      that rolls enough, so we give it only a small weight.  */
4351 
4352   base = cand->iv->base;
4353   cost_base = force_var_cost (data, base, NULL);
4354   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4355 
4356   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4357 
4358   /* Prefer the original iv unless we may gain something by replacing it;
4359      this is not really relevant for artificial ivs created by other
4360      passes.  */
4361   if (cand->pos == IP_ORIGINAL
4362       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4363     cand->cost--;
4364 
4365   /* Prefer not to insert statements into latch unless there are some
4366      already (so that we do not create unnecessary jumps).  */
4367   if (cand->pos == IP_END
4368       && empty_block_p (ip_end_pos (data->current_loop)))
4369     cand->cost++;
4370 }
4371 
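/* Worked example (illustrative numbers): with add_cost of 1, a base whose
   force_var_cost is 4, and AVG_LOOP_NITER assumed to evaluate to 5, the
   candidate costs 1 + 4 / 5 = 1 -- the initialization is mostly amortized
   away for loops that roll enough.  */
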
4372 /* Determines costs of computation of the candidates.  */
4373 
4374 static void
4375 determine_iv_costs (struct ivopts_data *data)
4376 {
4377   unsigned i;
4378 
4379   if (dump_file && (dump_flags & TDF_DETAILS))
4380     {
4381       fprintf (dump_file, "Candidate costs:\n");
4382       fprintf (dump_file, "  cand\tcost\n");
4383     }
4384 
4385   for (i = 0; i < n_iv_cands (data); i++)
4386     {
4387       struct iv_cand *cand = iv_cand (data, i);
4388 
4389       determine_iv_cost (data, cand);
4390 
4391       if (dump_file && (dump_flags & TDF_DETAILS))
4392 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4393     }
4394 
4395   if (dump_file && (dump_flags & TDF_DETAILS))
4396     fprintf (dump_file, "\n");
4397 }
4398 
4399 /* Calculates cost for having SIZE induction variables.  */
4400 
4401 static unsigned
4402 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4403 {
4404   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4405 }
4406 
4407 /* For each size of the induction variable set determine the penalty.  */
4408 
4409 static void
4410 determine_set_costs (struct ivopts_data *data)
4411 {
4412   unsigned j, n;
4413   tree phi, op;
4414   struct loop *loop = data->current_loop;
4415   bitmap_iterator bi;
4416 
4417   /* We use the following model (definitely improvable, especially the
4418      cost function -- TODO):
4419 
4420      We estimate the number of registers available (using MD data), name it A.
4421 
4422      We estimate the number of registers used by the loop, name it U.  This
4423      number is obtained as the number of loop phi nodes (not counting virtual
4424      registers and bivs) + the number of variables from outside of the loop.
4425 
4426      We set a reserve R (free regs that are used for temporary computations,
4427      etc.).  For now the reserve is a constant 3.
4428 
4429      Let I be the number of induction variables.
4430 
4431      -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do
4432	not encourage creating a lot of ivs without a reason).
4433      -- if A - R < U + I <= A, the cost is I * PRES_COST
4434      -- if U + I > A, the cost is I * PRES_COST and
4435         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4436 
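  /* Worked example (illustrative numbers): A = 8 available registers,
     R = 3, U = 4 registers used by the loop.  A set of I = 1 ivs satisfies
     U + I + R <= A and costs 1 * SMALL_COST; I = 3 falls into the band
     A - R < U + I <= A and costs 3 * PRES_COST; I = 6 gives U + I = 10 > A,
     adding a spill term proportional to (U + I - A) / (U + I) = 2 / 10.  */
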
4437   if (dump_file && (dump_flags & TDF_DETAILS))
4438     {
4439       fprintf (dump_file, "Global costs:\n");
4440       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4441       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4442       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4443       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4444     }
4445 
4446   n = 0;
4447   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4448     {
4449       op = PHI_RESULT (phi);
4450 
4451       if (!is_gimple_reg (op))
4452 	continue;
4453 
4454       if (get_iv (data, op))
4455 	continue;
4456 
4457       n++;
4458     }
4459 
4460   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4461     {
4462       struct version_info *info = ver_info (data, j);
4463 
4464       if (info->inv_id && info->has_nonlin_use)
4465 	n++;
4466     }
4467 
4468   data->regs_used = n;
4469   if (dump_file && (dump_flags & TDF_DETAILS))
4470     fprintf (dump_file, "  regs_used %d\n", n);
4471 
4472   if (dump_file && (dump_flags & TDF_DETAILS))
4473     {
4474       fprintf (dump_file, "  cost for size:\n");
4475       fprintf (dump_file, "  ivs\tcost\n");
4476       for (j = 0; j <= 2 * target_avail_regs; j++)
4477 	fprintf (dump_file, "  %d\t%d\n", j,
4478 		 ivopts_global_cost_for_size (data, j));
4479       fprintf (dump_file, "\n");
4480     }
4481 }
4482 
4483 /* Returns true if A is a cheaper cost pair than B.  */
4484 
4485 static bool
4486 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4487 {
4488   if (!a)
4489     return false;
4490 
4491   if (!b)
4492     return true;
4493 
4494   if (a->cost < b->cost)
4495     return true;
4496 
4497   if (a->cost > b->cost)
4498     return false;
4499 
4500   /* In case the costs are the same, prefer the cheaper candidate.  */
4501   if (a->cand->cost < b->cand->cost)
4502     return true;
4503 
4504   return false;
4505 }
4506 
4507 /* Computes the cost field of IVS structure.  */
4508 
4509 static void
4510 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4511 {
4512   unsigned cost = 0;
4513 
4514   cost += ivs->cand_use_cost;
4515   cost += ivs->cand_cost;
4516   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4517 
4518   ivs->cost = cost;
4519 }
4520 
4521 /* Remove the invariants in set INVS from set IVS.  */
4522 
4523 static void
4524 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4525 {
4526   bitmap_iterator bi;
4527   unsigned iid;
4528 
4529   if (!invs)
4530     return;
4531 
4532   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4533     {
4534       ivs->n_invariant_uses[iid]--;
4535       if (ivs->n_invariant_uses[iid] == 0)
4536 	ivs->n_regs--;
4537     }
4538 }
4539 
4540 /* Set USE not to be expressed by any candidate in IVS.  */
4541 
4542 static void
4543 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4544 		 struct iv_use *use)
4545 {
4546   unsigned uid = use->id, cid;
4547   struct cost_pair *cp;
4548 
4549   cp = ivs->cand_for_use[uid];
4550   if (!cp)
4551     return;
4552   cid = cp->cand->id;
4553 
4554   ivs->bad_uses++;
4555   ivs->cand_for_use[uid] = NULL;
4556   ivs->n_cand_uses[cid]--;
4557 
4558   if (ivs->n_cand_uses[cid] == 0)
4559     {
4560       bitmap_clear_bit (ivs->cands, cid);
4561       /* Do not count the pseudocandidates.  */
4562       if (cp->cand->iv)
4563 	ivs->n_regs--;
4564       ivs->n_cands--;
4565       ivs->cand_cost -= cp->cand->cost;
4566 
4567       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4568     }
4569 
4570   ivs->cand_use_cost -= cp->cost;
4571 
4572   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4573   iv_ca_recount_cost (data, ivs);
4574 }
4575 
4576 /* Add invariants in set INVS to set IVS.  */
4577 
4578 static void
4579 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4580 {
4581   bitmap_iterator bi;
4582   unsigned iid;
4583 
4584   if (!invs)
4585     return;
4586 
4587   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4588     {
4589       ivs->n_invariant_uses[iid]++;
4590       if (ivs->n_invariant_uses[iid] == 1)
4591 	ivs->n_regs++;
4592     }
4593 }
4594 
4595 /* Set cost pair for USE in set IVS to CP.  */
4596 
4597 static void
4598 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4599 	      struct iv_use *use, struct cost_pair *cp)
4600 {
4601   unsigned uid = use->id, cid;
4602 
4603   if (ivs->cand_for_use[uid] == cp)
4604     return;
4605 
4606   if (ivs->cand_for_use[uid])
4607     iv_ca_set_no_cp (data, ivs, use);
4608 
4609   if (cp)
4610     {
4611       cid = cp->cand->id;
4612 
4613       ivs->bad_uses--;
4614       ivs->cand_for_use[uid] = cp;
4615       ivs->n_cand_uses[cid]++;
4616       if (ivs->n_cand_uses[cid] == 1)
4617 	{
4618 	  bitmap_set_bit (ivs->cands, cid);
4619 	  /* Do not count the pseudocandidates.  */
4620 	  if (cp->cand->iv)
4621 	    ivs->n_regs++;
4622 	  ivs->n_cands++;
4623 	  ivs->cand_cost += cp->cand->cost;
4624 
4625 	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4626 	}
4627 
4628       ivs->cand_use_cost += cp->cost;
4629       iv_ca_set_add_invariants (ivs, cp->depends_on);
4630       iv_ca_recount_cost (data, ivs);
4631     }
4632 }
4633 
4634 /* Extend set IVS by expressing USE by some of the candidates in it
4635    if possible.  */
4636 
4637 static void
4638 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4639 	       struct iv_use *use)
4640 {
4641   struct cost_pair *best_cp = NULL, *cp;
4642   bitmap_iterator bi;
4643   unsigned i;
4644 
4645   gcc_assert (ivs->upto >= use->id);
4646 
4647   if (ivs->upto == use->id)
4648     {
4649       ivs->upto++;
4650       ivs->bad_uses++;
4651     }
4652 
4653   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4654     {
4655       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4656 
4657       if (cheaper_cost_pair (cp, best_cp))
4658 	best_cp = cp;
4659     }
4660 
4661   iv_ca_set_cp (data, ivs, use, best_cp);
4662 }
4663 
4664 /* Get cost for assignment IVS.  */
4665 
4666 static unsigned
4667 iv_ca_cost (struct iv_ca *ivs)
4668 {
4669   return (ivs->bad_uses ? INFTY : ivs->cost);
4670 }
4671 
4672 /* Returns true if all dependences of CP are among invariants in IVS.  */
4673 
4674 static bool
4675 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4676 {
4677   unsigned i;
4678   bitmap_iterator bi;
4679 
4680   if (!cp->depends_on)
4681     return true;
4682 
4683   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4684     {
4685       if (ivs->n_invariant_uses[i] == 0)
4686 	return false;
4687     }
4688 
4689   return true;
4690 }
4691 
4692 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
4693    chains it before NEXT_CHANGE.  */
4694 
4695 static struct iv_ca_delta *
4696 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4697 		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4698 {
4699   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4700 
4701   change->use = use;
4702   change->old_cp = old_cp;
4703   change->new_cp = new_cp;
4704   change->next_change = next_change;
4705 
4706   return change;
4707 }
4708 
4709 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4710    are rewritten.  */
4711 
4712 static struct iv_ca_delta *
4713 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4714 {
4715   struct iv_ca_delta *last;
4716 
4717   if (!l2)
4718     return l1;
4719 
4720   if (!l1)
4721     return l2;
4722 
4723   for (last = l1; last->next_change; last = last->next_change)
4724     continue;
4725   last->next_change = l2;
4726 
4727   return l1;
4728 }
4729 
4730 /* Returns the candidate by which USE is expressed in IVS.  */
4731 
4732 static struct cost_pair *
4733 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4734 {
4735   return ivs->cand_for_use[use->id];
4736 }
4737 
4738 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4739 
4740 static struct iv_ca_delta *
4741 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4742 {
4743   struct iv_ca_delta *act, *next, *prev = NULL;
4744   struct cost_pair *tmp;
4745 
4746   for (act = delta; act; act = next)
4747     {
4748       next = act->next_change;
4749       act->next_change = prev;
4750       prev = act;
4751 
4752       tmp = act->old_cp;
4753       act->old_cp = act->new_cp;
4754       act->new_cp = tmp;
4755     }
4756 
4757   return prev;
4758 }
4759 
4760 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4761    reverted instead.  */
4762 
4763 static void
4764 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4765 		    struct iv_ca_delta *delta, bool forward)
4766 {
4767   struct cost_pair *from, *to;
4768   struct iv_ca_delta *act;
4769 
4770   if (!forward)
4771     delta = iv_ca_delta_reverse (delta);
4772 
4773   for (act = delta; act; act = act->next_change)
4774     {
4775       from = act->old_cp;
4776       to = act->new_cp;
4777       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4778       iv_ca_set_cp (data, ivs, act->use, to);
4779     }
4780 
4781   if (!forward)
4782     iv_ca_delta_reverse (delta);
4783 }
4784 
4785 /* Returns true if CAND is used in IVS.  */
4786 
4787 static bool
4788 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4789 {
4790   return ivs->n_cand_uses[cand->id] > 0;
4791 }
4792 
4793 /* Returns number of induction variable candidates in the set IVS.  */
4794 
4795 static unsigned
4796 iv_ca_n_cands (struct iv_ca *ivs)
4797 {
4798   return ivs->n_cands;
4799 }
4800 
4801 /* Free the list of changes DELTA.  */
4802 
4803 static void
4804 iv_ca_delta_free (struct iv_ca_delta **delta)
4805 {
4806   struct iv_ca_delta *act, *next;
4807 
4808   for (act = *delta; act; act = next)
4809     {
4810       next = act->next_change;
4811       free (act);
4812     }
4813 
4814   *delta = NULL;
4815 }
4816 
4817 /* Allocates new iv candidates assignment.  */
4818 
4819 static struct iv_ca *
4820 iv_ca_new (struct ivopts_data *data)
4821 {
4822   struct iv_ca *nw = XNEW (struct iv_ca);
4823 
4824   nw->upto = 0;
4825   nw->bad_uses = 0;
4826   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4827   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4828   nw->cands = BITMAP_ALLOC (NULL);
4829   nw->n_cands = 0;
4830   nw->n_regs = 0;
4831   nw->cand_use_cost = 0;
4832   nw->cand_cost = 0;
4833   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4834   nw->cost = 0;
4835 
4836   return nw;
4837 }
4838 
4839 /* Free memory occupied by the set IVS.  */
4840 
4841 static void
4842 iv_ca_free (struct iv_ca **ivs)
4843 {
4844   free ((*ivs)->cand_for_use);
4845   free ((*ivs)->n_cand_uses);
4846   BITMAP_FREE ((*ivs)->cands);
4847   free ((*ivs)->n_invariant_uses);
4848   free (*ivs);
4849   *ivs = NULL;
4850 }
4851 
4852 /* Dumps IVS to FILE.  */
4853 
4854 static void
4855 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4856 {
4857   const char *pref = "  invariants ";
4858   unsigned i;
4859 
4860   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4861   bitmap_print (file, ivs->cands, "  candidates ","\n");
4862 
4863   for (i = 1; i <= data->max_inv_id; i++)
4864     if (ivs->n_invariant_uses[i])
4865       {
4866 	fprintf (file, "%s%d", pref, i);
4867 	pref = ", ";
4868       }
4869   fprintf (file, "\n");
4870 }
4871 
4872 /* Try changing the candidate used for each use in IVS to CAND.  Return the
4873    cost of the new set, and store the differences in DELTA.  The number of
4874    induction variables in the new set is stored in N_IVS.  */
4875 
4876 static unsigned
4877 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4878 	      struct iv_cand *cand, struct iv_ca_delta **delta,
4879 	      unsigned *n_ivs)
4880 {
4881   unsigned i, cost;
4882   struct iv_use *use;
4883   struct cost_pair *old_cp, *new_cp;
4884 
4885   *delta = NULL;
4886   for (i = 0; i < ivs->upto; i++)
4887     {
4888       use = iv_use (data, i);
4889       old_cp = iv_ca_cand_for_use (ivs, use);
4890 
4891       if (old_cp
4892 	  && old_cp->cand == cand)
4893 	continue;
4894 
4895       new_cp = get_use_iv_cost (data, use, cand);
4896       if (!new_cp)
4897 	continue;
4898 
4899       if (!iv_ca_has_deps (ivs, new_cp))
4900 	continue;
4901 
4902       if (!cheaper_cost_pair (new_cp, old_cp))
4903 	continue;
4904 
4905       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4906     }
4907 
4908   iv_ca_delta_commit (data, ivs, *delta, true);
4909   cost = iv_ca_cost (ivs);
4910   if (n_ivs)
4911     *n_ivs = iv_ca_n_cands (ivs);
4912   iv_ca_delta_commit (data, ivs, *delta, false);
4913 
4914   return cost;
4915 }
4916 
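/* A minimal sketch (not compiled) of the typical caller pattern for the
   delta interface, mirroring try_improve_iv_set below; DATA, IVS and CAND
   are assumed to come from the surrounding context.  */
#if 0
  struct iv_ca_delta *delta;
  unsigned cost = iv_ca_extend (data, ivs, cand, &delta, NULL);

  if (cost < iv_ca_cost (ivs))
    iv_ca_delta_commit (data, ivs, delta, true);	/* Apply the change.  */
  iv_ca_delta_free (&delta);
#endif
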
4917 /* Try narrowing set IVS by removing CAND.  Return the cost of
4918    the new set and store the differences in DELTA.  */
4919 
4920 static unsigned
4921 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4922 	      struct iv_cand *cand, struct iv_ca_delta **delta)
4923 {
4924   unsigned i, ci;
4925   struct iv_use *use;
4926   struct cost_pair *old_cp, *new_cp, *cp;
4927   bitmap_iterator bi;
4928   struct iv_cand *cnd;
4929   unsigned cost;
4930 
4931   *delta = NULL;
4932   for (i = 0; i < n_iv_uses (data); i++)
4933     {
4934       use = iv_use (data, i);
4935 
4936       old_cp = iv_ca_cand_for_use (ivs, use);
4937       if (old_cp->cand != cand)
4938 	continue;
4939 
4940       new_cp = NULL;
4941 
4942       if (data->consider_all_candidates)
4943 	{
4944 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4945 	    {
4946 	      if (ci == cand->id)
4947 		continue;
4948 
4949 	      cnd = iv_cand (data, ci);
4950 
4951 	      cp = get_use_iv_cost (data, use, cnd);
4952 	      if (!cp)
4953 		continue;
4954 	      if (!iv_ca_has_deps (ivs, cp))
4955 		continue;
4956 
4957 	      if (!cheaper_cost_pair (cp, new_cp))
4958 		continue;
4959 
4960 	      new_cp = cp;
4961 	    }
4962 	}
4963       else
4964 	{
4965 	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4966 	    {
4967 	      if (ci == cand->id)
4968 		continue;
4969 
4970 	      cnd = iv_cand (data, ci);
4971 
4972 	      cp = get_use_iv_cost (data, use, cnd);
4973 	      if (!cp)
4974 		continue;
4975 	      if (!iv_ca_has_deps (ivs, cp))
4976 		continue;
4977 
4978 	      if (!cheaper_cost_pair (cp, new_cp))
4979 		continue;
4980 
4981 	      new_cp = cp;
4982 	    }
4983 	}
4984 
4985       if (!new_cp)
4986 	{
4987 	  iv_ca_delta_free (delta);
4988 	  return INFTY;
4989 	}
4990 
4991       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4992     }
4993 
4994   iv_ca_delta_commit (data, ivs, *delta, true);
4995   cost = iv_ca_cost (ivs);
4996   iv_ca_delta_commit (data, ivs, *delta, false);
4997 
4998   return cost;
4999 }
5000 
5001 /* Try optimizing the set of candidates IVS by removing candidates other
5002    than EXCEPT_CAND from it.  Return the cost of the new set, and store the
5003    differences in DELTA.  */
5004 
5005 static unsigned
5006 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5007 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
5008 {
5009   bitmap_iterator bi;
5010   struct iv_ca_delta *act_delta, *best_delta;
5011   unsigned i, best_cost, acost;
5012   struct iv_cand *cand;
5013 
5014   best_delta = NULL;
5015   best_cost = iv_ca_cost (ivs);
5016 
5017   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5018     {
5019       cand = iv_cand (data, i);
5020 
5021       if (cand == except_cand)
5022 	continue;
5023 
5024       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5025 
5026       if (acost < best_cost)
5027 	{
5028 	  best_cost = acost;
5029 	  iv_ca_delta_free (&best_delta);
5030 	  best_delta = act_delta;
5031 	}
5032       else
5033 	iv_ca_delta_free (&act_delta);
5034     }
5035 
5036   if (!best_delta)
5037     {
5038       *delta = NULL;
5039       return best_cost;
5040     }
5041 
5042   /* Recurse to possibly remove other unnecessary ivs.  */
5043   iv_ca_delta_commit (data, ivs, best_delta, true);
5044   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5045   iv_ca_delta_commit (data, ivs, best_delta, false);
5046   *delta = iv_ca_delta_join (best_delta, *delta);
5047   return best_cost;
5048 }
5049 
5050 /* Tries to extend the set IVS in the best possible way in order
5051    to express USE.  */
5052 
5053 static bool
5054 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5055 		  struct iv_use *use)
5056 {
5057   unsigned best_cost, act_cost;
5058   unsigned i;
5059   bitmap_iterator bi;
5060   struct iv_cand *cand;
5061   struct iv_ca_delta *best_delta = NULL, *act_delta;
5062   struct cost_pair *cp;
5063 
5064   iv_ca_add_use (data, ivs, use);
5065   best_cost = iv_ca_cost (ivs);
5066 
5067   cp = iv_ca_cand_for_use (ivs, use);
5068   if (cp)
5069     {
5070       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5071       iv_ca_set_no_cp (data, ivs, use);
5072     }
5073 
5074   /* First try the important candidates.  Only if that fails, try the
5075      specific ones.  Rationale -- in loops with many variables the best
5076      choice often is to use just one generic biv.  If we added many ivs
5077      specific to the uses here, the optimization algorithm would later be
5078      likely to get stuck in a local minimum, thus causing us to create too
5079      many ivs.  Going from few ivs to more seems more likely to be
5080      successful -- starting from few ivs, replacing an expensive use by a
5081      specific iv should always be a win.  */
5082   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5083     {
5084       cand = iv_cand (data, i);
5085 
5086       if (iv_ca_cand_used_p (ivs, cand))
5087 	continue;
5088 
5089       cp = get_use_iv_cost (data, use, cand);
5090       if (!cp)
5091 	continue;
5092 
5093       iv_ca_set_cp (data, ivs, use, cp);
5094       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5095       iv_ca_set_no_cp (data, ivs, use);
5096       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5097 
5098       if (act_cost < best_cost)
5099 	{
5100 	  best_cost = act_cost;
5101 
5102 	  iv_ca_delta_free (&best_delta);
5103 	  best_delta = act_delta;
5104 	}
5105       else
5106 	iv_ca_delta_free (&act_delta);
5107     }
5108 
5109   if (best_cost == INFTY)
5110     {
5111       for (i = 0; i < use->n_map_members; i++)
5112 	{
5113 	  cp = use->cost_map + i;
5114 	  cand = cp->cand;
5115 	  if (!cand)
5116 	    continue;
5117 
5118 	  /* Already tried this.  */
5119 	  if (cand->important)
5120 	    continue;
5121 
5122 	  if (iv_ca_cand_used_p (ivs, cand))
5123 	    continue;
5124 
5125 	  act_delta = NULL;
5126 	  iv_ca_set_cp (data, ivs, use, cp);
5127 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5128 	  iv_ca_set_no_cp (data, ivs, use);
5129 	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5130 				       cp, act_delta);
5131 
5132 	  if (act_cost < best_cost)
5133 	    {
5134 	      best_cost = act_cost;
5135 
5136 	      if (best_delta)
5137 		iv_ca_delta_free (&best_delta);
5138 	      best_delta = act_delta;
5139 	    }
5140 	  else
5141 	    iv_ca_delta_free (&act_delta);
5142 	}
5143     }
5144 
5145   iv_ca_delta_commit (data, ivs, best_delta, true);
5146   iv_ca_delta_free (&best_delta);
5147 
5148   return (best_cost != INFTY);
5149 }
5150 
5151 /* Finds an initial assignment of candidates to uses.  */
5152 
5153 static struct iv_ca *
5154 get_initial_solution (struct ivopts_data *data)
5155 {
5156   struct iv_ca *ivs = iv_ca_new (data);
5157   unsigned i;
5158 
5159   for (i = 0; i < n_iv_uses (data); i++)
5160     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5161       {
5162 	iv_ca_free (&ivs);
5163 	return NULL;
5164       }
5165 
5166   return ivs;
5167 }
5168 
5169 /* Tries to improve the set of induction variables IVS.  */
5170 
5171 static bool
5172 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5173 {
5174   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5175   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5176   struct iv_cand *cand;
5177 
5178   /* Try extending the set of induction variables by one.  */
5179   for (i = 0; i < n_iv_cands (data); i++)
5180     {
5181       cand = iv_cand (data, i);
5182 
5183       if (iv_ca_cand_used_p (ivs, cand))
5184 	continue;
5185 
5186       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5187       if (!act_delta)
5188 	continue;
5189 
5190       /* If we successfully added the candidate and the set is small enough,
5191 	 try optimizing it by removing other candidates.  */
5192       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5193       	{
5194 	  iv_ca_delta_commit (data, ivs, act_delta, true);
5195 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5196 	  iv_ca_delta_commit (data, ivs, act_delta, false);
5197 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5198 	}
5199 
5200       if (acost < best_cost)
5201 	{
5202 	  best_cost = acost;
5203 	  iv_ca_delta_free (&best_delta);
5204 	  best_delta = act_delta;
5205 	}
5206       else
5207 	iv_ca_delta_free (&act_delta);
5208     }
5209 
5210   if (!best_delta)
5211     {
5212       /* Try removing the candidates from the set instead.  */
5213       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5214 
5215       /* Nothing more we can do.  */
5216       if (!best_delta)
5217 	return false;
5218     }
5219 
5220   iv_ca_delta_commit (data, ivs, best_delta, true);
5221   gcc_assert (best_cost == iv_ca_cost (ivs));
5222   iv_ca_delta_free (&best_delta);
5223   return true;
5224 }
5225 
5226 /* Attempts to find the optimal set of induction variables.  We use a simple
5227    greedy heuristic -- we try to replace at most one candidate in the selected
5228    solution and remove the unused ivs as long as this improves the cost.  */
5229 
5230 static struct iv_ca *
5231 find_optimal_iv_set (struct ivopts_data *data)
5232 {
5233   unsigned i;
5234   struct iv_ca *set;
5235   struct iv_use *use;
5236 
5237   /* Get the initial solution.  */
5238   set = get_initial_solution (data);
5239   if (!set)
5240     {
5241       if (dump_file && (dump_flags & TDF_DETAILS))
5242 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5243       return NULL;
5244     }
5245 
5246   if (dump_file && (dump_flags & TDF_DETAILS))
5247     {
5248       fprintf (dump_file, "Initial set of candidates:\n");
5249       iv_ca_dump (data, dump_file, set);
5250     }
5251 
5252   while (try_improve_iv_set (data, set))
5253     {
5254       if (dump_file && (dump_flags & TDF_DETAILS))
5255 	{
5256 	  fprintf (dump_file, "Improved to:\n");
5257 	  iv_ca_dump (data, dump_file, set);
5258 	}
5259     }
5260 
5261   if (dump_file && (dump_flags & TDF_DETAILS))
5262     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5263 
5264   for (i = 0; i < n_iv_uses (data); i++)
5265     {
5266       use = iv_use (data, i);
5267       use->selected = iv_ca_cand_for_use (set, use)->cand;
5268     }
5269 
5270   return set;
5271 }
5272 
5273 /* Creates a new induction variable corresponding to CAND.  */
5274 
5275 static void
5276 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5277 {
5278   block_stmt_iterator incr_pos;
5279   tree base;
5280   bool after = false;
5281 
5282   if (!cand->iv)
5283     return;
5284 
5285   switch (cand->pos)
5286     {
5287     case IP_NORMAL:
5288       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5289       break;
5290 
5291     case IP_END:
5292       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5293       after = true;
5294       break;
5295 
5296     case IP_ORIGINAL:
5297       /* Mark that the iv is preserved.  */
5298       name_info (data, cand->var_before)->preserve_biv = true;
5299       name_info (data, cand->var_after)->preserve_biv = true;
5300 
5301       /* Rewrite the increment so that it uses var_before directly.  */
5302       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5303 
5304       return;
5305     }
5306 
5307   gimple_add_tmp_var (cand->var_before);
5308   add_referenced_var (cand->var_before);
5309 
5310   base = unshare_expr (cand->iv->base);
5311 
5312   create_iv (base, unshare_expr (cand->iv->step),
5313 	     cand->var_before, data->current_loop,
5314 	     &incr_pos, after, &cand->var_before, &cand->var_after);
5315 }
5316 
5317 /* Creates new induction variables described in SET.  */
5318 
5319 static void
5320 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5321 {
5322   unsigned i;
5323   struct iv_cand *cand;
5324   bitmap_iterator bi;
5325 
5326   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5327     {
5328       cand = iv_cand (data, i);
5329       create_new_iv (data, cand);
5330     }
5331 }
5332 
5333 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5334    is true, also remove the ssa name defined by the statement.  */
5335 
5336 static void
5337 remove_statement (tree stmt, bool including_defined_name)
5338 {
5339   if (TREE_CODE (stmt) == PHI_NODE)
5340     {
5341       if (!including_defined_name)
5342 	{
5343 	  /* Prevent the ssa name defined by the statement from being removed.  */
5344 	  SET_PHI_RESULT (stmt, NULL);
5345 	}
5346       remove_phi_node (stmt, NULL_TREE);
5347     }
5348   else
5349     {
5350       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5351 
5352       bsi_remove (&bsi, true);
5353     }
5354 }
5355 
5356 /* Rewrites USE (definition of iv used in a nonlinear expression)
5357    using candidate CAND.  */
5358 
5359 static void
5360 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5361 			    struct iv_use *use, struct iv_cand *cand)
5362 {
5363   tree comp;
5364   tree op, stmts, tgt, ass;
5365   block_stmt_iterator bsi, pbsi;
5366 
5367   /* An important special case -- if we are asked to express the value of
5368      the original iv by itself, just exit; there is no need to
5369      introduce a new computation (that might also need casting the
5370      variable to unsigned and back).  */
5371   if (cand->pos == IP_ORIGINAL
5372       && cand->incremented_at == use->stmt)
5373     {
5374       tree step, ctype, utype;
5375       enum tree_code incr_code = PLUS_EXPR;
5376 
5377       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5378       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5379 
5380       step = cand->iv->step;
5381       ctype = TREE_TYPE (step);
5382       utype = TREE_TYPE (cand->var_after);
5383       if (TREE_CODE (step) == NEGATE_EXPR)
5384 	{
5385 	  incr_code = MINUS_EXPR;
5386 	  step = TREE_OPERAND (step, 0);
5387 	}
5388 
5389       /* Check whether we may leave the computation unchanged.
5390 	 This is the case only if it does not rely on other
5391 	 computations in the loop -- otherwise, the computation
5392 	 we rely upon may be removed in remove_unused_ivs,
5393 	 leading to an ICE.  */
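      /* E.g. "i = i + 4" may be left alone, because the step is a
	 constant; but in a hypothetical "i = i + j" where j is itself
	 an iv of the loop, the computation of j could be removed later,
	 so the increment has to be rewritten using var_before.  */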
5394       op = TREE_OPERAND (use->stmt, 1);
5395       if (TREE_CODE (op) == PLUS_EXPR
5396 	  || TREE_CODE (op) == MINUS_EXPR)
5397 	{
5398 	  if (TREE_OPERAND (op, 0) == cand->var_before)
5399 	    op = TREE_OPERAND (op, 1);
5400 	  else if (TREE_CODE (op) == PLUS_EXPR
5401 		   && TREE_OPERAND (op, 1) == cand->var_before)
5402 	    op = TREE_OPERAND (op, 0);
5403 	  else
5404 	    op = NULL_TREE;
5405 	}
5406       else
5407 	op = NULL_TREE;
5408 
5409       if (op
5410 	  && (TREE_CODE (op) == INTEGER_CST
5411 	      || operand_equal_p (op, step, 0)))
5412 	return;
5413 
5414       /* Otherwise, add the necessary computations to express
5415 	 the iv.  */
5416       op = fold_convert (ctype, cand->var_before);
5417       comp = fold_convert (utype,
5418 			   build2 (incr_code, ctype, op,
5419 				   unshare_expr (step)));
5420     }
5421   else
5422     comp = get_computation (data->current_loop, use, cand);
5423 
5424   switch (TREE_CODE (use->stmt))
5425     {
5426     case PHI_NODE:
5427       tgt = PHI_RESULT (use->stmt);
5428 
5429       /* If we should keep the biv, do not replace it.  */
5430       if (name_info (data, tgt)->preserve_biv)
5431 	return;
5432 
5433       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5434       while (!bsi_end_p (pbsi)
5435 	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5436 	{
5437 	  bsi = pbsi;
5438 	  bsi_next (&pbsi);
5439 	}
5440       break;
5441 
5442     case MODIFY_EXPR:
5443       tgt = TREE_OPERAND (use->stmt, 0);
5444       bsi = bsi_for_stmt (use->stmt);
5445       break;
5446 
5447     default:
5448       gcc_unreachable ();
5449     }
5450 
5451   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5452 
5453   if (TREE_CODE (use->stmt) == PHI_NODE)
5454     {
5455       if (stmts)
5456 	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5457       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5458       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5459       remove_statement (use->stmt, false);
5460       SSA_NAME_DEF_STMT (tgt) = ass;
5461     }
5462   else
5463     {
5464       if (stmts)
5465 	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5466       TREE_OPERAND (use->stmt, 1) = op;
5467     }
5468 }
5469 
5470 /* Replaces the ssa name in index IDX by its underlying variable.  Callback
5471    for for_each_index.  */
5472 
5473 static bool
5474 idx_remove_ssa_names (tree base, tree *idx,
5475 		      void *data ATTRIBUTE_UNUSED)
5476 {
5477   tree *op;
5478 
5479   if (TREE_CODE (*idx) == SSA_NAME)
5480     *idx = SSA_NAME_VAR (*idx);
5481 
5482   if (TREE_CODE (base) == ARRAY_REF)
5483     {
5484       op = &TREE_OPERAND (base, 2);
5485       if (*op
5486 	  && TREE_CODE (*op) == SSA_NAME)
5487 	*op = SSA_NAME_VAR (*op);
5488       op = &TREE_OPERAND (base, 3);
5489       if (*op
5490 	  && TREE_CODE (*op) == SSA_NAME)
5491 	*op = SSA_NAME_VAR (*op);
5492     }
5493 
5494   return true;
5495 }
5496 
5497 /* Unshares REF and replaces the ssa names inside it by their underlying
     variables.  */
5498 
5499 static tree
5500 unshare_and_remove_ssa_names (tree ref)
5501 {
5502   ref = unshare_expr (ref);
5503   for_each_index (&ref, idx_remove_ssa_names, NULL);
5504 
5505   return ref;
5506 }
5507 
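/* For illustration (the ssa version number below is made up): given REF
   a[i_13], the result is an unshared copy rewritten to a[i], with each
   ssa name replaced by the variable it is a version of.  */
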
5508 /* Extract the alias analysis info for the memory reference REF.  How this
5509    information is stored, and what exactly it means, depends on the type of
5510    the reference; but there is always one _DECL node hidden somewhere that
5511    is used to determine the set of virtual operands for the reference.  The
5512    code below deciphers this jungle and extracts that single useful piece
5513    of information.  */
5514 
5515 static tree
5516 get_ref_tag (tree ref, tree orig)
5517 {
5518   tree var = get_base_address (ref);
5519   tree aref = NULL_TREE, tag, sv;
5520   HOST_WIDE_INT offset, size, maxsize;
5521 
5522   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5523     {
5524       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5525       if (ref)
5526 	break;
5527     }
5528 
5529   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5530     return unshare_expr (sv);
5531 
5532   if (!var)
5533     return NULL_TREE;
5534 
5535   if (TREE_CODE (var) == INDIRECT_REF)
5536     {
5537       /* If the base is a dereference of a pointer, first check its name memory
5538 	 tag.  If it does not have one, use its symbol memory tag.  */
5539       var = TREE_OPERAND (var, 0);
5540       if (TREE_CODE (var) != SSA_NAME)
5541 	return NULL_TREE;
5542 
5543       if (SSA_NAME_PTR_INFO (var))
5544 	{
5545 	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5546 	  if (tag)
5547 	    return tag;
5548 	}
5549 
5550       var = SSA_NAME_VAR (var);
5551       tag = var_ann (var)->symbol_mem_tag;
5552       gcc_assert (tag != NULL_TREE);
5553       return tag;
5554     }
5555   else
5556     {
5557       if (!DECL_P (var))
5558 	return NULL_TREE;
5559 
5560       tag = var_ann (var)->symbol_mem_tag;
5561       if (tag)
5562 	return tag;
5563 
5564       return var;
5565     }
5566 }
5567 
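/* For illustration (a sketch; the tag names are made up): for a
   reference *p_1 where p_1 has the name memory tag NMT.3, the result is
   NMT.3; if p_1 has no name tag, the symbol memory tag of p is used.
   For a reference s.f to a declaration s, the result is the symbol
   memory tag of s if it has one, and s itself otherwise.  */
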
5568 /* Copies the reference information from OLD_REF to NEW_REF.  */
5569 
5570 static void
5571 copy_ref_info (tree new_ref, tree old_ref)
5572 {
5573   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5574     copy_mem_ref_info (new_ref, old_ref);
5575   else
5576     {
5577       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5578       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5579     }
5580 }
5581 
5582 /* Rewrites USE (an address that is an iv) using candidate CAND.  */
5583 
5584 static void
5585 rewrite_use_address (struct ivopts_data *data,
5586 		     struct iv_use *use, struct iv_cand *cand)
5587 {
5588   struct affine_tree_combination aff;
5589   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5590   tree ref;
5591 
5592   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5593   unshare_aff_combination (&aff);
5594 
5595   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5596   copy_ref_info (ref, *use->op_p);
5597   *use->op_p = ref;
5598 }
5599 
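/* For illustration (a sketch with made-up names): an address use such
   as *(p_1 + 4 * i_2), expressed in terms of CAND with base p, index
   ivtmp and step 4, becomes a TARGET_MEM_REF of roughly the shape
   MEM[base: p, index: ivtmp, step: 4], so that the reference can later
   be matched to an addressing mode of the target.  */
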
5600 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5601    candidate CAND.  */
5602 
5603 static void
5604 rewrite_use_compare (struct ivopts_data *data,
5605 		     struct iv_use *use, struct iv_cand *cand)
5606 {
5607   tree comp;
5608   tree *op_p, cond, op, stmts, bound;
5609   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5610   enum tree_code compare;
5611   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5612 
5613   bound = cp->value;
5614   if (bound)
5615     {
5616       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5617       tree var_type = TREE_TYPE (var);
5618 
5619       compare = iv_elimination_compare (data, use);
5620       bound = fold_convert (var_type, bound);
5621       op = force_gimple_operand (unshare_expr (bound), &stmts,
5622 				 true, NULL_TREE);
5623 
5624       if (stmts)
5625 	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5626 
5627       *use->op_p = build2 (compare, boolean_type_node, var, op);
5628       update_stmt (use->stmt);
5629       return;
5630     }
5631 
5632   /* The induction variable elimination failed; just express the original
5633      giv.  */
5634   comp = get_computation (data->current_loop, use, cand);
5635 
5636   cond = *use->op_p;
5637   op_p = &TREE_OPERAND (cond, 0);
5638   if (TREE_CODE (*op_p) != SSA_NAME
5639       || zero_p (get_iv (data, *op_p)->step))
5640     op_p = &TREE_OPERAND (cond, 1);
5641 
5642   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5643   if (stmts)
5644     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5645 
5646   *op_p = op;
5647 }
5648 
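/* For illustration (a sketch): when elimination succeeds, an exit test
   such as "if (i_1 < n_2)" is replaced by a comparison of the candidate
   against the precomputed bound, e.g. "if (ivtmp_3 != bound_4)".  When
   it fails, the value of the eliminated iv is recomputed from CAND and
   substituted for the iv operand of the condition.  */
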
5649 /* Rewrites USE using candidate CAND.  */
5650 
5651 static void
5652 rewrite_use (struct ivopts_data *data,
5653 	     struct iv_use *use, struct iv_cand *cand)
5654 {
5655   switch (use->type)
5656     {
5657       case USE_NONLINEAR_EXPR:
5658 	rewrite_use_nonlinear_expr (data, use, cand);
5659 	break;
5660 
5661       case USE_ADDRESS:
5662 	rewrite_use_address (data, use, cand);
5663 	break;
5664 
5665       case USE_COMPARE:
5666 	rewrite_use_compare (data, use, cand);
5667 	break;
5668 
5669       default:
5670 	gcc_unreachable ();
5671     }
5672   mark_new_vars_to_rename (use->stmt);
5673 }
5674 
5675 /* Rewrite the uses using the selected induction variables.  */
5676 
5677 static void
5678 rewrite_uses (struct ivopts_data *data)
5679 {
5680   unsigned i;
5681   struct iv_cand *cand;
5682   struct iv_use *use;
5683 
5684   for (i = 0; i < n_iv_uses (data); i++)
5685     {
5686       use = iv_use (data, i);
5687       cand = use->selected;
5688       gcc_assert (cand);
5689 
5690       rewrite_use (data, use, cand);
5691     }
5692 }
5693 
5694 /* Removes the ivs that are not used after rewriting.  */
5695 
5696 static void
5697 remove_unused_ivs (struct ivopts_data *data)
5698 {
5699   unsigned j;
5700   bitmap_iterator bi;
5701 
5702   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5703     {
5704       struct version_info *info;
5705 
5706       info = ver_info (data, j);
5707       if (info->iv
5708 	  && !zero_p (info->iv->step)
5709 	  && !info->inv_id
5710 	  && !info->iv->have_use_for
5711 	  && !info->preserve_biv)
5712 	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5713     }
5714 }
5715 
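/* For illustration: if the only use of the original biv i was the exit
   test and that test has been rewritten to use a new iv, then the
   definition of i has a nonzero step, no remaining use and is not
   marked preserve_biv, so it is removed by the code above.  */
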
5716 /* Frees data allocated by the optimization of a single loop.  */
5717 
5718 static void
5719 free_loop_data (struct ivopts_data *data)
5720 {
5721   unsigned i, j;
5722   bitmap_iterator bi;
5723   tree obj;
5724 
5725   htab_empty (data->niters);
5726 
5727   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5728     {
5729       struct version_info *info;
5730 
5731       info = ver_info (data, i);
5732       if (info->iv)
5733 	free (info->iv);
5734       info->iv = NULL;
5735       info->has_nonlin_use = false;
5736       info->preserve_biv = false;
5737       info->inv_id = 0;
5738     }
5739   bitmap_clear (data->relevant);
5740   bitmap_clear (data->important_candidates);
5741 
5742   for (i = 0; i < n_iv_uses (data); i++)
5743     {
5744       struct iv_use *use = iv_use (data, i);
5745 
5746       free (use->iv);
5747       BITMAP_FREE (use->related_cands);
5748       for (j = 0; j < use->n_map_members; j++)
5749 	if (use->cost_map[j].depends_on)
5750 	  BITMAP_FREE (use->cost_map[j].depends_on);
5751       free (use->cost_map);
5752       free (use);
5753     }
5754   VEC_truncate (iv_use_p, data->iv_uses, 0);
5755 
5756   for (i = 0; i < n_iv_cands (data); i++)
5757     {
5758       struct iv_cand *cand = iv_cand (data, i);
5759 
5760       if (cand->iv)
5761 	free (cand->iv);
5762       if (cand->depends_on)
5763 	BITMAP_FREE (cand->depends_on);
5764       free (cand);
5765     }
5766   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5767 
5768   if (data->version_info_size < num_ssa_names)
5769     {
5770       data->version_info_size = 2 * num_ssa_names;
5771       free (data->version_info);
5772       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5773     }
5774 
5775   data->max_inv_id = 0;
5776 
5777   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5778     SET_DECL_RTL (obj, NULL_RTX);
5779 
5780   VEC_truncate (tree, decl_rtl_to_reset, 0);
5781 }
5782 
5783 /* Finalizes the data structures used by the iv optimization pass.  */
5785 
5786 static void
5787 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5788 {
5789   free_loop_data (data);
5790   free (data->version_info);
5791   BITMAP_FREE (data->relevant);
5792   BITMAP_FREE (data->important_candidates);
5793   htab_delete (data->niters);
5794 
5795   VEC_free (tree, heap, decl_rtl_to_reset);
5796   VEC_free (iv_use_p, heap, data->iv_uses);
5797   VEC_free (iv_cand_p, heap, data->iv_candidates);
5798 }
5799 
5800 /* Optimizes the LOOP.  Returns true if anything changed.  */
5801 
5802 static bool
5803 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5804 {
5805   bool changed = false;
5806   struct iv_ca *iv_ca;
5807   edge exit;
5808 
5809   data->current_loop = loop;
5810 
5811   if (dump_file && (dump_flags & TDF_DETAILS))
5812     {
5813       fprintf (dump_file, "Processing loop %d\n", loop->num);
5814 
5815       exit = single_dom_exit (loop);
5816       if (exit)
5817 	{
5818 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5819 		   exit->src->index, exit->dest->index);
5820 	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5821 	  fprintf (dump_file, "\n");
5822 	}
5823 
5824       fprintf (dump_file, "\n");
5825     }
5826 
5827   /* For each ssa name, determine whether it behaves as an induction
5828      variable in some loop.  */
5829   if (!find_induction_variables (data))
5830     goto finish;
5831 
5832   /* Finds interesting uses (item 1).  */
5833   find_interesting_uses (data);
5834   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5835     goto finish;
5836 
5837   /* Finds candidates for the induction variables (item 2).  */
5838   find_iv_candidates (data);
5839 
5840   /* Calculates the costs (item 3, part 1).  */
5841   determine_use_iv_costs (data);
5842   determine_iv_costs (data);
5843   determine_set_costs (data);
5844 
5845   /* Find the optimal set of induction variables (item 3, part 2).  */
5846   iv_ca = find_optimal_iv_set (data);
5847   if (!iv_ca)
5848     goto finish;
5849   changed = true;
5850 
5851   /* Create the new induction variables (item 4, part 1).  */
5852   create_new_ivs (data, iv_ca);
5853   iv_ca_free (&iv_ca);
5854 
5855   /* Rewrite the uses (item 4, part 2).  */
5856   rewrite_uses (data);
5857 
5858   /* Remove the ivs that are unused after rewriting.  */
5859   remove_unused_ivs (data);
5860 
5861   /* We have changed the structure of induction variables; it might happen
5862      that definitions in the scev database refer to some of the variables
5863      that were eliminated.  */
5864   scev_reset ();
5865 
5866 finish:
5867   free_loop_data (data);
5868 
5869   return changed;
5870 }
5871 
5872 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5873 
5874 void
5875 tree_ssa_iv_optimize (struct loops *loops)
5876 {
5877   struct loop *loop;
5878   struct ivopts_data data;
5879 
5880   tree_ssa_iv_optimize_init (&data);
5881 
5882   /* Optimize the loops starting with the innermost ones.  */
5883   loop = loops->tree_root;
5884   while (loop->inner)
5885     loop = loop->inner;
5886 
5887   /* Scan the loops, inner ones first.  */
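  /* E.g. for a nest { L1 { L2 { L3 } L4 } } the processing order is
     L3, L2, L4, L1 -- every loop is optimized before the loops that
     contain it.  */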
5888   while (loop != loops->tree_root)
5889     {
5890       if (dump_file && (dump_flags & TDF_DETAILS))
5891 	flow_loop_dump (loop, dump_file, NULL, 1);
5892 
5893       tree_ssa_iv_optimize_loop (&data, loop);
5894 
5895       if (loop->next)
5896 	{
5897 	  loop = loop->next;
5898 	  while (loop->inner)
5899 	    loop = loop->inner;
5900 	}
5901       else
5902 	loop = loop->outer;
5903     }
5904 
5905   tree_ssa_iv_optimize_finalize (&data);
5906 }
5907