1 /* Induction variable optimizations.
2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does so in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33    2) Candidates for the induction variables are found.  This includes
34 
35       -- old induction variables
36       -- the variables defined by expressions derived from the "interesting
37 	 uses" above
38 
39    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
40       cost function assigns a cost to sets of induction variables and consists
41       of three parts:
42 
43       -- The use costs.  Each of the interesting uses chooses the best induction
44 	 variable in the set and adds its cost to the sum.  The cost reflects
45 	 the time spent on modifying the induction variable's value to be usable
46 	 for the given purpose (adding base and offset for arrays, etc.).
47       -- The variable costs.  Each of the variables has a cost assigned that
48 	 reflects the costs associated with incrementing the value of the
49 	 variable.  The original variables are somewhat preferred.
50       -- The set cost.  Depending on the size of the set, extra cost may be
51 	 added to reflect register pressure.
52 
53       All the costs are defined in a machine-specific way, using the target
54       hooks and machine descriptions to determine them.
55 
56    4) The trees are transformed to use the new variables, the dead code is
57       removed.
58 
59    All of this is done loop by loop.  Doing it globally is theoretically
60    possible; it might give better performance and enable us to decide
61    costs more precisely, but getting all the interactions right would be
62    complicated.  */
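
/* For illustration only (an editorial sketch, not output of this pass):
   given a source loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   strength reduction may replace the scaled index computation hidden in
   a[i] by a pointer bumped by the element size each iteration, and iv
   elimination may then remove i altogether, yielding roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   Whether this is profitable depends on the target addressing modes and
   the cost model described above.  */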
63 
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "tm_p.h"
70 #include "basic-block.h"
71 #include "gimple-pretty-print.h"
72 #include "tree-flow.h"
73 #include "cfgloop.h"
74 #include "tree-pass.h"
75 #include "ggc.h"
76 #include "insn-config.h"
77 #include "pointer-set.h"
78 #include "hashtab.h"
79 #include "tree-chrec.h"
80 #include "tree-scalar-evolution.h"
82 #include "params.h"
83 #include "langhooks.h"
84 #include "tree-affine.h"
85 #include "target.h"
86 #include "tree-inline.h"
87 #include "tree-ssa-propagate.h"
88 #include "expmed.h"
89 
90 /* FIXME: Expressions are expanded to RTL in this pass to determine the
91    cost of different addressing modes.  This should be moved to a TBD
92    interface between the GIMPLE and RTL worlds.  */
93 #include "expr.h"
94 #include "recog.h"
95 
96 /* The infinite cost.  */
97 #define INFTY 10000000
98 
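/* The fallback estimate of the number of iterations of a loop, used by
   avg_loop_niter below when no profile-based estimate is available.  */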
99 #define AVG_LOOP_NITER(LOOP) 5
100 
101 /* Returns the expected number of loop iterations for LOOP.
102    The average trip count is computed from profile data if it
103    exists. */
104 
105 static inline HOST_WIDE_INT
106 avg_loop_niter (struct loop *loop)
107 {
108   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
109   if (niter == -1)
110     return AVG_LOOP_NITER (loop);
111 
112   return niter;
113 }
114 
115 /* Representation of the induction variable.  */
116 struct iv
117 {
118   tree base;		/* Initial value of the iv.  */
119   tree base_object;	/* A memory object to which the induction variable points.  */
120   tree step;		/* Step of the iv (constant only).  */
121   tree ssa_name;	/* The ssa name with the value.  */
122   bool biv_p;		/* Is it a biv?  */
123   bool have_use_for;	/* Do we already have a use for it?  */
124   unsigned use_id;	/* The identifier of that use, if so.  */
125 };
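
/* An illustrative instance (an editorial sketch; the names are made
   up): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the variable i is described by an iv with base 0 and step 1, while
   the address computation &a[i] yields an iv with base &a[0], step 4
   (the element size, assuming 4-byte int) and base_object &a.  */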
126 
127 /* Per-ssa version information (induction variable descriptions, etc.).  */
128 struct version_info
129 {
130   tree name;		/* The ssa name.  */
131   struct iv *iv;	/* Induction variable description.  */
132   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
133 			   an expression that is not an induction variable.  */
134   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
135   unsigned inv_id;	/* Id of an invariant.  */
136 };
137 
138 /* Types of uses.  */
139 enum use_type
140 {
141   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
142   USE_ADDRESS,		/* Use in an address.  */
143   USE_COMPARE		/* Use is a compare.  */
144 };
145 
146 /* Cost of a computation.  */
147 typedef struct
148 {
149   int cost;		/* The runtime cost.  */
150   unsigned complexity;	/* The estimate of the complexity of the code for
151 			   the computation (in no concrete units --
152 			   complexity field should be larger for more
153 			   complex expressions and addressing modes).  */
154 } comp_cost;
155 
156 static const comp_cost no_cost = {0, 0};
157 static const comp_cost infinite_cost = {INFTY, INFTY};
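
/* Costs are compared primarily by the COST field; COMPLEXITY only
   breaks ties, so e.g. {4, 0} is considered cheaper than {4, 2}
   (see the cost comparison helpers later in this file).  */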
158 
159 /* The candidate - cost pair.  */
160 struct cost_pair
161 {
162   struct iv_cand *cand;	/* The candidate.  */
163   comp_cost cost;	/* The cost.  */
164   bitmap depends_on;	/* The list of invariants that have to be
165 			   preserved.  */
166   tree value;		/* For final value elimination, the expression for
167 			   the final value of the iv.  For iv elimination,
168 			   the new bound to compare with.  */
169   enum tree_code comp;	/* For iv elimination, the comparison.  */
170   int inv_expr_id;      /* Loop invariant expression id.  */
171 };
172 
173 /* Use.  */
174 struct iv_use
175 {
176   unsigned id;		/* The id of the use.  */
177   enum use_type type;	/* Type of the use.  */
178   struct iv *iv;	/* The induction variable it is based on.  */
179   gimple stmt;		/* Statement in which it occurs.  */
180   tree *op_p;		/* The place where it occurs.  */
181   bitmap related_cands;	/* The set of "related" iv candidates, plus the common
182 			   important ones.  */
183 
184   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
185   struct cost_pair *cost_map;
186 			/* The costs w.r.t. the iv candidates.  */
187 
188   struct iv_cand *selected;
189 			/* The selected candidate.  */
190 };
191 
192 /* The position where the iv is computed.  */
193 enum iv_position
194 {
195   IP_NORMAL,		/* At the end, just before the exit condition.  */
196   IP_END,		/* At the end of the latch block.  */
197   IP_BEFORE_USE,	/* Immediately before a specific use.  */
198   IP_AFTER_USE,		/* Immediately after a specific use.  */
199   IP_ORIGINAL		/* The original biv.  */
200 };
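
/* An illustrative placement of the increment for each position (an
   editorial sketch; the exact insertion point is computed by
   ip_normal_pos and friends):

     body:
       ... use (iv) ...       IP_BEFORE_USE/IP_AFTER_USE: around a use
       iv += step;            IP_NORMAL: just before the exit test
       if (cond) goto body;
     latch:
       iv += step;            IP_END: at the end of the latch block

   IP_ORIGINAL keeps the increment at the statement that incremented the
   original biv.  */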
201 
202 /* The induction variable candidate.  */
203 struct iv_cand
204 {
205   unsigned id;		/* The number of the candidate.  */
206   bool important;	/* Whether this is an "important" candidate, i.e. such
207 			   that it should be considered by all uses.  */
208   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
209   gimple incremented_at;/* For original biv, the statement where it is
210 			   incremented.  */
211   tree var_before;	/* The variable used for it before increment.  */
212   tree var_after;	/* The variable used for it after increment.  */
213   struct iv *iv;	/* The value of the candidate.  NULL for
214 			   "pseudocandidate" used to indicate the possibility
215 			   to replace the final value of an iv by direct
216 			   computation of the value.  */
217   unsigned cost;	/* Cost of the candidate.  */
218   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
219   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
220 			      where it is incremented.  */
221   bitmap depends_on;	/* The list of invariants that are used in step of the
222 			   biv.  */
223 };
224 
225 /* Loop invariant expression hashtable entry.  */
226 struct iv_inv_expr_ent
227 {
228   tree expr;
229   int id;
230   hashval_t hash;
231 };
232 
233 /* The data used by the induction variable optimizations.  */
234 
235 typedef struct iv_use *iv_use_p;
236 
237 typedef struct iv_cand *iv_cand_p;
238 
239 struct ivopts_data
240 {
241   /* The currently optimized loop.  */
242   struct loop *current_loop;
243 
244   /* Numbers of iterations for all exits of the current loop.  */
245   struct pointer_map_t *niters;
246 
247   /* Number of registers used in the loop.  */
248   unsigned regs_used;
249 
250   /* The size of version_info array allocated.  */
251   unsigned version_info_size;
252 
253   /* The array of information for the ssa names.  */
254   struct version_info *version_info;
255 
256   /* The hashtable of loop invariant expressions created
257      by ivopts.  */
258   htab_t inv_expr_tab;
259 
260   /* Loop invariant expression id.  */
261   int inv_expr_id;
262 
263   /* The bitmap of indices in version_info whose value was changed.  */
264   bitmap relevant;
265 
266   /* The uses of induction variables.  */
267   vec<iv_use_p> iv_uses;
268 
269   /* The candidates.  */
270   vec<iv_cand_p> iv_candidates;
271 
272   /* A bitmap of important candidates.  */
273   bitmap important_candidates;
274 
275   /* The maximum invariant id.  */
276   unsigned max_inv_id;
277 
278   /* Whether to consider just related and important candidates when replacing a
279      use.  */
280   bool consider_all_candidates;
281 
282   /* Are we optimizing for speed?  */
283   bool speed;
284 
285   /* Whether the loop body includes any function calls.  */
286   bool body_includes_call;
287 
288   /* Whether the loop body can only be exited via a single exit.  */
289   bool loop_single_exit_p;
290 };
291 
292 /* An assignment of iv candidates to uses.  */
293 
294 struct iv_ca
295 {
296   /* The number of uses covered by the assignment.  */
297   unsigned upto;
298 
299   /* Number of uses that cannot be expressed by the candidates in the set.  */
300   unsigned bad_uses;
301 
302   /* Candidate assigned to a use, together with the related costs.  */
303   struct cost_pair **cand_for_use;
304 
305   /* Number of times each candidate is used.  */
306   unsigned *n_cand_uses;
307 
308   /* The candidates used.  */
309   bitmap cands;
310 
311   /* The number of candidates in the set.  */
312   unsigned n_cands;
313 
314   /* Total number of registers needed.  */
315   unsigned n_regs;
316 
317   /* Total cost of expressing uses.  */
318   comp_cost cand_use_cost;
319 
320   /* Total cost of candidates.  */
321   unsigned cand_cost;
322 
323   /* Number of times each invariant is used.  */
324   unsigned *n_invariant_uses;
325 
326   /* The array holding the number of uses of each loop
327      invariant expression created by ivopts.  */
328   unsigned *used_inv_expr;
329 
330   /* The number of created loop invariants.  */
331   unsigned num_used_inv_expr;
332 
333   /* Total cost of the assignment.  */
334   comp_cost cost;
335 };
336 
337 /* Difference of two iv candidate assignments.  */
338 
339 struct iv_ca_delta
340 {
341   /* Changed use.  */
342   struct iv_use *use;
343 
344   /* An old assignment (for rollback purposes).  */
345   struct cost_pair *old_cp;
346 
347   /* A new assignment.  */
348   struct cost_pair *new_cp;
349 
350   /* Next change in the list.  */
351   struct iv_ca_delta *next_change;
352 };
353 
354 /* Bound on the number of candidates below which all candidates are considered.  */
355 
356 #define CONSIDER_ALL_CANDIDATES_BOUND \
357   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
358 
359 /* If there are more iv occurrences, we just give up (it is quite unlikely that
360    optimizing such a loop would help, and it would take ages).  */
361 
362 #define MAX_CONSIDERED_USES \
363   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
364 
365 /* If there are at most this number of ivs in the set, always try removing
366    unnecessary ivs from the set.  */
367 
368 #define ALWAYS_PRUNE_CAND_SET_BOUND \
369   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
370 
371 /* The list of trees for which the decl_rtl field must be reset is stored
372    here.  */
373 
374 static vec<tree> decl_rtl_to_reset;
375 
376 static comp_cost force_expr_to_var_cost (tree, bool);
377 
378 /* Number of uses recorded in DATA.  */
379 
380 static inline unsigned
381 n_iv_uses (struct ivopts_data *data)
382 {
383   return data->iv_uses.length ();
384 }
385 
386 /* Ith use recorded in DATA.  */
387 
388 static inline struct iv_use *
389 iv_use (struct ivopts_data *data, unsigned i)
390 {
391   return data->iv_uses[i];
392 }
393 
394 /* Number of candidates recorded in DATA.  */
395 
396 static inline unsigned
397 n_iv_cands (struct ivopts_data *data)
398 {
399   return data->iv_candidates.length ();
400 }
401 
402 /* Ith candidate recorded in DATA.  */
403 
404 static inline struct iv_cand *
405 iv_cand (struct ivopts_data *data, unsigned i)
406 {
407   return data->iv_candidates[i];
408 }
409 
410 /* The single loop exit if it dominates the latch, NULL otherwise.  */
411 
412 edge
413 single_dom_exit (struct loop *loop)
414 {
415   edge exit = single_exit (loop);
416 
417   if (!exit)
418     return NULL;
419 
420   if (!just_once_each_iteration_p (loop, exit->src))
421     return NULL;
422 
423   return exit;
424 }
425 
426 /* Dumps information about the induction variable IV to FILE.  */
427 
428 extern void dump_iv (FILE *, struct iv *);
429 void
430 dump_iv (FILE *file, struct iv *iv)
431 {
432   if (iv->ssa_name)
433     {
434       fprintf (file, "ssa name ");
435       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
436       fprintf (file, "\n");
437     }
438 
439   fprintf (file, "  type ");
440   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
441   fprintf (file, "\n");
442 
443   if (iv->step)
444     {
445       fprintf (file, "  base ");
446       print_generic_expr (file, iv->base, TDF_SLIM);
447       fprintf (file, "\n");
448 
449       fprintf (file, "  step ");
450       print_generic_expr (file, iv->step, TDF_SLIM);
451       fprintf (file, "\n");
452     }
453   else
454     {
455       fprintf (file, "  invariant ");
456       print_generic_expr (file, iv->base, TDF_SLIM);
457       fprintf (file, "\n");
458     }
459 
460   if (iv->base_object)
461     {
462       fprintf (file, "  base object ");
463       print_generic_expr (file, iv->base_object, TDF_SLIM);
464       fprintf (file, "\n");
465     }
466 
467   if (iv->biv_p)
468     fprintf (file, "  is a biv\n");
469 }
470 
471 /* Dumps information about the USE to FILE.  */
472 
473 extern void dump_use (FILE *, struct iv_use *);
474 void
475 dump_use (FILE *file, struct iv_use *use)
476 {
477   fprintf (file, "use %d\n", use->id);
478 
479   switch (use->type)
480     {
481     case USE_NONLINEAR_EXPR:
482       fprintf (file, "  generic\n");
483       break;
484 
485     case USE_ADDRESS:
486       fprintf (file, "  address\n");
487       break;
488 
489     case USE_COMPARE:
490       fprintf (file, "  compare\n");
491       break;
492 
493     default:
494       gcc_unreachable ();
495     }
496 
497   fprintf (file, "  in statement ");
498   print_gimple_stmt (file, use->stmt, 0, 0);
499   fprintf (file, "\n");
500 
501   fprintf (file, "  at position ");
502   if (use->op_p)
503     print_generic_expr (file, *use->op_p, TDF_SLIM);
504   fprintf (file, "\n");
505 
506   dump_iv (file, use->iv);
507 
508   if (use->related_cands)
509     {
510       fprintf (file, "  related candidates ");
511       dump_bitmap (file, use->related_cands);
512     }
513 }
514 
515 /* Dumps information about the uses to FILE.  */
516 
517 extern void dump_uses (FILE *, struct ivopts_data *);
518 void
519 dump_uses (FILE *file, struct ivopts_data *data)
520 {
521   unsigned i;
522   struct iv_use *use;
523 
524   for (i = 0; i < n_iv_uses (data); i++)
525     {
526       use = iv_use (data, i);
527 
528       dump_use (file, use);
529       fprintf (file, "\n");
530     }
531 }
532 
533 /* Dumps information about induction variable candidate CAND to FILE.  */
534 
535 extern void dump_cand (FILE *, struct iv_cand *);
536 void
537 dump_cand (FILE *file, struct iv_cand *cand)
538 {
539   struct iv *iv = cand->iv;
540 
541   fprintf (file, "candidate %d%s\n",
542 	   cand->id, cand->important ? " (important)" : "");
543 
544   if (cand->depends_on)
545     {
546       fprintf (file, "  depends on ");
547       dump_bitmap (file, cand->depends_on);
548     }
549 
550   if (!iv)
551     {
552       fprintf (file, "  final value replacement\n");
553       return;
554     }
555 
556   if (cand->var_before)
557     {
558       fprintf (file, "  var_before ");
559       print_generic_expr (file, cand->var_before, TDF_SLIM);
560       fprintf (file, "\n");
561     }
562   if (cand->var_after)
563     {
564       fprintf (file, "  var_after ");
565       print_generic_expr (file, cand->var_after, TDF_SLIM);
566       fprintf (file, "\n");
567     }
568 
569   switch (cand->pos)
570     {
571     case IP_NORMAL:
572       fprintf (file, "  incremented before exit test\n");
573       break;
574 
575     case IP_BEFORE_USE:
576       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
577       break;
578 
579     case IP_AFTER_USE:
580       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
581       break;
582 
583     case IP_END:
584       fprintf (file, "  incremented at end\n");
585       break;
586 
587     case IP_ORIGINAL:
588       fprintf (file, "  original biv\n");
589       break;
590     }
591 
592   dump_iv (file, iv);
593 }
594 
595 /* Returns the info for ssa version VER.  */
596 
597 static inline struct version_info *
598 ver_info (struct ivopts_data *data, unsigned ver)
599 {
600   return data->version_info + ver;
601 }
602 
603 /* Returns the info for ssa name NAME.  */
604 
605 static inline struct version_info *
606 name_info (struct ivopts_data *data, tree name)
607 {
608   return ver_info (data, SSA_NAME_VERSION (name));
609 }
610 
611 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
612    emitted in LOOP.  */
613 
614 static bool
615 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
616 {
617   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
618 
619   gcc_assert (bb);
620 
621   if (sbb == loop->latch)
622     return true;
623 
624   if (sbb != bb)
625     return false;
626 
627   return stmt == last_stmt (bb);
628 }
629 
630 /* Returns true if STMT is after the place where the original induction
631    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
632    if the positions are identical.  */
633 
634 static bool
635 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
636 {
637   basic_block cand_bb = gimple_bb (cand->incremented_at);
638   basic_block stmt_bb = gimple_bb (stmt);
639 
640   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
641     return false;
642 
643   if (stmt_bb != cand_bb)
644     return true;
645 
646   if (true_if_equal
647       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
648     return true;
649   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
650 }
651 
652 /* Returns true if STMT is after the place where the induction variable
653    CAND is incremented in LOOP.  */
654 
655 static bool
656 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
657 {
658   switch (cand->pos)
659     {
660     case IP_END:
661       return false;
662 
663     case IP_NORMAL:
664       return stmt_after_ip_normal_pos (loop, stmt);
665 
666     case IP_ORIGINAL:
667     case IP_AFTER_USE:
668       return stmt_after_inc_pos (cand, stmt, false);
669 
670     case IP_BEFORE_USE:
671       return stmt_after_inc_pos (cand, stmt, true);
672 
673     default:
674       gcc_unreachable ();
675     }
676 }
677 
678 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */
679 
680 static bool
681 abnormal_ssa_name_p (tree exp)
682 {
683   if (!exp)
684     return false;
685 
686   if (TREE_CODE (exp) != SSA_NAME)
687     return false;
688 
689   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
690 }
691 
692 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
693    abnormal phi node.  Callback for for_each_index.  */
694 
695 static bool
696 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
697 				  void *data ATTRIBUTE_UNUSED)
698 {
699   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
700     {
701       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
702 	return false;
703       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
704 	return false;
705     }
706 
707   return !abnormal_ssa_name_p (*index);
708 }
709 
710 /* Returns true if EXPR contains an ssa name that occurs in an
711    abnormal phi node.  */
712 
713 bool
714 contains_abnormal_ssa_name_p (tree expr)
715 {
716   enum tree_code code;
717   enum tree_code_class codeclass;
718 
719   if (!expr)
720     return false;
721 
722   code = TREE_CODE (expr);
723   codeclass = TREE_CODE_CLASS (code);
724 
725   if (code == SSA_NAME)
726     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
727 
728   if (code == INTEGER_CST
729       || is_gimple_min_invariant (expr))
730     return false;
731 
732   if (code == ADDR_EXPR)
733     return !for_each_index (&TREE_OPERAND (expr, 0),
734 			    idx_contains_abnormal_ssa_name_p,
735 			    NULL);
736 
737   if (code == COND_EXPR)
738     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
739       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
740       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
741 
742   switch (codeclass)
743     {
744     case tcc_binary:
745     case tcc_comparison:
746       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
747 	return true;
748 
749       /* Fallthru.  */
750     case tcc_unary:
751       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
752 	return true;
753 
754       break;
755 
756     default:
757       gcc_unreachable ();
758     }
759 
760   return false;
761 }
762 
763 /* Returns the structure describing the number of iterations determined
764    from EXIT of DATA->current_loop, or NULL if something goes wrong.  */
765 
766 static struct tree_niter_desc *
767 niter_for_exit (struct ivopts_data *data, edge exit)
768 {
769   struct tree_niter_desc *desc;
770   void **slot;
771 
772   if (!data->niters)
773     {
774       data->niters = pointer_map_create ();
775       slot = NULL;
776     }
777   else
778     slot = pointer_map_contains (data->niters, exit);
779 
780   if (!slot)
781     {
782       /* Try to determine the number of iterations.  We cannot safely work
783          with ssa names that appear in phi nodes on abnormal edges, so that
784          we do not create overlapping live ranges for them (PR 27283).  */
785       desc = XNEW (struct tree_niter_desc);
786       if (!number_of_iterations_exit (data->current_loop,
787 				      exit, desc, true)
788 	  || contains_abnormal_ssa_name_p (desc->niter))
789 	{
790 	  XDELETE (desc);
791 	  desc = NULL;
792 	}
793       slot = pointer_map_insert (data->niters, exit);
794       *slot = desc;
795     }
796   else
797     desc = (struct tree_niter_desc *) *slot;
798 
799   return desc;
800 }
801 
802 /* Returns the structure describing the number of iterations determined
803    from the single dominating exit of DATA->current_loop, or NULL if
804    something goes wrong.  */
805 
806 static struct tree_niter_desc *
807 niter_for_single_dom_exit (struct ivopts_data *data)
808 {
809   edge exit = single_dom_exit (data->current_loop);
810 
811   if (!exit)
812     return NULL;
813 
814   return niter_for_exit (data, exit);
815 }
816 
817 /* Hash table equality function for expressions.  */
818 
819 static int
820 htab_inv_expr_eq (const void *ent1, const void *ent2)
821 {
822   const struct iv_inv_expr_ent *expr1 =
823       (const struct iv_inv_expr_ent *)ent1;
824   const struct iv_inv_expr_ent *expr2 =
825       (const struct iv_inv_expr_ent *)ent2;
826 
827   return expr1->hash == expr2->hash
828 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
829 }
830 
831 /* Hash function for loop invariant expressions.  */
832 
833 static hashval_t
834 htab_inv_expr_hash (const void *ent)
835 {
836   const struct iv_inv_expr_ent *expr =
837       (const struct iv_inv_expr_ent *)ent;
838   return expr->hash;
839 }
840 
841 /* Initializes data structures used by the iv optimization pass, stored
842    in DATA.  */
843 
844 static void
845 tree_ssa_iv_optimize_init (struct ivopts_data *data)
846 {
847   data->version_info_size = 2 * num_ssa_names;
848   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
849   data->relevant = BITMAP_ALLOC (NULL);
850   data->important_candidates = BITMAP_ALLOC (NULL);
851   data->max_inv_id = 0;
852   data->niters = NULL;
853   data->iv_uses.create (20);
854   data->iv_candidates.create (20);
855   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
856                                     htab_inv_expr_eq, free);
857   data->inv_expr_id = 0;
858   decl_rtl_to_reset.create (20);
859 }
860 
861 /* Returns a memory object to which EXPR points.  If we are able to
862    determine that it does not point to any such object, NULL is returned.  */
863 
864 static tree
865 determine_base_object (tree expr)
866 {
867   enum tree_code code = TREE_CODE (expr);
868   tree base, obj;
869 
870   /* If this is a pointer casted to any type, we need to determine
871      the base object for the pointer; so handle conversions before
872      throwing away non-pointer expressions.  */
873   if (CONVERT_EXPR_P (expr))
874     return determine_base_object (TREE_OPERAND (expr, 0));
875 
876   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
877     return NULL_TREE;
878 
879   switch (code)
880     {
881     case INTEGER_CST:
882       return NULL_TREE;
883 
884     case ADDR_EXPR:
885       obj = TREE_OPERAND (expr, 0);
886       base = get_base_address (obj);
887 
888       if (!base)
889 	return expr;
890 
891       if (TREE_CODE (base) == MEM_REF)
892 	return determine_base_object (TREE_OPERAND (base, 0));
893 
894       return fold_convert (ptr_type_node,
895 		           build_fold_addr_expr (base));
896 
897     case POINTER_PLUS_EXPR:
898       return determine_base_object (TREE_OPERAND (expr, 0));
899 
900     case PLUS_EXPR:
901     case MINUS_EXPR:
902       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
903       gcc_unreachable ();
904 
905     default:
906       return fold_convert (ptr_type_node, expr);
907     }
908 }
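
/* A few illustrative results of determine_base_object (an editorial
   sketch; p_1 and n_2 are made-up ssa names, "p+" stands for
   POINTER_PLUS_EXPR):

     determine_base_object (&a[0])     == &a
     determine_base_object (p_1 p+ 4)  == determine_base_object (p_1)
     determine_base_object (n_2)       == NULL_TREE for integer n_2  */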
909 
910 /* Allocates an induction variable with given initial value BASE and
911    step STEP.  */
912 
913 static struct iv *
914 alloc_iv (tree base, tree step)
915 {
916   struct iv *iv = XCNEW (struct iv);
917   gcc_assert (step != NULL_TREE);
918 
919   iv->base = base;
920   iv->base_object = determine_base_object (base);
921   iv->step = step;
922   iv->biv_p = false;
923   iv->have_use_for = false;
924   iv->use_id = 0;
925   iv->ssa_name = NULL_TREE;
926 
927   return iv;
928 }
929 
930 /* Sets STEP and BASE for induction variable IV.  */
931 
932 static void
933 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
934 {
935   struct version_info *info = name_info (data, iv);
936 
937   gcc_assert (!info->iv);
938 
939   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
940   info->iv = alloc_iv (base, step);
941   info->iv->ssa_name = iv;
942 }
943 
944 /* Finds induction variable declaration for VAR.  */
945 
946 static struct iv *
947 get_iv (struct ivopts_data *data, tree var)
948 {
949   basic_block bb;
950   tree type = TREE_TYPE (var);
951 
952   if (!POINTER_TYPE_P (type)
953       && !INTEGRAL_TYPE_P (type))
954     return NULL;
955 
956   if (!name_info (data, var)->iv)
957     {
958       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
959 
960       if (!bb
961 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
962 	set_iv (data, var, var, build_int_cst (type, 0));
963     }
964 
965   return name_info (data, var)->iv;
966 }
967 
968 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
969    not define a simple affine biv with nonzero step.  */
970 
971 static tree
972 determine_biv_step (gimple phi)
973 {
974   struct loop *loop = gimple_bb (phi)->loop_father;
975   tree name = PHI_RESULT (phi);
976   affine_iv iv;
977 
978   if (virtual_operand_p (name))
979     return NULL_TREE;
980 
981   if (!simple_iv (loop, loop, name, &iv, true))
982     return NULL_TREE;
983 
984   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
985 }
986 
987 /* Finds basic ivs.  */
988 
989 static bool
990 find_bivs (struct ivopts_data *data)
991 {
992   gimple phi;
993   tree step, type, base;
994   bool found = false;
995   struct loop *loop = data->current_loop;
996   gimple_stmt_iterator psi;
997 
998   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
999     {
1000       phi = gsi_stmt (psi);
1001 
1002       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1003 	continue;
1004 
1005       step = determine_biv_step (phi);
1006       if (!step)
1007 	continue;
1008 
1009       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1010       base = expand_simple_operations (base);
1011       if (contains_abnormal_ssa_name_p (base)
1012 	  || contains_abnormal_ssa_name_p (step))
1013 	continue;
1014 
1015       type = TREE_TYPE (PHI_RESULT (phi));
1016       base = fold_convert (type, base);
1017       if (step)
1018 	{
1019 	  if (POINTER_TYPE_P (type))
1020 	    step = convert_to_ptrofftype (step);
1021 	  else
1022 	    step = fold_convert (type, step);
1023 	}
1024 
1025       set_iv (data, PHI_RESULT (phi), base, step);
1026       found = true;
1027     }
1028 
1029   return found;
1030 }
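
/* For example (illustrative GIMPLE), the loop header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   together with the latch statement i_2 = i_1 + 1 makes i_1 a biv with
   base 0 and step 1; find_bivs records it, and mark_bivs below then
   flags both i_1 and i_2 as bivs.  */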
1031 
1032 /* Marks basic ivs.  */
1033 
1034 static void
1035 mark_bivs (struct ivopts_data *data)
1036 {
1037   gimple phi;
1038   tree var;
1039   struct iv *iv, *incr_iv;
1040   struct loop *loop = data->current_loop;
1041   basic_block incr_bb;
1042   gimple_stmt_iterator psi;
1043 
1044   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1045     {
1046       phi = gsi_stmt (psi);
1047 
1048       iv = get_iv (data, PHI_RESULT (phi));
1049       if (!iv)
1050 	continue;
1051 
1052       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1053       incr_iv = get_iv (data, var);
1054       if (!incr_iv)
1055 	continue;
1056 
1057       /* If the increment is in a subloop, ignore it.  */
1058       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1059       if (incr_bb->loop_father != data->current_loop
1060 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1061 	continue;
1062 
1063       iv->biv_p = true;
1064       incr_iv->biv_p = true;
1065     }
1066 }
1067 
1068 /* Checks whether STMT defines a linear induction variable and stores its
1069    parameters to IV.  */
1070 
1071 static bool
1072 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1073 {
1074   tree lhs;
1075   struct loop *loop = data->current_loop;
1076 
1077   iv->base = NULL_TREE;
1078   iv->step = NULL_TREE;
1079 
1080   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1081     return false;
1082 
1083   lhs = gimple_assign_lhs (stmt);
1084   if (TREE_CODE (lhs) != SSA_NAME)
1085     return false;
1086 
1087   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1088     return false;
1089   iv->base = expand_simple_operations (iv->base);
1090 
1091   if (contains_abnormal_ssa_name_p (iv->base)
1092       || contains_abnormal_ssa_name_p (iv->step))
1093     return false;
1094 
1095   /* If STMT could throw, then do not consider STMT as defining a GIV.
1096      While this will suppress optimizations, we cannot safely delete this
1097      GIV and associated statements, even if it appears it is not used.  */
1098   if (stmt_could_throw_p (stmt))
1099     return false;
1100 
1101   return true;
1102 }
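
/* Continuing the biv example above: given the biv i_1 with base 0 and
   step 1, a statement computing (an editorial sketch of the GIMPLE
   form)

     p_3 = &a[0] + i_1 * 4;

   is recognized by scev analysis as a giv with base &a[0] and step 4.  */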
1103 
1104 /* Finds general ivs in statement STMT.  */
1105 
1106 static void
1107 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1108 {
1109   affine_iv iv;
1110 
1111   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1112     return;
1113 
1114   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1115 }
1116 
1117 /* Finds general ivs in basic block BB.  */
1118 
1119 static void
1120 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1121 {
1122   gimple_stmt_iterator bsi;
1123 
1124   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1125     find_givs_in_stmt (data, gsi_stmt (bsi));
1126 }
1127 
1128 /* Finds general ivs.  */
1129 
1130 static void
1131 find_givs (struct ivopts_data *data)
1132 {
1133   struct loop *loop = data->current_loop;
1134   basic_block *body = get_loop_body_in_dom_order (loop);
1135   unsigned i;
1136 
1137   for (i = 0; i < loop->num_nodes; i++)
1138     find_givs_in_bb (data, body[i]);
1139   free (body);
1140 }
1141 
1142 /* For each ssa name defined in LOOP determines whether it is an induction
1143    variable and if so, its initial value and step.  */
1144 
1145 static bool
1146 find_induction_variables (struct ivopts_data *data)
1147 {
1148   unsigned i;
1149   bitmap_iterator bi;
1150 
1151   if (!find_bivs (data))
1152     return false;
1153 
1154   find_givs (data);
1155   mark_bivs (data);
1156 
1157   if (dump_file && (dump_flags & TDF_DETAILS))
1158     {
1159       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1160 
1161       if (niter)
1162 	{
1163 	  fprintf (dump_file, "  number of iterations ");
1164 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1165 	  if (!integer_zerop (niter->may_be_zero))
1166 	    {
1167 	      fprintf (dump_file, "; zero if ");
1168 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1169 	    }
1170 	  fprintf (dump_file, "\n\n");
1171 	}
1172 
1173       fprintf (dump_file, "Induction variables:\n\n");
1174 
1175       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1176 	{
1177 	  if (ver_info (data, i)->iv)
1178 	    dump_iv (dump_file, ver_info (data, i)->iv);
1179 	}
1180     }
1181 
1182   return true;
1183 }
1184 
1185 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1186 
1187 static struct iv_use *
1188 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1189 	    gimple stmt, enum use_type use_type)
1190 {
1191   struct iv_use *use = XCNEW (struct iv_use);
1192 
1193   use->id = n_iv_uses (data);
1194   use->type = use_type;
1195   use->iv = iv;
1196   use->stmt = stmt;
1197   use->op_p = use_p;
1198   use->related_cands = BITMAP_ALLOC (NULL);
1199 
1200   /* Reset the ssa name so that it is not shown in the dumps, if it was
1201      not reset by the caller.  */
1202   iv->ssa_name = NULL_TREE;
1203 
1204   if (dump_file && (dump_flags & TDF_DETAILS))
1205     dump_use (dump_file, use);
1206 
1207   data->iv_uses.safe_push (use);
1208 
1209   return use;
1210 }
1211 
1212 /* Checks whether OP is a loop-level invariant and if so, records it.
1213    NONLINEAR_USE is true if the invariant is used in a way we do not
1214    handle specially.  */
1215 
1216 static void
1217 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1218 {
1219   basic_block bb;
1220   struct version_info *info;
1221 
1222   if (TREE_CODE (op) != SSA_NAME
1223       || virtual_operand_p (op))
1224     return;
1225 
1226   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1227   if (bb
1228       && flow_bb_inside_loop_p (data->current_loop, bb))
1229     return;
1230 
1231   info = name_info (data, op);
1232   info->name = op;
1233   info->has_nonlin_use |= nonlinear_use;
1234   if (!info->inv_id)
1235     info->inv_id = ++data->max_inv_id;
1236   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1237 }
1238 
1239 /* Checks whether the use OP is interesting and if so, records it.  */
1240 
1241 static struct iv_use *
1242 find_interesting_uses_op (struct ivopts_data *data, tree op)
1243 {
1244   struct iv *iv;
1245   struct iv *civ;
1246   gimple stmt;
1247   struct iv_use *use;
1248 
1249   if (TREE_CODE (op) != SSA_NAME)
1250     return NULL;
1251 
1252   iv = get_iv (data, op);
1253   if (!iv)
1254     return NULL;
1255 
1256   if (iv->have_use_for)
1257     {
1258       use = iv_use (data, iv->use_id);
1259 
1260       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1261       return use;
1262     }
1263 
1264   if (integer_zerop (iv->step))
1265     {
1266       record_invariant (data, op, true);
1267       return NULL;
1268     }
1269   iv->have_use_for = true;
1270 
1271   civ = XNEW (struct iv);
1272   *civ = *iv;
1273 
1274   stmt = SSA_NAME_DEF_STMT (op);
1275   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1276 	      || is_gimple_assign (stmt));
1277 
1278   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1279   iv->use_id = use->id;
1280 
1281   return use;
1282 }
1283 
1284 /* Given a condition in statement STMT, checks whether it is a compare
1285    of an induction variable and an invariant.  If this is the case,
1286    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1287    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1288    induction variable descriptions, and true is returned.  If this is not
1289    the case, CONTROL_VAR and BOUND are set to the arguments of the
1290    condition and false is returned.  */
1291 
1292 static bool
1293 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1294 		       tree **control_var, tree **bound,
1295 		       struct iv **iv_var, struct iv **iv_bound)
1296 {
1297   /* The objects returned when COND has constant operands.  */
1298   static struct iv const_iv;
1299   static tree zero;
1300   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1301   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1302   bool ret = false;
1303 
1304   if (gimple_code (stmt) == GIMPLE_COND)
1305     {
1306       op0 = gimple_cond_lhs_ptr (stmt);
1307       op1 = gimple_cond_rhs_ptr (stmt);
1308     }
1309   else
1310     {
1311       op0 = gimple_assign_rhs1_ptr (stmt);
1312       op1 = gimple_assign_rhs2_ptr (stmt);
1313     }
1314 
1315   zero = integer_zero_node;
1316   const_iv.step = integer_zero_node;
1317 
1318   if (TREE_CODE (*op0) == SSA_NAME)
1319     iv0 = get_iv (data, *op0);
1320   if (TREE_CODE (*op1) == SSA_NAME)
1321     iv1 = get_iv (data, *op1);
1322 
1323   /* Exactly one of the compared values must be an iv, and the other one must
1324      be an invariant.  */
1325   if (!iv0 || !iv1)
1326     goto end;
1327 
1328   if (integer_zerop (iv0->step))
1329     {
1330       /* Control variable may be on the other side.  */
1331       tmp_op = op0; op0 = op1; op1 = tmp_op;
1332       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1333     }
1334   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1335 
1336 end:
1337   if (control_var)
1338     *control_var = op0;
1339   if (iv_var)
1340     *iv_var = iv0;
1341   if (bound)
1342     *bound = op1;
1343   if (iv_bound)
1344     *iv_bound = iv1;
1345 
1346   return ret;
1347 }
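
/* For example, for the exit test

     if (i_1 < n_2)

   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   *CONTROL_VAR is set to point to i_1, *BOUND to n_2, and true is
   returned; for a compare of two invariants false is returned.  */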
1348 
1349 /* Checks whether the condition in STMT is interesting and if so,
1350    records it.  */
1351 
1352 static void
1353 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1354 {
1355   tree *var_p, *bound_p;
1356   struct iv *var_iv, *civ;
1357 
1358   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1359     {
1360       find_interesting_uses_op (data, *var_p);
1361       find_interesting_uses_op (data, *bound_p);
1362       return;
1363     }
1364 
1365   civ = XNEW (struct iv);
1366   *civ = *var_iv;
1367   record_use (data, NULL, civ, stmt, USE_COMPARE);
1368 }
1369 
1370 /* Returns true if expression EXPR is obviously invariant in LOOP,
1371    i.e. if all its operands are defined outside of the LOOP.  LOOP
1372    should not be the function body.  */
1373 
1374 bool
1375 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1376 {
1377   basic_block def_bb;
1378   unsigned i, len;
1379 
1380   gcc_assert (loop_depth (loop) > 0);
1381 
1382   if (is_gimple_min_invariant (expr))
1383     return true;
1384 
1385   if (TREE_CODE (expr) == SSA_NAME)
1386     {
1387       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1388       if (def_bb
1389 	  && flow_bb_inside_loop_p (loop, def_bb))
1390 	return false;
1391 
1392       return true;
1393     }
1394 
1395   if (!EXPR_P (expr))
1396     return false;
1397 
1398   len = TREE_OPERAND_LENGTH (expr);
1399   for (i = 0; i < len; i++)
1400     if (TREE_OPERAND (expr, i)
1401 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1402       return false;
1403 
1404   return true;
1405 }
1406 
1407 /* Returns true if statement STMT is obviously invariant in LOOP,
1408    i.e. if all its operands on the RHS are defined outside of the LOOP.
1409    LOOP should not be the function body.  */
1410 
1411 bool
1412 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1413 {
1414   unsigned i;
1415   tree lhs;
1416 
1417   gcc_assert (loop_depth (loop) > 0);
1418 
1419   lhs = gimple_get_lhs (stmt);
1420   for (i = 0; i < gimple_num_ops (stmt); i++)
1421     {
1422       tree op = gimple_op (stmt, i);
1423       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1424 	return false;
1425     }
1426 
1427   return true;
1428 }
1429 
1430 /* Accumulates the steps of indices into DATA and replaces their values
1431    with the initial ones.  Returns false when the value of the index
1432    cannot be determined.  Callback for for_each_index.  */
1433 
1434 struct ifs_ivopts_data
1435 {
1436   struct ivopts_data *ivopts_data;
1437   gimple stmt;
1438   tree step;
1439 };
1440 
1441 static bool
1442 idx_find_step (tree base, tree *idx, void *data)
1443 {
1444   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1445   struct iv *iv;
1446   tree step, iv_base, iv_step, lbound, off;
1447   struct loop *loop = dta->ivopts_data->current_loop;
1448 
1449   /* If base is a component ref, require that the offset of the reference
1450      be invariant.  */
1451   if (TREE_CODE (base) == COMPONENT_REF)
1452     {
1453       off = component_ref_field_offset (base);
1454       return expr_invariant_in_loop_p (loop, off);
1455     }
1456 
1457   /* If base is an array reference, first check whether we will be able to
1458      move the reference out of the loop (in order to take its address in
1459      strength reduction).  In order for this to work we need both the lower
1460      bound and the step to be loop invariant.  */
1461   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1462     {
1463       /* Moreover, for a range, the size needs to be invariant as well.  */
1464       if (TREE_CODE (base) == ARRAY_RANGE_REF
1465 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1466 	return false;
1467 
1468       step = array_ref_element_size (base);
1469       lbound = array_ref_low_bound (base);
1470 
1471       if (!expr_invariant_in_loop_p (loop, step)
1472 	  || !expr_invariant_in_loop_p (loop, lbound))
1473 	return false;
1474     }
1475 
1476   if (TREE_CODE (*idx) != SSA_NAME)
1477     return true;
1478 
1479   iv = get_iv (dta->ivopts_data, *idx);
1480   if (!iv)
1481     return false;
1482 
1483   /* XXX  For a base of *D42 with iv->base being &x[0], we produce
1484 	  *&x[0], which is not folded and does not trigger the
1485 	  ARRAY_REF path below.  */
1486   *idx = iv->base;
1487 
1488   if (integer_zerop (iv->step))
1489     return true;
1490 
1491   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1492     {
1493       step = array_ref_element_size (base);
1494 
1495       /* We only handle addresses whose step is an integer constant.  */
1496       if (TREE_CODE (step) != INTEGER_CST)
1497 	return false;
1498     }
1499   else
1500     /* The step for pointer arithmetic is already 1 byte.  */
1501     step = size_one_node;
1502 
1503   iv_base = iv->base;
1504   iv_step = iv->step;
1505   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1506 			    sizetype, &iv_base, &iv_step, dta->stmt,
1507 			    false))
1508     {
1509       /* The index might wrap.  */
1510       return false;
1511     }
1512 
1513   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1514   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1515 
1516   return true;
1517 }
1518 
1519 /* Records a use in index IDX.  Callback for for_each_index.  Ivopts data
1520    object is passed to it in DATA.  */
1521 
1522 static bool
1523 idx_record_use (tree base, tree *idx,
1524 		void *vdata)
1525 {
1526   struct ivopts_data *data = (struct ivopts_data *) vdata;
1527   find_interesting_uses_op (data, *idx);
1528   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1529     {
1530       find_interesting_uses_op (data, array_ref_element_size (base));
1531       find_interesting_uses_op (data, array_ref_low_bound (base));
1532     }
1533   return true;
1534 }
1535 
1536 /* If we can prove that TOP = cst * BOT for some constant cst,
1537    store cst to MUL and return true.  Otherwise return false.
1538    The returned value is always sign-extended, regardless of the
1539    signedness of TOP and BOT.  */
1540 
1541 static bool
1542 constant_multiple_of (tree top, tree bot, double_int *mul)
1543 {
1544   tree mby;
1545   enum tree_code code;
1546   double_int res, p0, p1;
1547   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1548 
1549   STRIP_NOPS (top);
1550   STRIP_NOPS (bot);
1551 
1552   if (operand_equal_p (top, bot, 0))
1553     {
1554       *mul = double_int_one;
1555       return true;
1556     }
1557 
1558   code = TREE_CODE (top);
1559   switch (code)
1560     {
1561     case MULT_EXPR:
1562       mby = TREE_OPERAND (top, 1);
1563       if (TREE_CODE (mby) != INTEGER_CST)
1564 	return false;
1565 
1566       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1567 	return false;
1568 
1569       *mul = (res * tree_to_double_int (mby)).sext (precision);
1570       return true;
1571 
1572     case PLUS_EXPR:
1573     case MINUS_EXPR:
1574       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1575 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1576 	return false;
1577 
1578       if (code == MINUS_EXPR)
1579 	p1 = -p1;
1580       *mul = (p0 + p1).sext (precision);
1581       return true;
1582 
1583     case INTEGER_CST:
1584       if (TREE_CODE (bot) != INTEGER_CST)
1585 	return false;
1586 
1587       p0 = tree_to_double_int (top).sext (precision);
1588       p1 = tree_to_double_int (bot).sext (precision);
1589       if (p1.is_zero ())
1590 	return false;
1591       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1592       return res.is_zero ();
1593 
1594     default:
1595       return false;
1596     }
1597 }
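
/* Some illustrative inputs and results (i_1 and j_2 are made-up ssa
   names):

     constant_multiple_of (i_1 * 8, i_1, &mul)    sets *mul = 8
     constant_multiple_of (24, 8, &mul)           sets *mul = 3
     constant_multiple_of (25, 8, &mul)           fails (remainder)
     constant_multiple_of (i_1 - j_2, i_1, &mul)  fails  */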
1598 
1599 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1600 
1601 static bool
1602 may_be_unaligned_p (tree ref, tree step)
1603 {
1604   tree base;
1605   tree base_type;
1606   HOST_WIDE_INT bitsize;
1607   HOST_WIDE_INT bitpos;
1608   tree toffset;
1609   enum machine_mode mode;
1610   int unsignedp, volatilep;
1611   unsigned base_align;
1612 
1613   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1614      thus they are not misaligned.  */
1615   if (TREE_CODE (ref) == TARGET_MEM_REF)
1616     return false;
1617 
1618   /* The test below is basically a copy of what expr.c:normal_inner_ref
1619      does to check whether the object must be loaded by parts when
1620      STRICT_ALIGNMENT is true.  */
1621   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1622 			      &unsignedp, &volatilep, true);
1623   base_type = TREE_TYPE (base);
1624   base_align = get_object_alignment (base);
1625   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1626 
1627   if (mode != BLKmode)
1628     {
1629       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1630 
1631       if (base_align < mode_align
1632 	  || (bitpos % mode_align) != 0
1633 	  || (bitpos % BITS_PER_UNIT) != 0)
1634 	return true;
1635 
1636       if (toffset
1637 	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1638 	return true;
1639 
1640       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1641 	return true;
1642     }
1643 
1644   return false;
1645 }
1646 
1647 /* Return true if EXPR may be non-addressable.  */
1648 
1649 bool
1650 may_be_nonaddressable_p (tree expr)
1651 {
1652   switch (TREE_CODE (expr))
1653     {
1654     case TARGET_MEM_REF:
1655       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1656 	 target, thus they are always addressable.  */
1657       return false;
1658 
1659     case COMPONENT_REF:
1660       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1661 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1662 
1663     case VIEW_CONVERT_EXPR:
1664       /* This kind of view-conversion may wrap non-addressable objects
1665 	 and make them look addressable.  After some processing the
1666 	 non-addressability may be uncovered again, causing ADDR_EXPRs
1667 	 of inappropriate objects to be built.  */
1668       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1669 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1670 	return true;
1671 
1672       /* ... fall through ... */
1673 
1674     case ARRAY_REF:
1675     case ARRAY_RANGE_REF:
1676       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1677 
1678     CASE_CONVERT:
1679       return true;
1680 
1681     default:
1682       break;
1683     }
1684 
1685   return false;
1686 }
1687 
1688 /* Finds addresses in *OP_P inside STMT.  */
1689 
1690 static void
1691 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1692 {
1693   tree base = *op_p, step = size_zero_node;
1694   struct iv *civ;
1695   struct ifs_ivopts_data ifs_ivopts_data;
1696 
1697   /* Do not play with volatile memory references.  A bit too conservative,
1698      perhaps, but safe.  */
1699   if (gimple_has_volatile_ops (stmt))
1700     goto fail;
1701 
1702   /* Ignore bitfields for now.  Not really something terribly complicated
1703      to handle.  TODO.  */
1704   if (TREE_CODE (base) == BIT_FIELD_REF)
1705     goto fail;
1706 
1707   base = unshare_expr (base);
1708 
1709   if (TREE_CODE (base) == TARGET_MEM_REF)
1710     {
1711       tree type = build_pointer_type (TREE_TYPE (base));
1712       tree astep;
1713 
1714       if (TMR_BASE (base)
1715 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1716 	{
1717 	  civ = get_iv (data, TMR_BASE (base));
1718 	  if (!civ)
1719 	    goto fail;
1720 
1721 	  TMR_BASE (base) = civ->base;
1722 	  step = civ->step;
1723 	}
1724       if (TMR_INDEX2 (base)
1725 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1726 	{
1727 	  civ = get_iv (data, TMR_INDEX2 (base));
1728 	  if (!civ)
1729 	    goto fail;
1730 
1731 	  TMR_INDEX2 (base) = civ->base;
1732 	  step = civ->step;
1733 	}
1734       if (TMR_INDEX (base)
1735 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1736 	{
1737 	  civ = get_iv (data, TMR_INDEX (base));
1738 	  if (!civ)
1739 	    goto fail;
1740 
1741 	  TMR_INDEX (base) = civ->base;
1742 	  astep = civ->step;
1743 
1744 	  if (astep)
1745 	    {
1746 	      if (TMR_STEP (base))
1747 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1748 
1749 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
1750 	    }
1751 	}
1752 
1753       if (integer_zerop (step))
1754 	goto fail;
1755       base = tree_mem_ref_addr (type, base);
1756     }
1757   else
1758     {
1759       ifs_ivopts_data.ivopts_data = data;
1760       ifs_ivopts_data.stmt = stmt;
1761       ifs_ivopts_data.step = size_zero_node;
1762       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1763 	  || integer_zerop (ifs_ivopts_data.step))
1764 	goto fail;
1765       step = ifs_ivopts_data.step;
1766 
1767       /* Check that the base expression is addressable.  This needs
1768 	 to be done after substituting bases of IVs into it.  */
1769       if (may_be_nonaddressable_p (base))
1770 	goto fail;
1771 
1772       /* Moreover, on strict alignment platforms, check that it is
1773 	 sufficiently aligned.  */
1774       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1775 	goto fail;
1776 
1777       base = build_fold_addr_expr (base);
1778 
1779       /* Substituting bases of IVs into the base expression might
1780 	 have caused folding opportunities.  */
1781       if (TREE_CODE (base) == ADDR_EXPR)
1782 	{
1783 	  tree *ref = &TREE_OPERAND (base, 0);
1784 	  while (handled_component_p (*ref))
1785 	    ref = &TREE_OPERAND (*ref, 0);
1786 	  if (TREE_CODE (*ref) == MEM_REF)
1787 	    {
1788 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1789 				      TREE_OPERAND (*ref, 0),
1790 				      TREE_OPERAND (*ref, 1));
1791 	      if (tem)
1792 		*ref = tem;
1793 	    }
1794 	}
1795     }
1796 
1797   civ = alloc_iv (base, step);
1798   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1799   return;
1800 
1801 fail:
1802   for_each_index (op_p, idx_record_use, data);
1803 }
1804 
1805 /* Finds and records invariants used in STMT.  */
1806 
1807 static void
1808 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1809 {
1810   ssa_op_iter iter;
1811   use_operand_p use_p;
1812   tree op;
1813 
1814   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1815     {
1816       op = USE_FROM_PTR (use_p);
1817       record_invariant (data, op, false);
1818     }
1819 }
1820 
1821 /* Finds interesting uses of induction variables in the statement STMT.  */
1822 
1823 static void
1824 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1825 {
1826   struct iv *iv;
1827   tree op, *lhs, *rhs;
1828   ssa_op_iter iter;
1829   use_operand_p use_p;
1830   enum tree_code code;
1831 
1832   find_invariants_stmt (data, stmt);
1833 
1834   if (gimple_code (stmt) == GIMPLE_COND)
1835     {
1836       find_interesting_uses_cond (data, stmt);
1837       return;
1838     }
1839 
1840   if (is_gimple_assign (stmt))
1841     {
1842       lhs = gimple_assign_lhs_ptr (stmt);
1843       rhs = gimple_assign_rhs1_ptr (stmt);
1844 
1845       if (TREE_CODE (*lhs) == SSA_NAME)
1846 	{
1847 	  /* If the statement defines an induction variable, the uses are not
1848 	     interesting by themselves.  */
1849 
1850 	  iv = get_iv (data, *lhs);
1851 
1852 	  if (iv && !integer_zerop (iv->step))
1853 	    return;
1854 	}
1855 
1856       code = gimple_assign_rhs_code (stmt);
1857       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1858 	  && (REFERENCE_CLASS_P (*rhs)
1859 	      || is_gimple_val (*rhs)))
1860 	{
1861 	  if (REFERENCE_CLASS_P (*rhs))
1862 	    find_interesting_uses_address (data, stmt, rhs);
1863 	  else
1864 	    find_interesting_uses_op (data, *rhs);
1865 
1866 	  if (REFERENCE_CLASS_P (*lhs))
1867 	    find_interesting_uses_address (data, stmt, lhs);
1868 	  return;
1869 	}
1870       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1871 	{
1872 	  find_interesting_uses_cond (data, stmt);
1873 	  return;
1874 	}
1875 
1876       /* TODO -- we should also handle address uses of type
1877 
1878 	 memory = call (whatever);
1879 
1880 	 and
1881 
1882 	 call (memory).  */
1883     }
1884 
1885   if (gimple_code (stmt) == GIMPLE_PHI
1886       && gimple_bb (stmt) == data->current_loop->header)
1887     {
1888       iv = get_iv (data, PHI_RESULT (stmt));
1889 
1890       if (iv && !integer_zerop (iv->step))
1891 	return;
1892     }
1893 
1894   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1895     {
1896       op = USE_FROM_PTR (use_p);
1897 
1898       if (TREE_CODE (op) != SSA_NAME)
1899 	continue;
1900 
1901       iv = get_iv (data, op);
1902       if (!iv)
1903 	continue;
1904 
1905       find_interesting_uses_op (data, op);
1906     }
1907 }
1908 
1909 /* Finds interesting uses of induction variables outside of loops
1910    on loop exit edge EXIT.  */
1911 
1912 static void
1913 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1914 {
1915   gimple phi;
1916   gimple_stmt_iterator psi;
1917   tree def;
1918 
1919   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1920     {
1921       phi = gsi_stmt (psi);
1922       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1923       if (!virtual_operand_p (def))
1924         find_interesting_uses_op (data, def);
1925     }
1926 }
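
/* E.g. if the final value of an iv is still needed after the loop through
   a PHI node on the exit edge, the PHI argument is recorded here via
   find_interesting_uses_op, which later allows replacing that use by a
   direct computation of the final value.  */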
1927 
1928 /* Finds uses of the induction variables that are interesting.  */
1929 
1930 static void
1931 find_interesting_uses (struct ivopts_data *data)
1932 {
1933   basic_block bb;
1934   gimple_stmt_iterator bsi;
1935   basic_block *body = get_loop_body (data->current_loop);
1936   unsigned i;
1937   struct version_info *info;
1938   edge e;
1939 
1940   if (dump_file && (dump_flags & TDF_DETAILS))
1941     fprintf (dump_file, "Uses:\n\n");
1942 
1943   for (i = 0; i < data->current_loop->num_nodes; i++)
1944     {
1945       edge_iterator ei;
1946       bb = body[i];
1947 
1948       FOR_EACH_EDGE (e, ei, bb->succs)
1949 	if (e->dest != EXIT_BLOCK_PTR
1950 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1951 	  find_interesting_uses_outside (data, e);
1952 
1953       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1954 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
1955       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1956 	if (!is_gimple_debug (gsi_stmt (bsi)))
1957 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
1958     }
1959 
1960   if (dump_file && (dump_flags & TDF_DETAILS))
1961     {
1962       bitmap_iterator bi;
1963 
1964       fprintf (dump_file, "\n");
1965 
1966       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1967 	{
1968 	  info = ver_info (data, i);
1969 	  if (info->inv_id)
1970 	    {
1971 	      fprintf (dump_file, "  ");
1972 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
1973 	      fprintf (dump_file, " is invariant (%d)%s\n",
1974 		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1975 	    }
1976 	}
1977 
1978       fprintf (dump_file, "\n");
1979     }
1980 
1981   free (body);
1982 }
1983 
1984 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1985    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1986    we are at the top level of the processed address.  */
1987 
1988 static tree
1989 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1990 		unsigned HOST_WIDE_INT *offset)
1991 {
1992   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1993   enum tree_code code;
1994   tree type, orig_type = TREE_TYPE (expr);
1995   unsigned HOST_WIDE_INT off0, off1, st;
1996   tree orig_expr = expr;
1997 
1998   STRIP_NOPS (expr);
1999 
2000   type = TREE_TYPE (expr);
2001   code = TREE_CODE (expr);
2002   *offset = 0;
2003 
2004   switch (code)
2005     {
2006     case INTEGER_CST:
2007       if (!cst_and_fits_in_hwi (expr)
2008 	  || integer_zerop (expr))
2009 	return orig_expr;
2010 
2011       *offset = int_cst_value (expr);
2012       return build_int_cst (orig_type, 0);
2013 
2014     case POINTER_PLUS_EXPR:
2015     case PLUS_EXPR:
2016     case MINUS_EXPR:
2017       op0 = TREE_OPERAND (expr, 0);
2018       op1 = TREE_OPERAND (expr, 1);
2019 
2020       op0 = strip_offset_1 (op0, false, false, &off0);
2021       op1 = strip_offset_1 (op1, false, false, &off1);
2022 
2023       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2024       if (op0 == TREE_OPERAND (expr, 0)
2025 	  && op1 == TREE_OPERAND (expr, 1))
2026 	return orig_expr;
2027 
2028       if (integer_zerop (op1))
2029 	expr = op0;
2030       else if (integer_zerop (op0))
2031 	{
2032 	  if (code == MINUS_EXPR)
2033 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2034 	  else
2035 	    expr = op1;
2036 	}
2037       else
2038 	expr = fold_build2 (code, type, op0, op1);
2039 
2040       return fold_convert (orig_type, expr);
2041 
2042     case MULT_EXPR:
2043       op1 = TREE_OPERAND (expr, 1);
2044       if (!cst_and_fits_in_hwi (op1))
2045 	return orig_expr;
2046 
2047       op0 = TREE_OPERAND (expr, 0);
2048       op0 = strip_offset_1 (op0, false, false, &off0);
2049       if (op0 == TREE_OPERAND (expr, 0))
2050 	return orig_expr;
2051 
2052       *offset = off0 * int_cst_value (op1);
2053       if (integer_zerop (op0))
2054 	expr = op0;
2055       else
2056 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2057 
2058       return fold_convert (orig_type, expr);
2059 
2060     case ARRAY_REF:
2061     case ARRAY_RANGE_REF:
2062       if (!inside_addr)
2063 	return orig_expr;
2064 
2065       step = array_ref_element_size (expr);
2066       if (!cst_and_fits_in_hwi (step))
2067 	break;
2068 
2069       st = int_cst_value (step);
2070       op1 = TREE_OPERAND (expr, 1);
2071       op1 = strip_offset_1 (op1, false, false, &off1);
2072       *offset = off1 * st;
2073 
2074       if (top_compref
2075 	  && integer_zerop (op1))
2076 	{
2077 	  /* Strip the component reference completely.  */
2078 	  op0 = TREE_OPERAND (expr, 0);
2079 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2080 	  *offset += off0;
2081 	  return op0;
2082 	}
2083       break;
2084 
2085     case COMPONENT_REF:
2086       if (!inside_addr)
2087 	return orig_expr;
2088 
2089       tmp = component_ref_field_offset (expr);
2090       if (top_compref
2091 	  && cst_and_fits_in_hwi (tmp))
2092 	{
2093 	  /* Strip the component reference completely.  */
2094 	  op0 = TREE_OPERAND (expr, 0);
2095 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2096 	  *offset = off0 + int_cst_value (tmp);
2097 	  return op0;
2098 	}
2099       break;
2100 
2101     case ADDR_EXPR:
2102       op0 = TREE_OPERAND (expr, 0);
2103       op0 = strip_offset_1 (op0, true, true, &off0);
2104       *offset += off0;
2105 
2106       if (op0 == TREE_OPERAND (expr, 0))
2107 	return orig_expr;
2108 
2109       expr = build_fold_addr_expr (op0);
2110       return fold_convert (orig_type, expr);
2111 
2112     case MEM_REF:
2113       /* ???  Offset operand?  */
2114       inside_addr = false;
2115       break;
2116 
2117     default:
2118       return orig_expr;
2119     }
2120 
2121   /* Default handling of expressions for which we want to recurse into
2122      the first operand.  */
2123   op0 = TREE_OPERAND (expr, 0);
2124   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2125   *offset += off0;
2126 
2127   if (op0 == TREE_OPERAND (expr, 0)
2128       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2129     return orig_expr;
2130 
2131   expr = copy_node (expr);
2132   TREE_OPERAND (expr, 0) = op0;
2133   if (op1)
2134     TREE_OPERAND (expr, 1) = op1;
2135 
2136   /* Inside an address, we might strip the top-level component references,
2137      thus changing the type of the expression.  Handling of ADDR_EXPR
2138      will fix that.  */
2139   expr = fold_convert (orig_type, expr);
2140 
2141   return expr;
2142 }
2143 
2144 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2145 
2146 static tree
2147 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2148 {
2149   return strip_offset_1 (expr, false, false, offset);
2150 }
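
/* E.g. for EXPR == &a[i + 3] with 4-byte array elements, this returns
   &a[i] and stores 12 to *OFFSET.  The offsets are accumulated in
   unsigned HOST_WIDE_INT arithmetic, so a negative net offset simply
   appears in wrapped form.  */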
2151 
2152 /* Returns a variant of TYPE that can be used as a base for different uses.
2153    We return an unsigned type with the same precision, which avoids problems
2154    with overflows.  */
2155 
2156 static tree
2157 generic_type_for (tree type)
2158 {
2159   if (POINTER_TYPE_P (type))
2160     return unsigned_type_for (type);
2161 
2162   if (TYPE_UNSIGNED (type))
2163     return type;
2164 
2165   return unsigned_type_for (type);
2166 }
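
/* E.g. generic_type_for (int) is unsigned int, and generic_type_for of a
   pointer type is an unsigned integer type of the same precision, so that
   overflow of the candidate increments is well defined.  */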
2167 
2168 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2169    the bitmap in which we should store them.  */
2170 
2171 static struct ivopts_data *fd_ivopts_data;
2172 static tree
2173 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2174 {
2175   bitmap *depends_on = (bitmap *) data;
2176   struct version_info *info;
2177 
2178   if (TREE_CODE (*expr_p) != SSA_NAME)
2179     return NULL_TREE;
2180   info = name_info (fd_ivopts_data, *expr_p);
2181 
2182   if (!info->inv_id || info->has_nonlin_use)
2183     return NULL_TREE;
2184 
2185   if (!*depends_on)
2186     *depends_on = BITMAP_ALLOC (NULL);
2187   bitmap_set_bit (*depends_on, info->inv_id);
2188 
2189   return NULL_TREE;
2190 }
2191 
2192 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2193    position to POS.  If USE is not NULL, the candidate is set as related to
2194    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2195    replacement of the final value of the iv by a direct computation.  */
2196 
2197 static struct iv_cand *
2198 add_candidate_1 (struct ivopts_data *data,
2199 		 tree base, tree step, bool important, enum iv_position pos,
2200 		 struct iv_use *use, gimple incremented_at)
2201 {
2202   unsigned i;
2203   struct iv_cand *cand = NULL;
2204   tree type, orig_type;
2205 
2206   /* For non-original variables, make sure their values are computed in a type
2207      that does not invoke undefined behavior on overflows (since in general,
2208      we cannot prove that these induction variables are non-wrapping).  */
2209   if (pos != IP_ORIGINAL)
2210     {
2211       orig_type = TREE_TYPE (base);
2212       type = generic_type_for (orig_type);
2213       if (type != orig_type)
2214 	{
2215 	  base = fold_convert (type, base);
2216 	  step = fold_convert (type, step);
2217 	}
2218     }
2219 
2220   for (i = 0; i < n_iv_cands (data); i++)
2221     {
2222       cand = iv_cand (data, i);
2223 
2224       if (cand->pos != pos)
2225 	continue;
2226 
2227       if (cand->incremented_at != incremented_at
2228 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2229 	      && cand->ainc_use != use))
2230 	continue;
2231 
2232       if (!cand->iv)
2233 	{
2234 	  if (!base && !step)
2235 	    break;
2236 
2237 	  continue;
2238 	}
2239 
2240       if (!base && !step)
2241 	continue;
2242 
2243       if (operand_equal_p (base, cand->iv->base, 0)
2244 	  && operand_equal_p (step, cand->iv->step, 0)
2245           && (TYPE_PRECISION (TREE_TYPE (base))
2246               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2247 	break;
2248     }
2249 
2250   if (i == n_iv_cands (data))
2251     {
2252       cand = XCNEW (struct iv_cand);
2253       cand->id = i;
2254 
2255       if (!base && !step)
2256 	cand->iv = NULL;
2257       else
2258 	cand->iv = alloc_iv (base, step);
2259 
2260       cand->pos = pos;
2261       if (pos != IP_ORIGINAL && cand->iv)
2262 	{
2263 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2264 	  cand->var_after = cand->var_before;
2265 	}
2266       cand->important = important;
2267       cand->incremented_at = incremented_at;
2268       data->iv_candidates.safe_push (cand);
2269 
2270       if (step
2271 	  && TREE_CODE (step) != INTEGER_CST)
2272 	{
2273 	  fd_ivopts_data = data;
2274 	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2275 	}
2276 
2277       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2278 	cand->ainc_use = use;
2279       else
2280 	cand->ainc_use = NULL;
2281 
2282       if (dump_file && (dump_flags & TDF_DETAILS))
2283 	dump_cand (dump_file, cand);
2284     }
2285 
2286   if (important && !cand->important)
2287     {
2288       cand->important = true;
2289       if (dump_file && (dump_flags & TDF_DETAILS))
2290 	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2291     }
2292 
2293   if (use)
2294     {
2295       bitmap_set_bit (use->related_cands, i);
2296       if (dump_file && (dump_flags & TDF_DETAILS))
2297 	fprintf (dump_file, "Candidate %d is related to use %d\n",
2298 		 cand->id, use->id);
2299     }
2300 
2301   return cand;
2302 }
2303 
2304 /* Returns true if incrementing the induction variable at the end of the LOOP
2305    is allowed.
2306 
2307    The purpose is to avoid splitting the latch edge with a biv increment,
2308    thus creating a jump, possibly confusing other optimization passes and
2309    leaving less freedom to the scheduler.  So we allow IP_END_POS only if
2310    IP_NORMAL_POS is not available (so we do not have a better alternative),
2311    or if the latch edge is already nonempty.  */
2312 
2313 static bool
2314 allow_ip_end_pos_p (struct loop *loop)
2315 {
2316   if (!ip_normal_pos (loop))
2317     return true;
2318 
2319   if (!empty_block_p (ip_end_pos (loop)))
2320     return true;
2321 
2322   return false;
2323 }
2324 
2325 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2326    Important field is set to IMPORTANT.  */
2327 
2328 static void
2329 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2330 			bool important, struct iv_use *use)
2331 {
2332   basic_block use_bb = gimple_bb (use->stmt);
2333   enum machine_mode mem_mode;
2334   unsigned HOST_WIDE_INT cstepi;
2335 
2336   /* If we insert the increment in any position other than the standard
2337      ones, we must ensure that it is incremented once per iteration.
2338      It must not be in an inner nested loop, or one side of an if
2339      statement.  */
2340   if (use_bb->loop_father != data->current_loop
2341       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2342       || stmt_could_throw_p (use->stmt)
2343       || !cst_and_fits_in_hwi (step))
2344     return;
2345 
2346   cstepi = int_cst_value (step);
2347 
2348   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2349   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2350 	|| USE_STORE_PRE_INCREMENT (mem_mode))
2351        && GET_MODE_SIZE (mem_mode) == cstepi)
2352       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2353 	   || USE_STORE_PRE_DECREMENT (mem_mode))
2354 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2355     {
2356       enum tree_code code = MINUS_EXPR;
2357       tree new_base;
2358       tree new_step = step;
2359 
2360       if (POINTER_TYPE_P (TREE_TYPE (base)))
2361 	{
2362 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2363 	  code = POINTER_PLUS_EXPR;
2364 	}
2365       else
2366 	new_step = fold_convert (TREE_TYPE (base), new_step);
2367       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2368       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2369 		       use->stmt);
2370     }
2371   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2372 	|| USE_STORE_POST_INCREMENT (mem_mode))
2373        && GET_MODE_SIZE (mem_mode) == cstepi)
2374       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2375 	   || USE_STORE_POST_DECREMENT (mem_mode))
2376 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2377     {
2378       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2379 		       use->stmt);
2380     }
2381 }
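
/* E.g. on a target with 4-byte post-increment addressing, a use *p whose
   iv has step 4 gets an IP_AFTER_USE candidate with unchanged base, so
   that the increment can later be combined with the memory access into
   *p++.  */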
2382 
2383 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2384    position to POS.  If USE is not NULL, the candidate is set as related to
2385    it.  The candidate computation is scheduled on all available positions.  */
2386 
2387 static void
2388 add_candidate (struct ivopts_data *data,
2389 	       tree base, tree step, bool important, struct iv_use *use)
2390 {
2391   if (ip_normal_pos (data->current_loop))
2392     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2393   if (ip_end_pos (data->current_loop)
2394       && allow_ip_end_pos_p (data->current_loop))
2395     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2396 
2397   if (use != NULL && use->type == USE_ADDRESS)
2398     add_autoinc_candidates (data, base, step, important, use);
2399 }
2400 
2401 /* Adds standard iv candidates.  */
2402 
2403 static void
2404 add_standard_iv_candidates (struct ivopts_data *data)
2405 {
2406   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2407 
2408   /* The same for a long type if it is still fast enough.  */
2409   if (TYPE_PRECISION
2410         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2411       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2412     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2413 		   build_int_cst (long_integer_type_node, 1), true, NULL);
2414 
2415   /* And likewise for a long long type.  */
2416   if (TYPE_PRECISION
2417         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2418       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2419     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2420 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
2421 }
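
/* E.g. on an LP64 target this adds the candidates 0 + 1*i in int and in
   long; the long long variant is skipped there because it is no wider
   than long.  */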
2422 
2424 /* Adds candidates based on the old induction variable IV.  */
2425 
2426 static void
2427 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2428 {
2429   gimple phi;
2430   tree def;
2431   struct iv_cand *cand;
2432 
2433   add_candidate (data, iv->base, iv->step, true, NULL);
2434 
2435   /* The same, but with initial value zero.  */
2436   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2437     add_candidate (data, size_int (0), iv->step, true, NULL);
2438   else
2439     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2440 		   iv->step, true, NULL);
2441 
2442   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2443   if (gimple_code (phi) == GIMPLE_PHI)
2444     {
2445       /* Additionally record the possibility of leaving the original iv
2446 	 untouched.  */
2447       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2448       cand = add_candidate_1 (data,
2449 			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
2450 			      SSA_NAME_DEF_STMT (def));
2451       cand->var_before = iv->ssa_name;
2452       cand->var_after = def;
2453     }
2454 }
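
/* E.g. for a biv defined by i_1 = PHI <0(preheader), i_2(latch)>, the
   IP_ORIGINAL candidate records var_before = i_1 and var_after = i_2,
   which lets the optimizer keep the user's variable untouched.  */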
2455 
2456 /* Adds candidates based on the old induction variables.  */
2457 
2458 static void
2459 add_old_ivs_candidates (struct ivopts_data *data)
2460 {
2461   unsigned i;
2462   struct iv *iv;
2463   bitmap_iterator bi;
2464 
2465   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2466     {
2467       iv = ver_info (data, i)->iv;
2468       if (iv && iv->biv_p && !integer_zerop (iv->step))
2469 	add_old_iv_candidates (data, iv);
2470     }
2471 }
2472 
2473 /* Adds candidates based on the value of the induction variable IV and USE.  */
2474 
2475 static void
2476 add_iv_value_candidates (struct ivopts_data *data,
2477 			 struct iv *iv, struct iv_use *use)
2478 {
2479   unsigned HOST_WIDE_INT offset;
2480   tree base;
2481   tree basetype;
2482 
2483   add_candidate (data, iv->base, iv->step, false, use);
2484 
2485   /* The same, but with initial value zero.  Make such a variable important,
2486      since it is generic enough that possibly many uses may be based
2487      on it.  */
2488   basetype = TREE_TYPE (iv->base);
2489   if (POINTER_TYPE_P (basetype))
2490     basetype = sizetype;
2491   add_candidate (data, build_int_cst (basetype, 0),
2492 		 iv->step, true, use);
2493 
2494   /* Third, try removing the constant offset.  Make sure to even
2495      add a candidate for &a[0] vs. (T *)&a.  */
2496   base = strip_offset (iv->base, &offset);
2497   if (offset
2498       || base != iv->base)
2499     add_candidate (data, base, iv->step, false, use);
2500 }
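
/* E.g. for a use whose iv is &a[16] + 4*i with 4-byte elements, this adds
   the candidates &a[16] + 4*i, 0 + 4*i (the important generic variant),
   and a third one with the constant byte offset stripped from the base --
   the &a[0] vs. (T *)&a case mentioned above.  */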
2501 
2502 /* Adds candidates based on the uses.  */
2503 
2504 static void
2505 add_derived_ivs_candidates (struct ivopts_data *data)
2506 {
2507   unsigned i;
2508 
2509   for (i = 0; i < n_iv_uses (data); i++)
2510     {
2511       struct iv_use *use = iv_use (data, i);
2512 
2513       if (!use)
2514 	continue;
2515 
2516       switch (use->type)
2517 	{
2518 	case USE_NONLINEAR_EXPR:
2519 	case USE_COMPARE:
2520 	case USE_ADDRESS:
2521 	  /* Just add the ivs based on the value of the iv used here.  */
2522 	  add_iv_value_candidates (data, use->iv, use);
2523 	  break;
2524 
2525 	default:
2526 	  gcc_unreachable ();
2527 	}
2528     }
2529 }
2530 
2531 /* Record important candidates and add them to related_cands bitmaps
2532    if needed.  */
2533 
2534 static void
2535 record_important_candidates (struct ivopts_data *data)
2536 {
2537   unsigned i;
2538   struct iv_use *use;
2539 
2540   for (i = 0; i < n_iv_cands (data); i++)
2541     {
2542       struct iv_cand *cand = iv_cand (data, i);
2543 
2544       if (cand->important)
2545 	bitmap_set_bit (data->important_candidates, i);
2546     }
2547 
2548   data->consider_all_candidates = (n_iv_cands (data)
2549 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2550 
2551   if (data->consider_all_candidates)
2552     {
2553       /* We will not need "related_cands" bitmaps in this case,
2554 	 so release them to decrease peak memory consumption.  */
2555       for (i = 0; i < n_iv_uses (data); i++)
2556 	{
2557 	  use = iv_use (data, i);
2558 	  BITMAP_FREE (use->related_cands);
2559 	}
2560     }
2561   else
2562     {
2563       /* Add important candidates to the related_cands bitmaps.  */
2564       for (i = 0; i < n_iv_uses (data); i++)
2565 	bitmap_ior_into (iv_use (data, i)->related_cands,
2566 			 data->important_candidates);
2567     }
2568 }
2569 
2570 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2571    If consider_all_candidates is true, we use a two-dimensional array; otherwise
2572    we allocate a simple list for every use.  */
2573 
2574 static void
2575 alloc_use_cost_map (struct ivopts_data *data)
2576 {
2577   unsigned i, size, s;
2578 
2579   for (i = 0; i < n_iv_uses (data); i++)
2580     {
2581       struct iv_use *use = iv_use (data, i);
2582 
2583       if (data->consider_all_candidates)
2584 	size = n_iv_cands (data);
2585       else
2586 	{
2587 	  s = bitmap_count_bits (use->related_cands);
2588 
2589 	  /* Round up to a power of two, so that taking the modulo is fast.  */
2590 	  size = s ? (1 << ceil_log2 (s)) : 1;
2591 	}
2592 
2593       use->n_map_members = size;
2594       use->cost_map = XCNEWVEC (struct cost_pair, size);
2595     }
2596 }
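
/* E.g. a use related to five candidates gets a cost map with eight
   entries; rounding the size up to a power of two lets the lookup
   functions below reduce candidate ids with a cheap bit-and instead of
   a division.  */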
2597 
2598 /* Returns a description of the computation cost of an expression whose
2599    runtime cost is RUNTIME and whose complexity is COMPLEXITY.  */
2600 
2601 static comp_cost
2602 new_cost (unsigned runtime, unsigned complexity)
2603 {
2604   comp_cost cost;
2605 
2606   cost.cost = runtime;
2607   cost.complexity = complexity;
2608 
2609   return cost;
2610 }
2611 
2612 /* Adds costs COST1 and COST2.  */
2613 
2614 static comp_cost
2615 add_costs (comp_cost cost1, comp_cost cost2)
2616 {
2617   cost1.cost += cost2.cost;
2618   cost1.complexity += cost2.complexity;
2619 
2620   return cost1;
2621 }

2622 /* Subtracts COST2 from COST1.  */
2623 
2624 static comp_cost
2625 sub_costs (comp_cost cost1, comp_cost cost2)
2626 {
2627   cost1.cost -= cost2.cost;
2628   cost1.complexity -= cost2.complexity;
2629 
2630   return cost1;
2631 }
2632 
2633 /* Returns a negative number if COST1 < COST2, a positive number if
2634    COST1 > COST2, and 0 if COST1 = COST2.  */
2635 
2636 static int
2637 compare_costs (comp_cost cost1, comp_cost cost2)
2638 {
2639   if (cost1.cost == cost2.cost)
2640     return cost1.complexity - cost2.complexity;
2641 
2642   return cost1.cost - cost2.cost;
2643 }
2644 
2645 /* Returns true if COST is infinite.  */
2646 
2647 static bool
2648 infinite_cost_p (comp_cost cost)
2649 {
2650   return cost.cost == INFTY;
2651 }
2652 
2653 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2654    depends on invariants DEPENDS_ON, that the value used in expressing it
2655    is VALUE, and that in case of iv elimination the comparison operator is COMP.  */
2656 
2657 static void
2658 set_use_iv_cost (struct ivopts_data *data,
2659 		 struct iv_use *use, struct iv_cand *cand,
2660 		 comp_cost cost, bitmap depends_on, tree value,
2661 		 enum tree_code comp, int inv_expr_id)
2662 {
2663   unsigned i, s;
2664 
2665   if (infinite_cost_p (cost))
2666     {
2667       BITMAP_FREE (depends_on);
2668       return;
2669     }
2670 
2671   if (data->consider_all_candidates)
2672     {
2673       use->cost_map[cand->id].cand = cand;
2674       use->cost_map[cand->id].cost = cost;
2675       use->cost_map[cand->id].depends_on = depends_on;
2676       use->cost_map[cand->id].value = value;
2677       use->cost_map[cand->id].comp = comp;
2678       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2679       return;
2680     }
2681 
2682   /* n_map_members is a power of two, so this computes modulo.  */
2683   s = cand->id & (use->n_map_members - 1);
2684   for (i = s; i < use->n_map_members; i++)
2685     if (!use->cost_map[i].cand)
2686       goto found;
2687   for (i = 0; i < s; i++)
2688     if (!use->cost_map[i].cand)
2689       goto found;
2690 
2691   gcc_unreachable ();
2692 
2693 found:
2694   use->cost_map[i].cand = cand;
2695   use->cost_map[i].cost = cost;
2696   use->cost_map[i].depends_on = depends_on;
2697   use->cost_map[i].value = value;
2698   use->cost_map[i].comp = comp;
2699   use->cost_map[i].inv_expr_id = inv_expr_id;
2700 }
2701 
2702 /* Gets cost of (USE, CANDIDATE) pair.  */
2703 
2704 static struct cost_pair *
2705 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2706 		 struct iv_cand *cand)
2707 {
2708   unsigned i, s;
2709   struct cost_pair *ret;
2710 
2711   if (!cand)
2712     return NULL;
2713 
2714   if (data->consider_all_candidates)
2715     {
2716       ret = use->cost_map + cand->id;
2717       if (!ret->cand)
2718 	return NULL;
2719 
2720       return ret;
2721     }
2722 
2723   /* n_map_members is a power of two, so this computes modulo.  */
2724   s = cand->id & (use->n_map_members - 1);
2725   for (i = s; i < use->n_map_members; i++)
2726     if (use->cost_map[i].cand == cand)
2727       return use->cost_map + i;
2728     else if (use->cost_map[i].cand == NULL)
2729       return NULL;
2730   for (i = 0; i < s; i++)
2731     if (use->cost_map[i].cand == cand)
2732       return use->cost_map + i;
2733     else if (use->cost_map[i].cand == NULL)
2734       return NULL;
2735 
2736   return NULL;
2737 }
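
/* The cost map thus acts as a small open-addressing hash table: entries
   are keyed by CAND->id modulo the power-of-two table size, and
   collisions are resolved by linear probing that wraps around once.  */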
2738 
2739 /* Returns an estimate of the cost of computing SEQ.  */
2740 
2741 static unsigned
2742 seq_cost (rtx seq, bool speed)
2743 {
2744   unsigned cost = 0;
2745   rtx set;
2746 
2747   for (; seq; seq = NEXT_INSN (seq))
2748     {
2749       set = single_set (seq);
2750       if (set)
2751 	cost += set_src_cost (SET_SRC (set), speed);
2752       else
2753 	cost++;
2754     }
2755 
2756   return cost;
2757 }
2758 
2759 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2760 static rtx
2761 produce_memory_decl_rtl (tree obj, int *regno)
2762 {
2763   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2764   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2765   rtx x;
2766 
2767   gcc_assert (obj);
2768   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2769     {
2770       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2771       x = gen_rtx_SYMBOL_REF (address_mode, name);
2772       SET_SYMBOL_REF_DECL (x, obj);
2773       x = gen_rtx_MEM (DECL_MODE (obj), x);
2774       set_mem_addr_space (x, as);
2775       targetm.encode_section_info (obj, x, true);
2776     }
2777   else
2778     {
2779       x = gen_raw_REG (address_mode, (*regno)++);
2780       x = gen_rtx_MEM (DECL_MODE (obj), x);
2781       set_mem_addr_space (x, as);
2782     }
2783 
2784   return x;
2785 }
2786 
2787 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
2788    walk_tree.  DATA contains the actual fake register number.  */
2789 
2790 static tree
2791 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2792 {
2793   tree obj = NULL_TREE;
2794   rtx x = NULL_RTX;
2795   int *regno = (int *) data;
2796 
2797   switch (TREE_CODE (*expr_p))
2798     {
2799     case ADDR_EXPR:
2800       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2801 	   handled_component_p (*expr_p);
2802 	   expr_p = &TREE_OPERAND (*expr_p, 0))
2803 	continue;
2804       obj = *expr_p;
2805       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2806         x = produce_memory_decl_rtl (obj, regno);
2807       break;
2808 
2809     case SSA_NAME:
2810       *ws = 0;
2811       obj = SSA_NAME_VAR (*expr_p);
2812       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2813       if (!obj)
2814 	return NULL_TREE;
2815       if (!DECL_RTL_SET_P (obj))
2816 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2817       break;
2818 
2819     case VAR_DECL:
2820     case PARM_DECL:
2821     case RESULT_DECL:
2822       *ws = 0;
2823       obj = *expr_p;
2824 
2825       if (DECL_RTL_SET_P (obj))
2826 	break;
2827 
2828       if (DECL_MODE (obj) == BLKmode)
2829 	x = produce_memory_decl_rtl (obj, regno);
2830       else
2831 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2832 
2833       break;
2834 
2835     default:
2836       break;
2837     }
2838 
2839   if (x)
2840     {
2841       decl_rtl_to_reset.safe_push (obj);
2842       SET_DECL_RTL (obj, x);
2843     }
2844 
2845   return NULL_TREE;
2846 }
2847 
2848 /* Determines the cost of computing EXPR.  */
2849 
2850 static unsigned
2851 computation_cost (tree expr, bool speed)
2852 {
2853   rtx seq, rslt;
2854   tree type = TREE_TYPE (expr);
2855   unsigned cost;
2856   /* Avoid using hard regs in ways which may be unsupported.  */
2857   int regno = LAST_VIRTUAL_REGISTER + 1;
2858   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2859   enum node_frequency real_frequency = node->frequency;
2860 
2861   node->frequency = NODE_FREQUENCY_NORMAL;
2862   crtl->maybe_hot_insn_p = speed;
2863   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2864   start_sequence ();
2865   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2866   seq = get_insns ();
2867   end_sequence ();
2868   default_rtl_profile ();
2869   node->frequency = real_frequency;
2870 
2871   cost = seq_cost (seq, speed);
2872   if (MEM_P (rslt))
2873     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2874 			  TYPE_ADDR_SPACE (type), speed);
2875   else if (!REG_P (rslt))
2876     cost += set_src_cost (rslt, speed);
2877 
2878   return cost;
2879 }
2880 
2881 /* Returns variable containing the value of candidate CAND at statement AT.  */
2882 
2883 static tree
2884 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2885 {
2886   if (stmt_after_increment (loop, cand, stmt))
2887     return cand->var_after;
2888   else
2889     return cand->var_before;
2890 }
2891 
2892 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2893    same precision, at least as wide as the precision of TYPE, stores
2894    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2895    type of A and B.  */
2896 
2897 static tree
2898 determine_common_wider_type (tree *a, tree *b)
2899 {
2900   tree wider_type = NULL;
2901   tree suba, subb;
2902   tree atype = TREE_TYPE (*a);
2903 
2904   if (CONVERT_EXPR_P (*a))
2905     {
2906       suba = TREE_OPERAND (*a, 0);
2907       wider_type = TREE_TYPE (suba);
2908       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2909 	return atype;
2910     }
2911   else
2912     return atype;
2913 
2914   if (CONVERT_EXPR_P (*b))
2915     {
2916       subb = TREE_OPERAND (*b, 0);
2917       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2918 	return atype;
2919     }
2920   else
2921     return atype;
2922 
2923   *a = suba;
2924   *b = subb;
2925   return wider_type;
2926 }
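
/* E.g. if *A is (unsigned int) x and *B is (unsigned int) y, with x and
   y both of type long, this stores x and y back to *A and *B and returns
   long, so that the subtraction below can fold in the wider type before
   the result is truncated again.  */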
2927 
2928 /* Determines the expression by which USE is expressed from induction variable
2929    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2930    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2931 
2932 static bool
2933 get_computation_aff (struct loop *loop,
2934 		     struct iv_use *use, struct iv_cand *cand, gimple at,
2935 		     struct affine_tree_combination *aff)
2936 {
2937   tree ubase = use->iv->base;
2938   tree ustep = use->iv->step;
2939   tree cbase = cand->iv->base;
2940   tree cstep = cand->iv->step, cstep_common;
2941   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2942   tree common_type, var;
2943   tree uutype;
2944   aff_tree cbase_aff, var_aff;
2945   double_int rat;
2946 
2947   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2948     {
2949       /* We do not have enough precision to express the values of USE.  */
2950       return false;
2951     }
2952 
2953   var = var_at_stmt (loop, cand, at);
2954   uutype = unsigned_type_for (utype);
2955 
2956   /* If the conversion is not noop, perform it.  */
2957   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2958     {
2959       cstep = fold_convert (uutype, cstep);
2960       cbase = fold_convert (uutype, cbase);
2961       var = fold_convert (uutype, var);
2962     }
2963 
2964   if (!constant_multiple_of (ustep, cstep, &rat))
2965     return false;
2966 
2967   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2968      type, we achieve better folding by computing their difference in this
2969      wider type, and cast the result to UUTYPE.  We do not need to worry about
2970      overflows, as all the arithmetic will in the end be performed in UUTYPE
2971      anyway.  */
2972   common_type = determine_common_wider_type (&ubase, &cbase);
2973 
2974   /* use = ubase - ratio * cbase + ratio * var.  */
2975   tree_to_aff_combination (ubase, common_type, aff);
2976   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2977   tree_to_aff_combination (var, uutype, &var_aff);
2978 
2979   /* We need to shift the value if we are after the increment.  */
2980   if (stmt_after_increment (loop, cand, at))
2981     {
2982       aff_tree cstep_aff;
2983 
2984       if (common_type != uutype)
2985 	cstep_common = fold_convert (common_type, cstep);
2986       else
2987 	cstep_common = cstep;
2988 
2989       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2990       aff_combination_add (&cbase_aff, &cstep_aff);
2991     }
2992 
2993   aff_combination_scale (&cbase_aff, -rat);
2994   aff_combination_add (aff, &cbase_aff);
2995   if (common_type != uutype)
2996     aff_combination_convert (aff, uutype);
2997 
2998   aff_combination_scale (&var_aff, rat);
2999   aff_combination_add (aff, &var_aff);
3000 
3001   return true;
3002 }
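
/* Worked example: to express the use iv {base 8, step 4} using the
   candidate iv {base 0, step 1}, constant_multiple_of yields rat = 4 and
   the combination built above is 8 - 4*0 + 4*var, i.e. the use value is
   simply 8 + 4 * cand.  */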
3003 
3004 /* Return the type of USE.  */
3005 
3006 static tree
3007 get_use_type (struct iv_use *use)
3008 {
3009   tree base_type = TREE_TYPE (use->iv->base);
3010   tree type;
3011 
3012   if (use->type == USE_ADDRESS)
3013     {
3014       /* The base_type may be a void pointer.  Create a pointer type based on
3015 	 the mem_ref instead.  */
3016       type = build_pointer_type (TREE_TYPE (*use->op_p));
3017       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3018 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3019     }
3020   else
3021     type = base_type;
3022 
3023   return type;
3024 }
3025 
3026 /* Determines the expression by which USE is expressed from induction variable
3027    CAND at statement AT in LOOP.  The computation is unshared.  */
3028 
3029 static tree
3030 get_computation_at (struct loop *loop,
3031 		    struct iv_use *use, struct iv_cand *cand, gimple at)
3032 {
3033   aff_tree aff;
3034   tree type = get_use_type (use);
3035 
3036   if (!get_computation_aff (loop, use, cand, at, &aff))
3037     return NULL_TREE;
3038   unshare_aff_combination (&aff);
3039   return fold_convert (type, aff_combination_to_tree (&aff));
3040 }
3041 
3042 /* Determines the expression by which USE is expressed from induction variable
3043    CAND in LOOP.  The computation is unshared.  */
3044 
3045 static tree
3046 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3047 {
3048   return get_computation_at (loop, use, cand, use->stmt);
3049 }
3050 
3051 /* Adjust the cost COST for being in loop setup rather than loop body.
3052    If we're optimizing for space, the loop setup overhead is constant;
3053    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3054 static unsigned
3055 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3056 {
3057   if (cost == INFTY)
3058     return cost;
3059   else if (optimize_loop_for_speed_p (data->current_loop))
3060     return cost / avg_loop_niter (data->current_loop);
3061   else
3062     return cost;
3063 }
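
/* E.g. a setup cost of 10 in a loop expected to iterate 5 times is
   charged as 10 / 5 = 2 when optimizing for speed, but stays at 10 when
   optimizing for size.  */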
3064 
3065 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3066    validity for a memory reference accessing memory of mode MODE in
3067    address space AS.  */
3068 
3070 bool
3071 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3072 				 addr_space_t as)
3073 {
3074 #define MAX_RATIO 128
3075   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3076   static vec<sbitmap> valid_mult_list;
3077   sbitmap valid_mult;
3078 
3079   if (data_index >= valid_mult_list.length ())
3080     valid_mult_list.safe_grow_cleared (data_index + 1);
3081 
3082   valid_mult = valid_mult_list[data_index];
3083   if (!valid_mult)
3084     {
3085       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3086       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3087       rtx addr;
3088       HOST_WIDE_INT i;
3089 
3090       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3091       bitmap_clear (valid_mult);
3092       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3093       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3094 	{
3095 	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
3096 	  if (memory_address_addr_space_p (mode, addr, as))
3097 	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
3098 	}
3099 
3100       if (dump_file && (dump_flags & TDF_DETAILS))
3101 	{
3102 	  fprintf (dump_file, "  allowed multipliers:");
3103 	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3104 	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3105 	      fprintf (dump_file, " %d", (int) i);
3106 	  fprintf (dump_file, "\n");
3107 	  fprintf (dump_file, "\n");
3108 	}
3109 
3110       valid_mult_list[data_index] = valid_mult;
3111     }
3112 
3113   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3114     return false;
3115 
3116   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3117 }
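
/* E.g. on an x86-like target whose addressing modes accept index scales
   of 2, 4 and 8, only the bits for those ratios end up set in the cached
   bitmap, and any other multiplier is rejected here.  */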
3118 
3119 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3120    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3121    variable is omitted.  Compute the cost for a memory reference that accesses
3122    a memory location of mode MEM_MODE in address space AS.
3123 
3124    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3125    size of MEM_MODE / RATIO) is available.  To make this determination, we
3126    look at the size of the increment to be made, which is given in CSTEP.
3127    CSTEP may be zero if the step is unknown.
3128    STMT_AFTER_INC is true iff the statement we're looking at is after the
3129    increment of the original biv.
3130 
3131    TODO -- there must be some better way.  This all is quite crude.  */
3132 
3133 typedef struct address_cost_data_s
3134 {
3135   HOST_WIDE_INT min_offset, max_offset;
3136   unsigned costs[2][2][2][2];
3137 } *address_cost_data;
3138 
3139 
3140 static comp_cost
3141 get_address_cost (bool symbol_present, bool var_present,
3142 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3143 		  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3144 		  addr_space_t as, bool speed,
3145 		  bool stmt_after_inc, bool *may_autoinc)
3146 {
3147   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3148   static vec<address_cost_data> address_cost_data_list;
3149   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3150   address_cost_data data;
3151   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3152   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3153   unsigned cost, acost, complexity;
3154   bool offset_p, ratio_p, autoinc;
3155   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3156   unsigned HOST_WIDE_INT mask;
3157   unsigned bits;
3158 
3159   if (data_index >= address_cost_data_list.length ())
3160     address_cost_data_list.safe_grow_cleared (data_index + 1);
3161 
3162   data = address_cost_data_list[data_index];
3163   if (!data)
3164     {
3165       HOST_WIDE_INT i;
3166       HOST_WIDE_INT rat, off = 0;
3167       int old_cse_not_expected, width;
3168       unsigned sym_p, var_p, off_p, rat_p, add_c;
3169       rtx seq, addr, base;
3170       rtx reg0, reg1;
3171 
3172       data = (address_cost_data) xcalloc (1, sizeof (*data));
3173 
3174       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3175 
3176       width = GET_MODE_BITSIZE (address_mode) - 1;
3177       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3178 	width = HOST_BITS_PER_WIDE_INT - 1;
3179       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3180 
3181       for (i = width; i >= 0; i--)
3182 	{
3183 	  off = -((unsigned HOST_WIDE_INT) 1 << i);
3184 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3185 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3186 	    break;
3187 	}
3188       data->min_offset = (i == -1? 0 : off);
3189 
3190       for (i = width; i >= 0; i--)
3191 	{
3192 	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3193 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3194 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3195 	    break;
3196 	}
3197       if (i == -1)
3198         off = 0;
3199       data->max_offset = off;
3200 
3201       if (dump_file && (dump_flags & TDF_DETAILS))
3202 	{
3203 	  fprintf (dump_file, "get_address_cost:\n");
3204 	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3205 		   GET_MODE_NAME (mem_mode),
3206 		   data->min_offset);
3207 	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3208 		   GET_MODE_NAME (mem_mode),
3209 		   data->max_offset);
3210 	}
3211 
3212       rat = 1;
3213       for (i = 2; i <= MAX_RATIO; i++)
3214 	if (multiplier_allowed_in_address_p (i, mem_mode, as))
3215 	  {
3216 	    rat = i;
3217 	    break;
3218 	  }
3219 
3220       /* Compute the cost of various addressing modes.  */
3221       acost = 0;
3222       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3223       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3224 
3225       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3226 	  || USE_STORE_PRE_DECREMENT (mem_mode))
3227 	{
3228 	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
3229 	  has_predec[mem_mode]
3230 	    = memory_address_addr_space_p (mem_mode, addr, as);
3231 	}
3232       if (USE_LOAD_POST_DECREMENT (mem_mode)
3233 	  || USE_STORE_POST_DECREMENT (mem_mode))
3234 	{
3235 	  addr = gen_rtx_POST_DEC (address_mode, reg0);
3236 	  has_postdec[mem_mode]
3237 	    = memory_address_addr_space_p (mem_mode, addr, as);
3238 	}
3239       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3240 	  || USE_STORE_PRE_INCREMENT (mem_mode))
3241 	{
3242 	  addr = gen_rtx_PRE_INC (address_mode, reg0);
3243 	  has_preinc[mem_mode]
3244 	    = memory_address_addr_space_p (mem_mode, addr, as);
3245 	}
3246       if (USE_LOAD_POST_INCREMENT (mem_mode)
3247 	  || USE_STORE_POST_INCREMENT (mem_mode))
3248 	{
3249 	  addr = gen_rtx_POST_INC (address_mode, reg0);
3250 	  has_postinc[mem_mode]
3251 	    = memory_address_addr_space_p (mem_mode, addr, as);
3252 	}
3253       for (i = 0; i < 16; i++)
3254 	{
3255 	  sym_p = i & 1;
3256 	  var_p = (i >> 1) & 1;
3257 	  off_p = (i >> 2) & 1;
3258 	  rat_p = (i >> 3) & 1;
3259 
3260 	  addr = reg0;
3261 	  if (rat_p)
3262 	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3263 				   gen_int_mode (rat, address_mode));
3264 
3265 	  if (var_p)
3266 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3267 
3268 	  if (sym_p)
3269 	    {
3270 	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3271 	      /* ??? We can run into trouble with some backends by presenting
3272 		 it with symbols which haven't been properly passed through
3273 		 targetm.encode_section_info.  By setting the local bit, we
3274 		 enhance the probability of things working.  */
3275 	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3276 
3277 	      if (off_p)
3278 		base = gen_rtx_fmt_e (CONST, address_mode,
3279 				      gen_rtx_fmt_ee
3280 					(PLUS, address_mode, base,
3281 					 gen_int_mode (off, address_mode)));
3282 	    }
3283 	  else if (off_p)
3284 	    base = gen_int_mode (off, address_mode);
3285 	  else
3286 	    base = NULL_RTX;
3287 
3288 	  if (base)
3289 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3290 
3291 	  start_sequence ();
3292 	  /* To avoid splitting addressing modes, pretend that no cse will
3293 	     follow.  */
3294 	  old_cse_not_expected = cse_not_expected;
3295 	  cse_not_expected = true;
3296 	  addr = memory_address_addr_space (mem_mode, addr, as);
3297 	  cse_not_expected = old_cse_not_expected;
3298 	  seq = get_insns ();
3299 	  end_sequence ();
3300 
3301 	  acost = seq_cost (seq, speed);
3302 	  acost += address_cost (addr, mem_mode, as, speed);
3303 
3304 	  if (!acost)
3305 	    acost = 1;
3306 	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
3307 	}
3308 
3309       /* On some targets, it is quite expensive to load a symbol into a register,
3310 	 which makes addresses that contain symbols look much more expensive.
3311 	 However, the symbol will have to be loaded in any case before the
3312 	 loop (and quite likely we have it in a register already), so it does not
3313 	 make much sense to penalize them too heavily.  So make some final
3314          tweaks for the SYMBOL_PRESENT modes:
3315 
3316          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3317 	 var is cheaper, use this mode with small penalty.
3318 	 If VAR_PRESENT is true, try whether the mode with
3319 	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3320 	 if this is the case, use it.  */
3321       add_c = add_cost (speed, address_mode);
3322       for (i = 0; i < 8; i++)
3323 	{
3324 	  var_p = i & 1;
3325 	  off_p = (i >> 1) & 1;
3326 	  rat_p = (i >> 2) & 1;
3327 
3328 	  acost = data->costs[0][1][off_p][rat_p] + 1;
3329 	  if (var_p)
3330 	    acost += add_c;
3331 
3332 	  if (acost < data->costs[1][var_p][off_p][rat_p])
3333 	    data->costs[1][var_p][off_p][rat_p] = acost;
3334 	}
3335 
3336       if (dump_file && (dump_flags & TDF_DETAILS))
3337 	{
3338 	  fprintf (dump_file, "Address costs:\n");
3339 
3340 	  for (i = 0; i < 16; i++)
3341 	    {
3342 	      sym_p = i & 1;
3343 	      var_p = (i >> 1) & 1;
3344 	      off_p = (i >> 2) & 1;
3345 	      rat_p = (i >> 3) & 1;
3346 
3347 	      fprintf (dump_file, "  ");
3348 	      if (sym_p)
3349 		fprintf (dump_file, "sym + ");
3350 	      if (var_p)
3351 		fprintf (dump_file, "var + ");
3352 	      if (off_p)
3353 		fprintf (dump_file, "cst + ");
3354 	      if (rat_p)
3355 		fprintf (dump_file, "rat * ");
3356 
3357 	      acost = data->costs[sym_p][var_p][off_p][rat_p];
3358 	      fprintf (dump_file, "index costs %d\n", acost);
3359 	    }
3360 	  if (has_predec[mem_mode] || has_postdec[mem_mode]
3361 	      || has_preinc[mem_mode] || has_postinc[mem_mode])
3362 	    fprintf (dump_file, "  May include autoinc/dec\n");
3363 	  fprintf (dump_file, "\n");
3364 	}
3365 
3366       address_cost_data_list[data_index] = data;
3367     }
3368 
3369   bits = GET_MODE_BITSIZE (address_mode);
3370   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3371   offset &= mask;
3372   if ((offset >> (bits - 1) & 1))
3373     offset |= ~mask;
3374   s_offset = offset;
3375 
3376   autoinc = false;
3377   msize = GET_MODE_SIZE (mem_mode);
3378   autoinc_offset = offset;
3379   if (stmt_after_inc)
3380     autoinc_offset += ratio * cstep;
3381   if (symbol_present || var_present || ratio != 1)
3382     autoinc = false;
3383   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3384 	       && msize == cstep)
3385 	   || (has_postdec[mem_mode] && autoinc_offset == 0
3386 	       && msize == -cstep)
3387 	   || (has_preinc[mem_mode] && autoinc_offset == msize
3388 	       && msize == cstep)
3389 	   || (has_predec[mem_mode] && autoinc_offset == -msize
3390 	       && msize == -cstep))
3391     autoinc = true;
3392 
3393   cost = 0;
3394   offset_p = (s_offset != 0
3395 	      && data->min_offset <= s_offset
3396 	      && s_offset <= data->max_offset);
3397   ratio_p = (ratio != 1
3398 	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3399 
3400   if (ratio != 1 && !ratio_p)
3401     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3402 
3403   if (s_offset && !offset_p && !symbol_present)
3404     cost += add_cost (speed, address_mode);
3405 
3406   if (may_autoinc)
3407     *may_autoinc = autoinc;
3408   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3409   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3410   return new_cost (cost + acost, complexity);
3411 }
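
/* E.g. an address of the shape sym + var + 4*index + 8 is looked up in
   the cost table with all of SYMBOL_PRESENT, VAR_PRESENT, OFFSET and
   RATIO set; if the ratio or offset is not directly supported, the cost
   of the extra multiplication or addition is added on top.  */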
3412 
3413  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
3414     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3415     calculating the operands of EXPR.  Returns true if successful, and returns
3416     the cost in COST.  */
3417 
3418 static bool
3419 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3420                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3421 {
3422   comp_cost res;
3423   tree op1 = TREE_OPERAND (expr, 1);
3424   tree cst = TREE_OPERAND (mult, 1);
3425   tree multop = TREE_OPERAND (mult, 0);
3426   int m = exact_log2 (int_cst_value (cst));
3427   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3428   int sa_cost;
3429 
3430   if (!(m >= 0 && m < maxm))
3431     return false;
3432 
3433   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3434              ? shiftadd_cost (speed, mode, m)
3435              : (mult == op1
3436                 ? shiftsub1_cost (speed, mode, m)
3437                 : shiftsub0_cost (speed, mode, m)));
3438   res = new_cost (sa_cost, 0);
3439   res = add_costs (res, mult == op1 ? cost0 : cost1);
3440 
3441   STRIP_NOPS (multop);
3442   if (!is_gimple_val (multop))
3443     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3444 
3445   *cost = res;
3446   return true;
3447 }
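
/* E.g. for EXPR == a + b * 4, MULT is b * 4 and m is 2, so the result
   is the target's shift-add cost for a shift by 2 plus COST0, the cost
   of computing a.  */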
3448 
3449 /* Estimates cost of forcing expression EXPR into a variable.  */
3450 
3451 static comp_cost
3452 force_expr_to_var_cost (tree expr, bool speed)
3453 {
3454   static bool costs_initialized = false;
3455   static unsigned integer_cost [2];
3456   static unsigned symbol_cost [2];
3457   static unsigned address_cost [2];
3458   tree op0, op1;
3459   comp_cost cost0, cost1, cost;
3460   enum machine_mode mode;
3461 
3462   if (!costs_initialized)
3463     {
3464       tree type = build_pointer_type (integer_type_node);
3465       tree var, addr;
3466       rtx x;
3467       int i;
3468 
3469       var = create_tmp_var_raw (integer_type_node, "test_var");
3470       TREE_STATIC (var) = 1;
3471       x = produce_memory_decl_rtl (var, NULL);
3472       SET_DECL_RTL (var, x);
3473 
3474       addr = build1 (ADDR_EXPR, type, var);
3475 
3476 
3477       for (i = 0; i < 2; i++)
3478 	{
3479 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3480 							     2000), i);
3481 
3482 	  symbol_cost[i] = computation_cost (addr, i) + 1;
3483 
3484 	  address_cost[i]
3485 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3486 	  if (dump_file && (dump_flags & TDF_DETAILS))
3487 	    {
3488 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3489 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3490 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3491 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3492 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3493 	      fprintf (dump_file, "\n");
3494 	    }
3495 	}
3496 
3497       costs_initialized = true;
3498     }
3499 
3500   STRIP_NOPS (expr);
3501 
3502   if (SSA_VAR_P (expr))
3503     return no_cost;
3504 
3505   if (is_gimple_min_invariant (expr))
3506     {
3507       if (TREE_CODE (expr) == INTEGER_CST)
3508 	return new_cost (integer_cost [speed], 0);
3509 
3510       if (TREE_CODE (expr) == ADDR_EXPR)
3511 	{
3512 	  tree obj = TREE_OPERAND (expr, 0);
3513 
3514 	  if (TREE_CODE (obj) == VAR_DECL
3515 	      || TREE_CODE (obj) == PARM_DECL
3516 	      || TREE_CODE (obj) == RESULT_DECL)
3517 	    return new_cost (symbol_cost [speed], 0);
3518 	}
3519 
3520       return new_cost (address_cost [speed], 0);
3521     }
3522 
3523   switch (TREE_CODE (expr))
3524     {
3525     case POINTER_PLUS_EXPR:
3526     case PLUS_EXPR:
3527     case MINUS_EXPR:
3528     case MULT_EXPR:
3529       op0 = TREE_OPERAND (expr, 0);
3530       op1 = TREE_OPERAND (expr, 1);
3531       STRIP_NOPS (op0);
3532       STRIP_NOPS (op1);
3533 
3534       if (is_gimple_val (op0))
3535 	cost0 = no_cost;
3536       else
3537 	cost0 = force_expr_to_var_cost (op0, speed);
3538 
3539       if (is_gimple_val (op1))
3540 	cost1 = no_cost;
3541       else
3542 	cost1 = force_expr_to_var_cost (op1, speed);
3543 
3544       break;
3545 
3546     case NEGATE_EXPR:
3547       op0 = TREE_OPERAND (expr, 0);
3548       STRIP_NOPS (op0);
3549       op1 = NULL_TREE;
3550 
3551       if (is_gimple_val (op0))
3552 	cost0 = no_cost;
3553       else
3554 	cost0 = force_expr_to_var_cost (op0, speed);
3555 
3556       cost1 = no_cost;
3557       break;
3558 
3559     default:
3560       /* Just an arbitrary value, FIXME.  */
3561       return new_cost (target_spill_cost[speed], 0);
3562     }
3563 
3564   mode = TYPE_MODE (TREE_TYPE (expr));
3565   switch (TREE_CODE (expr))
3566     {
3567     case POINTER_PLUS_EXPR:
3568     case PLUS_EXPR:
3569     case MINUS_EXPR:
3570     case NEGATE_EXPR:
3571       cost = new_cost (add_cost (speed, mode), 0);
3572       if (TREE_CODE (expr) != NEGATE_EXPR)
3573         {
3574           tree mult = NULL_TREE;
3575           comp_cost sa_cost;
3576           if (TREE_CODE (op1) == MULT_EXPR)
3577             mult = op1;
3578           else if (TREE_CODE (op0) == MULT_EXPR)
3579             mult = op0;
3580 
3581           if (mult != NULL_TREE
3582               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3583               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3584                                     speed, &sa_cost))
3585             return sa_cost;
3586         }
3587       break;
3588 
3589     case MULT_EXPR:
3590       if (cst_and_fits_in_hwi (op0))
3591 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3592 					     mode, speed), 0);
3593       else if (cst_and_fits_in_hwi (op1))
3594 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3595 					     mode, speed), 0);
3596       else
3597 	return new_cost (target_spill_cost [speed], 0);
3598       break;
3599 
3600     default:
3601       gcc_unreachable ();
3602     }
3603 
3604   cost = add_costs (cost, cost0);
3605   cost = add_costs (cost, cost1);
3606 
3607   /* Bound the cost by target_spill_cost.  The parts of complicated
3608      computations often are either loop invariant or at least can
3609      be shared between several iv uses, so letting this grow without
3610      limits would not give reasonable results.  */
3611   if (cost.cost > (int) target_spill_cost [speed])
3612     cost.cost = target_spill_cost [speed];
3613 
3614   return cost;
3615 }
3616 
3617 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3618    invariants the computation depends on.  */
3619 
3620 static comp_cost
3621 force_var_cost (struct ivopts_data *data,
3622 		tree expr, bitmap *depends_on)
3623 {
3624   if (depends_on)
3625     {
3626       fd_ivopts_data = data;
3627       walk_tree (&expr, find_depends, depends_on, NULL);
3628     }
3629 
3630   return force_expr_to_var_cost (expr, data->speed);
3631 }
3632 
3633 /* Estimates cost of expressing address ADDR as var + symbol + offset.  The
3634    value of the offset is added to OFFSET, and SYMBOL_PRESENT and VAR_PRESENT
3635    are set to false if the corresponding part is missing.  DEPENDS_ON is a
3636    set of the invariants the computation depends on.  */
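/* For illustration: if ADDR refers to a TREE_STATIC global variable at
   byte offset 16, then 16 is added to *OFFSET, *SYMBOL_PRESENT is set to
   true, and no_cost is returned, since symbol + offset requires no
   runtime computation.  */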
3637 
3638 static comp_cost
3639 split_address_cost (struct ivopts_data *data,
3640 		    tree addr, bool *symbol_present, bool *var_present,
3641 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3642 {
3643   tree core;
3644   HOST_WIDE_INT bitsize;
3645   HOST_WIDE_INT bitpos;
3646   tree toffset;
3647   enum machine_mode mode;
3648   int unsignedp, volatilep;
3649 
3650   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3651 			      &unsignedp, &volatilep, false);
3652 
3653   if (toffset != 0
3654       || bitpos % BITS_PER_UNIT != 0
3655       || TREE_CODE (core) != VAR_DECL)
3656     {
3657       *symbol_present = false;
3658       *var_present = true;
3659       fd_ivopts_data = data;
3660       walk_tree (&addr, find_depends, depends_on, NULL);
3661       return new_cost (target_spill_cost[data->speed], 0);
3662     }
3663 
3664   *offset += bitpos / BITS_PER_UNIT;
3665   if (TREE_STATIC (core)
3666       || DECL_EXTERNAL (core))
3667     {
3668       *symbol_present = true;
3669       *var_present = false;
3670       return no_cost;
3671     }
3672 
3673   *symbol_present = false;
3674   *var_present = true;
3675   return no_cost;
3676 }
3677 
3678 /* Estimates cost of expressing difference of addresses E1 - E2 as
3679    var + symbol + offset.  The value of offset is added to OFFSET,
3680    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3681    part is missing.  DEPENDS_ON is a set of the invariants the computation
3682    depends on.  */
3683 
3684 static comp_cost
3685 ptr_difference_cost (struct ivopts_data *data,
3686 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3687 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3688 {
3689   HOST_WIDE_INT diff = 0;
3690   aff_tree aff_e1, aff_e2;
3691   tree type;
3692 
3693   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3694 
3695   if (ptr_difference_const (e1, e2, &diff))
3696     {
3697       *offset += diff;
3698       *symbol_present = false;
3699       *var_present = false;
3700       return no_cost;
3701     }
3702 
3703   if (integer_zerop (e2))
3704     return split_address_cost (data, TREE_OPERAND (e1, 0),
3705 			       symbol_present, var_present, offset, depends_on);
3706 
3707   *symbol_present = false;
3708   *var_present = true;
3709 
3710   type = signed_type_for (TREE_TYPE (e1));
3711   tree_to_aff_combination (e1, type, &aff_e1);
3712   tree_to_aff_combination (e2, type, &aff_e2);
3713   aff_combination_scale (&aff_e2, double_int_minus_one);
3714   aff_combination_add (&aff_e1, &aff_e2);
3715 
3716   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3717 }
3718 
3719 /* Estimates cost of expressing difference E1 - E2 as
3720    var + symbol + offset.  The value of offset is added to OFFSET,
3721    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3722    part is missing.  DEPENDS_ON is a set of the invariants the computation
3723    depends on.  */
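/* For illustration: for E1 = &a[4] and E2 = &a[0] with 4-byte elements,
   strip_offset leaves identical bases and off1 - off2 == 16, so 16 is
   added to *OFFSET and no_cost is returned.  */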
3724 
3725 static comp_cost
3726 difference_cost (struct ivopts_data *data,
3727 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3728 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3729 {
3730   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3731   unsigned HOST_WIDE_INT off1, off2;
3732   aff_tree aff_e1, aff_e2;
3733   tree type;
3734 
3735   e1 = strip_offset (e1, &off1);
3736   e2 = strip_offset (e2, &off2);
3737   *offset += off1 - off2;
3738 
3739   STRIP_NOPS (e1);
3740   STRIP_NOPS (e2);
3741 
3742   if (TREE_CODE (e1) == ADDR_EXPR)
3743     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3744 				offset, depends_on);
3745   *symbol_present = false;
3746 
3747   if (operand_equal_p (e1, e2, 0))
3748     {
3749       *var_present = false;
3750       return no_cost;
3751     }
3752 
3753   *var_present = true;
3754 
3755   if (integer_zerop (e2))
3756     return force_var_cost (data, e1, depends_on);
3757 
3758   if (integer_zerop (e1))
3759     {
3760       comp_cost cost = force_var_cost (data, e2, depends_on);
3761       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3762       return cost;
3763     }
3764 
3765   type = signed_type_for (TREE_TYPE (e1));
3766   tree_to_aff_combination (e1, type, &aff_e1);
3767   tree_to_aff_combination (e2, type, &aff_e2);
3768   aff_combination_scale (&aff_e2, double_int_minus_one);
3769   aff_combination_add (&aff_e1, &aff_e2);
3770 
3771   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3772 }
3773 
3774 /* Returns true if AFF1 and AFF2 are identical.  */
3775 
3776 static bool
3777 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3778 {
3779   unsigned i;
3780 
3781   if (aff1->n != aff2->n)
3782     return false;
3783 
3784   for (i = 0; i < aff1->n; i++)
3785     {
3786       if (aff1->elts[i].coef != aff2->elts[i].coef)
3787         return false;
3788 
3789       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3790         return false;
3791     }
3792   return true;
3793 }
3794 
3795 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
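/* The table thus acts as a hash-consing mechanism: structurally identical
   invariant expressions receive the same id, so an invariant shared by
   several uses is counted only once when estimating register pressure.  */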
3796 
3797 static int
3798 get_expr_id (struct ivopts_data *data, tree expr)
3799 {
3800   struct iv_inv_expr_ent ent;
3801   struct iv_inv_expr_ent **slot;
3802 
3803   ent.expr = expr;
3804   ent.hash = iterative_hash_expr (expr, 0);
3805   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3806                                                      &ent, INSERT);
3807   if (*slot)
3808     return (*slot)->id;
3809 
3810   *slot = XNEW (struct iv_inv_expr_ent);
3811   (*slot)->expr = expr;
3812   (*slot)->hash = ent.hash;
3813   (*slot)->id = data->inv_expr_id++;
3814   return (*slot)->id;
3815 }
3816 
3817 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3818    requires a new compiler-generated temporary.  Returns -1 otherwise.
3819    ADDRESS_P is a flag indicating if the expression is for address
3820    computation.  */
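/* For illustration: with UBASE = p + 8, CBASE = p and RATIO == 1,
   stripping the constant part leaves identical bases, so no temporary is
   needed and -1 is returned; for unrelated bases, the id of the combined
   expression UBASE - RATIO * CBASE is returned.  */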
3821 
3822 static int
3823 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3824                             tree cbase, HOST_WIDE_INT ratio,
3825                             bool address_p)
3826 {
3827   aff_tree ubase_aff, cbase_aff;
3828   tree expr, ub, cb;
3829 
3830   STRIP_NOPS (ubase);
3831   STRIP_NOPS (cbase);
3832   ub = ubase;
3833   cb = cbase;
3834 
3835   if ((TREE_CODE (ubase) == INTEGER_CST)
3836       && (TREE_CODE (cbase) == INTEGER_CST))
3837     return -1;
3838 
3839   /* Strip the constant part, if any.  */
3840   if (TREE_CODE (ubase) == PLUS_EXPR
3841       || TREE_CODE (ubase) == MINUS_EXPR
3842       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3843     {
3844       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3845         ubase = TREE_OPERAND (ubase, 0);
3846     }
3847 
3848   /* Strip the constant part, if any.  */
3849   if (TREE_CODE (cbase) == PLUS_EXPR
3850       || TREE_CODE (cbase) == MINUS_EXPR
3851       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3852     {
3853       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3854         cbase = TREE_OPERAND (cbase, 0);
3855     }
3856 
3857   if (address_p)
3858     {
3859       if (((TREE_CODE (ubase) == SSA_NAME)
3860            || (TREE_CODE (ubase) == ADDR_EXPR
3861                && is_gimple_min_invariant (ubase)))
3862           && (TREE_CODE (cbase) == INTEGER_CST))
3863         return -1;
3864 
3865       if (((TREE_CODE (cbase) == SSA_NAME)
3866            || (TREE_CODE (cbase) == ADDR_EXPR
3867                && is_gimple_min_invariant (cbase)))
3868           && (TREE_CODE (ubase) == INTEGER_CST))
3869         return -1;
3870     }
3871 
3872   if (ratio == 1)
3873     {
3874       if (operand_equal_p (ubase, cbase, 0))
3875         return -1;
3876 
3877       if (TREE_CODE (ubase) == ADDR_EXPR
3878           && TREE_CODE (cbase) == ADDR_EXPR)
3879         {
3880           tree usym, csym;
3881 
3882           usym = TREE_OPERAND (ubase, 0);
3883           csym = TREE_OPERAND (cbase, 0);
3884           if (TREE_CODE (usym) == ARRAY_REF)
3885             {
3886               tree ind = TREE_OPERAND (usym, 1);
3887               if (TREE_CODE (ind) == INTEGER_CST
3888                   && host_integerp (ind, 0)
3889                   && TREE_INT_CST_LOW (ind) == 0)
3890                 usym = TREE_OPERAND (usym, 0);
3891             }
3892           if (TREE_CODE (csym) == ARRAY_REF)
3893             {
3894               tree ind = TREE_OPERAND (csym, 1);
3895               if (TREE_CODE (ind) == INTEGER_CST
3896                   && host_integerp (ind, 0)
3897                   && TREE_INT_CST_LOW (ind) == 0)
3898                 csym = TREE_OPERAND (csym, 0);
3899             }
3900           if (operand_equal_p (usym, csym, 0))
3901             return -1;
3902         }
3903       /* Now do a more complex comparison.  */
3904       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3905       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3906       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3907         return -1;
3908     }
3909 
3910   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3911   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3912 
3913   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3914   aff_combination_add (&ubase_aff, &cbase_aff);
3915   expr = aff_combination_to_tree (&ubase_aff);
3916   return get_expr_id (data, expr);
3917 }
3918 
3919 
3920 
3921 /* Determines the cost of the computation by which USE is expressed
3922    from induction variable CAND.  If ADDRESS_P is true, we just need
3923    to create an address from it, otherwise we want to get it into a
3924    register.  A set of invariants we depend on is stored in
3925    DEPENDS_ON.  AT is the statement at which the value is computed.
3926    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3927    addressing is likely.  */
3928 
3929 static comp_cost
3930 get_computation_cost_at (struct ivopts_data *data,
3931 			 struct iv_use *use, struct iv_cand *cand,
3932 			 bool address_p, bitmap *depends_on, gimple at,
3933 			 bool *can_autoinc,
3934                          int *inv_expr_id)
3935 {
3936   tree ubase = use->iv->base, ustep = use->iv->step;
3937   tree cbase, cstep;
3938   tree utype = TREE_TYPE (ubase), ctype;
3939   unsigned HOST_WIDE_INT cstepi, offset = 0;
3940   HOST_WIDE_INT ratio, aratio;
3941   bool var_present, symbol_present, stmt_is_after_inc;
3942   comp_cost cost;
3943   double_int rat;
3944   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3945   enum machine_mode mem_mode = (address_p
3946 				? TYPE_MODE (TREE_TYPE (*use->op_p))
3947 				: VOIDmode);
3948 
3949   *depends_on = NULL;
3950 
3951   /* Only consider real candidates.  */
3952   if (!cand->iv)
3953     return infinite_cost;
3954 
3955   cbase = cand->iv->base;
3956   cstep = cand->iv->step;
3957   ctype = TREE_TYPE (cbase);
3958 
3959   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3960     {
3961       /* We do not have enough precision to express the values of the use.  */
3962       return infinite_cost;
3963     }
3964 
3965   if (address_p
3966       || (use->iv->base_object
3967 	  && cand->iv->base_object
3968 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3969 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3970     {
3971       /* Do not try to express the address of an object with a computation
3972 	 based on the address of a different object.  This may cause problems
3973 	 in RTL-level alias analysis (which does not expect this to happen,
3974 	 as it is invalid in C), and would be unlikely to be useful
3975 	 anyway.  */
3976       if (use->iv->base_object
3977 	  && cand->iv->base_object
3978 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3979 	return infinite_cost;
3980     }
3981 
3982   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3983     {
3984       /* TODO -- add direct handling of this case.  */
3985       goto fallback;
3986     }
3987 
3988   /* CSTEPI is removed from the offset in case the statement is after the
3989      increment.  If the step is not constant, we use zero instead.
3990      This is a bit imprecise (there is the extra addition), but
3991      redundancy elimination is likely to transform the code so that
3992      it uses the value of the variable before the increment anyway,
3993      so it is not too unrealistic.  */
3994   if (cst_and_fits_in_hwi (cstep))
3995     cstepi = int_cst_value (cstep);
3996   else
3997     cstepi = 0;
3998 
3999   if (!constant_multiple_of (ustep, cstep, &rat))
4000     return infinite_cost;
4001 
4002   if (rat.fits_shwi ())
4003     ratio = rat.to_shwi ();
4004   else
4005     return infinite_cost;
4006 
4007   STRIP_NOPS (cbase);
4008   ctype = TREE_TYPE (cbase);
4009 
4010   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4011 
4012   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4013      or ratio == 1, it is better to handle this like
4014 
4015      ubase - ratio * cbase + ratio * var
4016 
4017      (this also holds in the case ratio == -1, TODO).  */
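  /* For illustration: if cbase is the constant 0 and ratio is 4, the
     computation is ubase + 4 * var; the loop-invariant part ubase is
     costed by difference_cost below, and the cost of the multiplication
     by the ratio is accounted for later.  */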
4018 
4019   if (cst_and_fits_in_hwi (cbase))
4020     {
4021       offset = - ratio * int_cst_value (cbase);
4022       cost = difference_cost (data,
4023 			      ubase, build_int_cst (utype, 0),
4024 			      &symbol_present, &var_present, &offset,
4025 			      depends_on);
4026       cost.cost /= avg_loop_niter (data->current_loop);
4027     }
4028   else if (ratio == 1)
4029     {
4030       tree real_cbase = cbase;
4031 
4032       /* Check to see if any adjustment is needed.  */
4033       if (cstepi == 0 && stmt_is_after_inc)
4034         {
4035           aff_tree real_cbase_aff;
4036           aff_tree cstep_aff;
4037 
4038           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4039                                    &real_cbase_aff);
4040           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4041 
4042           aff_combination_add (&real_cbase_aff, &cstep_aff);
4043           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4044         }
4045 
4046       cost = difference_cost (data,
4047 			      ubase, real_cbase,
4048 			      &symbol_present, &var_present, &offset,
4049 			      depends_on);
4050       cost.cost /= avg_loop_niter (data->current_loop);
4051     }
4052   else if (address_p
4053 	   && !POINTER_TYPE_P (ctype)
4054 	   && multiplier_allowed_in_address_p
4055 		(ratio, mem_mode,
4056 			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4057     {
4058       cbase
4059 	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4060       cost = difference_cost (data,
4061 			      ubase, cbase,
4062 			      &symbol_present, &var_present, &offset,
4063 			      depends_on);
4064       cost.cost /= avg_loop_niter (data->current_loop);
4065     }
4066   else
4067     {
4068       cost = force_var_cost (data, cbase, depends_on);
4069       cost = add_costs (cost,
4070 			difference_cost (data,
4071 					 ubase, build_int_cst (utype, 0),
4072 					 &symbol_present, &var_present,
4073 					 &offset, depends_on));
4074       cost.cost /= avg_loop_niter (data->current_loop);
4075       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4076     }
4077 
4078   if (inv_expr_id)
4079     {
4080       *inv_expr_id =
4081           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4082       /* Clear depends on.  */
4083       if (*inv_expr_id != -1 && depends_on && *depends_on)
4084         bitmap_clear (*depends_on);
4085     }
4086 
4087   /* If we are after the increment, the value of the candidate is higher by
4088      one iteration.  */
4089   if (stmt_is_after_inc)
4090     offset -= ratio * cstepi;
4091 
4092   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4093      (symbol/var1/const parts may be omitted).  If we are looking for an
4094      address, find the cost of addressing this.  */
4095   if (address_p)
4096     return add_costs (cost,
4097 		      get_address_cost (symbol_present, var_present,
4098 					offset, ratio, cstepi,
4099 					mem_mode,
4100 					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4101 					speed, stmt_is_after_inc,
4102 					can_autoinc));
4103 
4104   /* Otherwise estimate the costs for computing the expression.  */
4105   if (!symbol_present && !var_present && !offset)
4106     {
4107       if (ratio != 1)
4108 	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4109       return cost;
4110     }
4111 
4112   /* Symbol + offset should be compile-time computable, so assume that
4113      they are added to the variable, if present, just once.  */
4114   if (var_present && (symbol_present || offset))
4115     cost.cost += adjust_setup_cost (data,
4116 				    add_cost (speed, TYPE_MODE (ctype)));
4117 
4118   /* Having offset does not affect runtime cost in case it is added to
4119      symbol, but it increases complexity.  */
4120   if (offset)
4121     cost.complexity++;
4122 
4123   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4124 
4125   aratio = ratio > 0 ? ratio : -ratio;
4126   if (aratio != 1)
4127     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4128   return cost;
4129 
4130 fallback:
4131   if (can_autoinc)
4132     *can_autoinc = false;
4133 
4134   {
4135     /* Just get the expression, expand it and measure the cost.  */
4136     tree comp = get_computation_at (data->current_loop, use, cand, at);
4137 
4138     if (!comp)
4139       return infinite_cost;
4140 
4141     if (address_p)
4142       comp = build_simple_mem_ref (comp);
4143 
4144     return new_cost (computation_cost (comp, speed), 0);
4145   }
4146 }
4147 
4148 /* Determines the cost of the computation by which USE is expressed
4149    from induction variable CAND.  If ADDRESS_P is true, we just need
4150    to create an address from it, otherwise we want to get it into a
4151    register.  A set of invariants we depend on is stored in
4152    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4153    autoinc addressing is likely.  */
4154 
4155 static comp_cost
4156 get_computation_cost (struct ivopts_data *data,
4157 		      struct iv_use *use, struct iv_cand *cand,
4158 		      bool address_p, bitmap *depends_on,
4159                       bool *can_autoinc, int *inv_expr_id)
4160 {
4161   return get_computation_cost_at (data,
4162 				  use, cand, address_p, depends_on, use->stmt,
4163 				  can_autoinc, inv_expr_id);
4164 }
4165 
4166 /* Determines cost of basing replacement of USE on CAND in a generic
4167    expression.  */
4168 
4169 static bool
4170 determine_use_iv_cost_generic (struct ivopts_data *data,
4171 			       struct iv_use *use, struct iv_cand *cand)
4172 {
4173   bitmap depends_on;
4174   comp_cost cost;
4175   int inv_expr_id = -1;
4176 
4177   /* The simple case first -- if we need to express the value of the
4178      preserved original biv, the cost is 0.  This also prevents us from
4179      counting the cost of the increment twice -- once at this use and
4180      once in the cost of the candidate.  */
4181   if (cand->pos == IP_ORIGINAL
4182       && cand->incremented_at == use->stmt)
4183     {
4184       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4185                        ERROR_MARK, -1);
4186       return true;
4187     }
4188 
4189   cost = get_computation_cost (data, use, cand, false, &depends_on,
4190                                NULL, &inv_expr_id);
4191 
4192   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4193                    inv_expr_id);
4194 
4195   return !infinite_cost_p (cost);
4196 }
4197 
4198 /* Determines cost of basing replacement of USE on CAND in an address.  */
4199 
4200 static bool
4201 determine_use_iv_cost_address (struct ivopts_data *data,
4202 			       struct iv_use *use, struct iv_cand *cand)
4203 {
4204   bitmap depends_on;
4205   bool can_autoinc;
4206   int inv_expr_id = -1;
4207   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4208 					 &can_autoinc, &inv_expr_id);
4209 
4210   if (cand->ainc_use == use)
4211     {
4212       if (can_autoinc)
4213 	cost.cost -= cand->cost_step;
4214       /* If we generated the candidate solely for exploiting autoincrement
4215 	 opportunities, and it turns out it can't be used, set the cost to
4216 	 infinity to make sure we ignore it.  */
4217       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4218 	cost = infinite_cost;
4219     }
4220   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4221                    inv_expr_id);
4222 
4223   return !infinite_cost_p (cost);
4224 }
4225 
4226 /* Computes value of candidate CAND at position AT in iteration NITER, and
4227    stores it to VAL.  */
4228 
4229 static void
4230 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4231 	       aff_tree *val)
4232 {
4233   aff_tree step, delta, nit;
4234   struct iv *iv = cand->iv;
4235   tree type = TREE_TYPE (iv->base);
4236   tree steptype = unsigned_type_for (type);
4240 
4241   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4242   aff_combination_convert (&step, steptype);
4243   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4244   aff_combination_convert (&nit, steptype);
4245   aff_combination_mult (&nit, &step, &delta);
4246   if (stmt_after_increment (loop, cand, at))
4247     aff_combination_add (&delta, &step);
4248 
4249   tree_to_aff_combination (iv->base, type, val);
4250   if (!POINTER_TYPE_P (type))
4251     aff_combination_convert (val, steptype);
4252   aff_combination_add (val, &delta);
4253 }
4254 
4255 /* Returns period of induction variable iv.  */
4256 
4257 static tree
4258 iv_period (struct iv *iv)
4259 {
4260   tree step = iv->step, period, type;
4261   tree pow2div;
4262 
4263   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4264 
4265   type = unsigned_type_for (TREE_TYPE (step));
4266   /* The period of the iv is lcm (step, type_range) / step - 1,
4267      i.e., N * type_range / step - 1.  Since the type range is a power
4268      of two, N == step >> num_of_ending_zeros_binary (step),
4269      so the final result is
4270 
4271        (type_range >> num_of_ending_zeros_binary (step)) - 1
4272 
4273   */
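  /* For example, a 32-bit unsigned iv with step 4 has two ending zeros
     in binary, so its period is (1 << 30) - 1: the iv can be stepped
     that many times before one of its values repeats.  */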
4274   pow2div = num_ending_zeros (step);
4275 
4276   period = build_low_bits_mask (type,
4277                                 (TYPE_PRECISION (type)
4278                                  - tree_low_cst (pow2div, 1)));
4279 
4280   return period;
4281 }
4282 
4283 /* Returns the comparison operator used when eliminating the iv USE.  */
4284 
4285 static enum tree_code
4286 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4287 {
4288   struct loop *loop = data->current_loop;
4289   basic_block ex_bb;
4290   edge exit;
4291 
4292   ex_bb = gimple_bb (use->stmt);
4293   exit = EDGE_SUCC (ex_bb, 0);
4294   if (flow_bb_inside_loop_p (loop, exit->dest))
4295     exit = EDGE_SUCC (ex_bb, 1);
4296 
4297   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4298 }
4299 
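/* Strips type conversions from EXP that do not change whether the type
   may wrap around, so that the overflow guarantees of the expression are
   preserved by the stripping.  */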
4300 static tree
4301 strip_wrap_conserving_type_conversions (tree exp)
4302 {
4303   while (tree_ssa_useless_type_conversion (exp)
4304 	 && (nowrap_type_p (TREE_TYPE (exp))
4305 	     == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4306     exp = TREE_OPERAND (exp, 0);
4307   return exp;
4308 }
4309 
4310 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4311    check for an exact match.  */
4312 
4313 static bool
4314 expr_equal_p (tree e, tree what)
4315 {
4316   gimple stmt;
4317   enum tree_code code;
4318 
4319   e = strip_wrap_conserving_type_conversions (e);
4320   what = strip_wrap_conserving_type_conversions (what);
4321 
4322   code = TREE_CODE (what);
4323   if (TREE_TYPE (e) != TREE_TYPE (what))
4324     return false;
4325 
4326   if (operand_equal_p (e, what, 0))
4327     return true;
4328 
4329   if (TREE_CODE (e) != SSA_NAME)
4330     return false;
4331 
4332   stmt = SSA_NAME_DEF_STMT (e);
4333   if (gimple_code (stmt) != GIMPLE_ASSIGN
4334       || gimple_assign_rhs_code (stmt) != code)
4335     return false;
4336 
4337   switch (get_gimple_rhs_class (code))
4338     {
4339     case GIMPLE_BINARY_RHS:
4340       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4341 	return false;
4342       /* Fallthru.  */
4343 
4344     case GIMPLE_UNARY_RHS:
4345     case GIMPLE_SINGLE_RHS:
4346       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4347     default:
4348       return false;
4349     }
4350 }
4351 
4352 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4353    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4354    calculation is performed in non-wrapping type.
4355 
4356    TODO: More generally, we could test for the situation that
4357 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4358 	 This would require knowing the sign of OFFSET.
4359 
4360 	 Also, we only look for the first addition in the computation of BASE.
4361 	 More complex analysis would be better, but introducing it just for
4362 	 this optimization seems like an overkill.  */
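/* For illustration: if BASE is defined as something + offset in a
   non-wrapping type, then BASE - OFFSET is just "something", a value
   that was already computed, so the subtraction cannot overflow.  */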
4363 
4364 static bool
4365 difference_cannot_overflow_p (tree base, tree offset)
4366 {
4367   enum tree_code code;
4368   tree e1, e2;
4369 
4370   if (!nowrap_type_p (TREE_TYPE (base)))
4371     return false;
4372 
4373   base = expand_simple_operations (base);
4374 
4375   if (TREE_CODE (base) == SSA_NAME)
4376     {
4377       gimple stmt = SSA_NAME_DEF_STMT (base);
4378 
4379       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4380 	return false;
4381 
4382       code = gimple_assign_rhs_code (stmt);
4383       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4384 	return false;
4385 
4386       e1 = gimple_assign_rhs1 (stmt);
4387       e2 = gimple_assign_rhs2 (stmt);
4388     }
4389   else
4390     {
4391       code = TREE_CODE (base);
4392       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4393 	return false;
4394       e1 = TREE_OPERAND (base, 0);
4395       e2 = TREE_OPERAND (base, 1);
4396     }
4397 
4398   /* TODO: deeper inspection may be necessary to prove the equality.  */
4399   switch (code)
4400     {
4401     case PLUS_EXPR:
4402       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4403     case POINTER_PLUS_EXPR:
4404       return expr_equal_p (e2, offset);
4405 
4406     default:
4407       return false;
4408     }
4409 }
4410 
4411 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4412    comparison with CAND.  NITER describes the number of iterations of
4413    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4414 
4415    We aim to handle the following situation:
4416 
4417    sometype *base, *p;
4418    int a, b, i;
4419 
4420    i = a;
4421    p = p_0 = base + a;
4422 
4423    do
4424      {
4425        bla (*p);
4426        p++;
4427        i++;
4428      }
4429    while (i < b);
4430 
4431    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4432    We aim to optimize this to
4433 
4434    p = p_0 = base + a;
4435    do
4436      {
4437        bla (*p);
4438        p++;
4439      }
4440    while (p < p_0 - a + b);
4441 
4442    This preserves correctness, since the pointer arithmetic does not
4443    overflow.  More precisely:
4444 
4445    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4446       overflow in computing it or the values of p.
4447    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4448       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4449 
4450 static bool
4451 iv_elimination_compare_lt (struct ivopts_data *data,
4452                            struct iv_cand *cand, enum tree_code *comp_p,
4453 			   struct tree_niter_desc *niter)
4454 {
4455   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4456   struct affine_tree_combination nit, tmpa, tmpb;
4457   enum tree_code comp;
4458   HOST_WIDE_INT step;
4459 
4460   /* We need to know that the candidate induction variable does not overflow.
4461      While more complex analysis may be used to prove this, for now just
4462      check that the variable appears in the original program and that it
4463      is computed in a type that guarantees no overflows.  */
4464   cand_type = TREE_TYPE (cand->iv->base);
4465   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4466     return false;
4467 
4468   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4469      the calculation of the BOUND could overflow, making the comparison
4470      invalid.  */
4471   if (!data->loop_single_exit_p)
4472     return false;
4473 
4474   /* We need to be able to decide whether the candidate is increasing or
4475      decreasing in order to choose the right comparison operator.  */
4476   if (!cst_and_fits_in_hwi (cand->iv->step))
4477     return false;
4478   step = int_cst_value (cand->iv->step);
4479 
4480   /* Check that the number of iterations matches the expected pattern:
4481      a + 1 > b ? 0 : b - a - 1.  */
4482   mbz = niter->may_be_zero;
4483   if (TREE_CODE (mbz) == GT_EXPR)
4484     {
4485       /* Handle a + 1 > b.  */
4486       tree op0 = TREE_OPERAND (mbz, 0);
4487       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4488 	{
4489 	  a = TREE_OPERAND (op0, 0);
4490 	  b = TREE_OPERAND (mbz, 1);
4491 	}
4492       else
4493 	return false;
4494     }
4495   else if (TREE_CODE (mbz) == LT_EXPR)
4496     {
4497       tree op1 = TREE_OPERAND (mbz, 1);
4498 
4499       /* Handle b < a + 1.  */
4500       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4501         {
4502           a = TREE_OPERAND (op1, 0);
4503           b = TREE_OPERAND (mbz, 0);
4504         }
4505       else
4506 	return false;
4507     }
4508   else
4509     return false;
4510 
4511   /* Expected number of iterations is B - A - 1.  Check that it matches
4512      the actual number, i.e., that B - A - NITER = 1.  */
4513   tree_to_aff_combination (niter->niter, nit_type, &nit);
4514   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4515   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4516   aff_combination_scale (&nit, double_int_minus_one);
4517   aff_combination_scale (&tmpa, double_int_minus_one);
4518   aff_combination_add (&tmpb, &tmpa);
4519   aff_combination_add (&tmpb, &nit);
4520   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4521     return false;
4522 
4523   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4524      overflow.  */
4525   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4526 			cand->iv->step,
4527 			fold_convert (TREE_TYPE (cand->iv->step), a));
4528   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4529     return false;
4530 
4531   /* Determine the new comparison operator.  */
4532   comp = step < 0 ? GT_EXPR : LT_EXPR;
4533   if (*comp_p == NE_EXPR)
4534     *comp_p = comp;
4535   else if (*comp_p == EQ_EXPR)
4536     *comp_p = invert_tree_comparison (comp, false);
4537   else
4538     gcc_unreachable ();
4539 
4540   return true;
4541 }
4542 
4543 /* Check whether it is possible to express the condition in USE by comparison
4544    of candidate CAND.  If so, store the value compared with to BOUND, and the
4545    comparison operator to COMP.  */
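/* For illustration: an exit test i != n on the original iv can often be
   replaced by a test such as p != base + 4 * n on a candidate p with
   step 4, provided the period of p is large enough that p cannot wrap
   around before the exit is taken.  */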
4546 
4547 static bool
4548 may_eliminate_iv (struct ivopts_data *data,
4549 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
4550 		  enum tree_code *comp)
4551 {
4552   basic_block ex_bb;
4553   edge exit;
4554   tree period;
4555   struct loop *loop = data->current_loop;
4556   aff_tree bnd;
4557   struct tree_niter_desc *desc = NULL;
4558 
4559   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4560     return false;
4561 
4562   /* For now, this works only for exits that dominate the loop latch.
4563      TODO: extend to other conditions inside the loop body.  */
4564   ex_bb = gimple_bb (use->stmt);
4565   if (use->stmt != last_stmt (ex_bb)
4566       || gimple_code (use->stmt) != GIMPLE_COND
4567       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4568     return false;
4569 
4570   exit = EDGE_SUCC (ex_bb, 0);
4571   if (flow_bb_inside_loop_p (loop, exit->dest))
4572     exit = EDGE_SUCC (ex_bb, 1);
4573   if (flow_bb_inside_loop_p (loop, exit->dest))
4574     return false;
4575 
4576   desc = niter_for_exit (data, exit);
4577   if (!desc)
4578     return false;
4579 
4580   /* Determine whether we can use the variable to test the exit condition.
4581      This is the case iff the period of the induction variable is greater
4582      than the number of iterations for which the exit condition is true.  */
4583   period = iv_period (cand->iv);
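  /* For example, an 8-bit unsigned iv with step 1 has period 255, so it
     can be used for an exit taken after at most 255 iterations, but not
     for one taken after 300, as its values would repeat first.  */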
4584 
4585   /* If the number of iterations is constant, compare against it directly.  */
4586   if (TREE_CODE (desc->niter) == INTEGER_CST)
4587     {
4588       /* See cand_value_at.  */
4589       if (stmt_after_increment (loop, cand, use->stmt))
4590         {
4591           if (!tree_int_cst_lt (desc->niter, period))
4592             return false;
4593         }
4594       else
4595         {
4596           if (tree_int_cst_lt (period, desc->niter))
4597             return false;
4598         }
4599     }
4600 
4601   /* If not, and if this is the only possible exit of the loop, see whether
4602      we can get a conservative estimate of the number of iterations of the
4603      entire loop and compare against that instead.  */
4604   else
4605     {
4606       double_int period_value, max_niter;
4607 
4608       max_niter = desc->max;
4609       if (stmt_after_increment (loop, cand, use->stmt))
4610         max_niter += double_int_one;
4611       period_value = tree_to_double_int (period);
4612       if (max_niter.ugt (period_value))
4613         {
4614           /* See if we can take advantage of inferred loop bound information.  */
4615           if (data->loop_single_exit_p)
4616             {
4617               if (!max_loop_iterations (loop, &max_niter))
4618                 return false;
4619               /* The loop bound is already adjusted by adding 1.  */
4620               if (max_niter.ugt (period_value))
4621                 return false;
4622             }
4623           else
4624             return false;
4625         }
4626     }
4627 
4628   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4629 
4630   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4631 			 aff_combination_to_tree (&bnd));
4632   *comp = iv_elimination_compare (data, use);
4633 
4634   /* It is unlikely that computing the number of iterations using division
4635      would be more profitable than keeping the original induction variable.  */
4636   if (expression_expensive_p (*bound))
4637     return false;
4638 
4639   /* Sometimes it is possible to handle the situation that the number of
4640      iterations may be zero unless additional assumptions hold, by using
4641      < instead of != in the exit condition.
4642 
4643      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4644 	   base the exit condition on it.  However, that is often too
4645 	   expensive.  */
4646   if (!integer_zerop (desc->may_be_zero))
4647     return iv_elimination_compare_lt (data, cand, comp, desc);
4648 
4649   return true;
4650 }
4651 
4652 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4653    be copied, if it is used in the loop body and DATA->body_includes_call.  */
4654 
4655 static int
4656 parm_decl_cost (struct ivopts_data *data, tree bound)
4657 {
4658   tree sbound = bound;
4659   STRIP_NOPS (sbound);
4660 
4661   if (TREE_CODE (sbound) == SSA_NAME
4662       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4663       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4664       && data->body_includes_call)
4665     return COSTS_N_INSNS (1);
4666 
4667   return 0;
4668 }
4669 
4670 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4671 
4672 static bool
4673 determine_use_iv_cost_condition (struct ivopts_data *data,
4674 				 struct iv_use *use, struct iv_cand *cand)
4675 {
4676   tree bound = NULL_TREE;
4677   struct iv *cmp_iv;
4678   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4679   comp_cost elim_cost, express_cost, cost, bound_cost;
4680   bool ok;
4681   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4682   tree *control_var, *bound_cst;
4683   enum tree_code comp = ERROR_MARK;
4684 
4685   /* Only consider real candidates.  */
4686   if (!cand->iv)
4687     {
4688       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4689 		       ERROR_MARK, -1);
4690       return false;
4691     }
4692 
4693   /* Try iv elimination.  */
4694   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4695     {
4696       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4697       if (elim_cost.cost == 0)
4698         elim_cost.cost = parm_decl_cost (data, bound);
4699       else if (TREE_CODE (bound) == INTEGER_CST)
4700         elim_cost.cost = 0;
4701       /* If we replace a loop condition 'i < n' with 'p < base + n',
4702 	 depends_on_elim will have 'base' and 'n' set, which implies
4703 	 that both 'base' and 'n' will be live during the loop.  More likely,
4704 	 'base + n' will be loop invariant, resulting in only one live value
4705 	 during the loop.  So in that case we clear depends_on_elim and set
4706 	 elim_inv_expr_id instead.  */
4707       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4708 	{
4709 	  elim_inv_expr_id = get_expr_id (data, bound);
4710 	  bitmap_clear (depends_on_elim);
4711 	}
4712       /* The bound is a loop invariant, so it will be only computed
4713 	 once.  */
4714       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4715     }
4716   else
4717     elim_cost = infinite_cost;
4718 
4719   /* Try expressing the original giv.  If it is compared with an invariant,
4720      note that we cannot get rid of it.  */
4721   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4722 			      NULL, &cmp_iv);
4723   gcc_assert (ok);
4724 
4725   /* When the condition is a comparison of the candidate IV against
4726      zero, prefer this IV.
4727 
4728      TODO: The constant that we're subtracting from the cost should
4729      be target-dependent.  This information should be added to the
4730      target costs for each backend.  */
4731   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4732       && integer_zerop (*bound_cst)
4733       && (operand_equal_p (*control_var, cand->var_after, 0)
4734 	  || operand_equal_p (*control_var, cand->var_before, 0)))
4735     elim_cost.cost -= 1;
4736 
4737   express_cost = get_computation_cost (data, use, cand, false,
4738 				       &depends_on_express, NULL,
4739                                        &express_inv_expr_id);
4740   fd_ivopts_data = data;
4741   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4742 
4743   /* Count the cost of the original bound as well.  */
4744   bound_cost = force_var_cost (data, *bound_cst, NULL);
4745   if (bound_cost.cost == 0)
4746     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4747   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4748     bound_cost.cost = 0;
4749   express_cost.cost += bound_cost.cost;
4750 
4751   /* Choose the better approach, preferring the eliminated IV. */
4752   if (compare_costs (elim_cost, express_cost) <= 0)
4753     {
4754       cost = elim_cost;
4755       depends_on = depends_on_elim;
4756       depends_on_elim = NULL;
4757       inv_expr_id = elim_inv_expr_id;
4758     }
4759   else
4760     {
4761       cost = express_cost;
4762       depends_on = depends_on_express;
4763       depends_on_express = NULL;
4764       bound = NULL_TREE;
4765       comp = ERROR_MARK;
4766       inv_expr_id = express_inv_expr_id;
4767     }
4768 
4769   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4770 
4771   if (depends_on_elim)
4772     BITMAP_FREE (depends_on_elim);
4773   if (depends_on_express)
4774     BITMAP_FREE (depends_on_express);
4775 
4776   return !infinite_cost_p (cost);
4777 }
4778 
4779 /* Determines cost of basing replacement of USE on CAND.  Returns false
4780    if USE cannot be based on CAND.  */
4781 
4782 static bool
4783 determine_use_iv_cost (struct ivopts_data *data,
4784 		       struct iv_use *use, struct iv_cand *cand)
4785 {
4786   switch (use->type)
4787     {
4788     case USE_NONLINEAR_EXPR:
4789       return determine_use_iv_cost_generic (data, use, cand);
4790 
4791     case USE_ADDRESS:
4792       return determine_use_iv_cost_address (data, use, cand);
4793 
4794     case USE_COMPARE:
4795       return determine_use_iv_cost_condition (data, use, cand);
4796 
4797     default:
4798       gcc_unreachable ();
4799     }
4800 }
4801 
4802 /* Return true if get_computation_cost indicates that autoincrement is
4803    a possibility for the pair of USE and CAND, false otherwise.  */
4804 
4805 static bool
4806 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4807 			   struct iv_cand *cand)
4808 {
4809   bitmap depends_on;
4810   bool can_autoinc;
4811   comp_cost cost;
4812 
4813   if (use->type != USE_ADDRESS)
4814     return false;
4815 
4816   cost = get_computation_cost (data, use, cand, true, &depends_on,
4817 			       &can_autoinc, NULL);
4818 
4819   BITMAP_FREE (depends_on);
4820 
4821   return !infinite_cost_p (cost) && can_autoinc;
4822 }
4823 
4824 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4825    use that allows autoincrement, and set their AINC_USE if possible.  */
4826 
4827 static void
4828 set_autoinc_for_original_candidates (struct ivopts_data *data)
4829 {
4830   unsigned i, j;
4831 
4832   for (i = 0; i < n_iv_cands (data); i++)
4833     {
4834       struct iv_cand *cand = iv_cand (data, i);
4835       struct iv_use *closest = NULL;
4836       if (cand->pos != IP_ORIGINAL)
4837 	continue;
4838       for (j = 0; j < n_iv_uses (data); j++)
4839 	{
4840 	  struct iv_use *use = iv_use (data, j);
4841 	  unsigned uid = gimple_uid (use->stmt);
4842 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4843 	      || uid > gimple_uid (cand->incremented_at))
4844 	    continue;
4845 	  if (closest == NULL || uid > gimple_uid (closest->stmt))
4846 	    closest = use;
4847 	}
4848       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4849 	continue;
4850       cand->ainc_use = closest;
4851     }
4852 }
4853 
4854 /* Finds the candidates for the induction variables.  */
4855 
4856 static void
4857 find_iv_candidates (struct ivopts_data *data)
4858 {
4859   /* Add commonly used ivs.  */
4860   add_standard_iv_candidates (data);
4861 
4862   /* Add old induction variables.  */
4863   add_old_ivs_candidates (data);
4864 
4865   /* Add induction variables derived from uses.  */
4866   add_derived_ivs_candidates (data);
4867 
4868   set_autoinc_for_original_candidates (data);
4869 
4870   /* Record the important candidates.  */
4871   record_important_candidates (data);
4872 }
4873 
4874 /* Determines costs of basing the use of the iv on an iv candidate.  */
4875 
4876 static void
4877 determine_use_iv_costs (struct ivopts_data *data)
4878 {
4879   unsigned i, j;
4880   struct iv_use *use;
4881   struct iv_cand *cand;
4882   bitmap to_clear = BITMAP_ALLOC (NULL);
4883 
4884   alloc_use_cost_map (data);
4885 
4886   for (i = 0; i < n_iv_uses (data); i++)
4887     {
4888       use = iv_use (data, i);
4889 
4890       if (data->consider_all_candidates)
4891 	{
4892 	  for (j = 0; j < n_iv_cands (data); j++)
4893 	    {
4894 	      cand = iv_cand (data, j);
4895 	      determine_use_iv_cost (data, use, cand);
4896 	    }
4897 	}
4898       else
4899 	{
4900 	  bitmap_iterator bi;
4901 
4902 	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4903 	    {
4904 	      cand = iv_cand (data, j);
4905 	      if (!determine_use_iv_cost (data, use, cand))
4906 		bitmap_set_bit (to_clear, j);
4907 	    }
4908 
4909 	  /* Remove the candidates for which the cost is infinite from
4910 	     the list of related candidates.  */
4911 	  bitmap_and_compl_into (use->related_cands, to_clear);
4912 	  bitmap_clear (to_clear);
4913 	}
4914     }
4915 
4916   BITMAP_FREE (to_clear);
4917 
4918   if (dump_file && (dump_flags & TDF_DETAILS))
4919     {
4920       fprintf (dump_file, "Use-candidate costs:\n");
4921 
4922       for (i = 0; i < n_iv_uses (data); i++)
4923 	{
4924 	  use = iv_use (data, i);
4925 
4926 	  fprintf (dump_file, "Use %d:\n", i);
4927 	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4928 	  for (j = 0; j < use->n_map_members; j++)
4929 	    {
4930 	      if (!use->cost_map[j].cand
4931 		  || infinite_cost_p (use->cost_map[j].cost))
4932 		continue;
4933 
4934 	      fprintf (dump_file, "  %d\t%d\t%d\t",
4935 		       use->cost_map[j].cand->id,
4936 		       use->cost_map[j].cost.cost,
4937 		       use->cost_map[j].cost.complexity);
4938 	      if (use->cost_map[j].depends_on)
4939 		bitmap_print (dump_file,
4940 			      use->cost_map[j].depends_on, "","");
4941               if (use->cost_map[j].inv_expr_id != -1)
4942                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4943 	      fprintf (dump_file, "\n");
4944 	    }
4945 
4946 	  fprintf (dump_file, "\n");
4947 	}
4948       fprintf (dump_file, "\n");
4949     }
4950 }
4951 
4952 /* Determines cost of the candidate CAND.  */
4953 
4954 static void
4955 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4956 {
4957   comp_cost cost_base;
4958   unsigned cost, cost_step;
4959   tree base;
4960 
4961   if (!cand->iv)
4962     {
4963       cand->cost = 0;
4964       return;
4965     }
4966 
4967   /* There are two costs associated with the candidate -- its increment
4968      and its initialization.  The second is almost negligible for any loop
4969      that rolls enough, so we account for it only very slightly.  */
4970 
4971   base = cand->iv->base;
4972   cost_base = force_var_cost (data, base, NULL);
4973   /* It is only rarely that the iv register happens to be initialized with
4974      the proper value at no cost.  In general, there will at least be a
4975      regcopy or a const set.  */
4976   if (cost_base.cost == 0)
4977     cost_base.cost = COSTS_N_INSNS (1);
4978   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4979 
4980   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4981 
4982   /* Prefer the original ivs unless we may gain something by replacing them.
4983      The reason is to make debugging simpler; this is not relevant for
4984      artificial ivs created by other optimization passes.  */
4985   if (cand->pos != IP_ORIGINAL
4986       || !SSA_NAME_VAR (cand->var_before)
4987       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4988     cost++;
4989 
4990   /* Prefer not to insert statements into the latch unless there are some
4991      already (so that we do not create unnecessary jumps).  */
4992   if (cand->pos == IP_END
4993       && empty_block_p (ip_end_pos (data->current_loop)))
4994     cost++;
4995 
4996   cand->cost = cost;
4997   cand->cost_step = cost_step;
4998 }
4999 
5000 /* Determines costs of computation of the candidates.  */
5001 
5002 static void
5003 determine_iv_costs (struct ivopts_data *data)
5004 {
5005   unsigned i;
5006 
5007   if (dump_file && (dump_flags & TDF_DETAILS))
5008     {
5009       fprintf (dump_file, "Candidate costs:\n");
5010       fprintf (dump_file, "  cand\tcost\n");
5011     }
5012 
5013   for (i = 0; i < n_iv_cands (data); i++)
5014     {
5015       struct iv_cand *cand = iv_cand (data, i);
5016 
5017       determine_iv_cost (data, cand);
5018 
5019       if (dump_file && (dump_flags & TDF_DETAILS))
5020 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5021     }
5022 
5023   if (dump_file && (dump_flags & TDF_DETAILS))
5024     fprintf (dump_file, "\n");
5025 }
5026 
5027 /* Calculates the cost of having SIZE induction variables.  */
5028 
5029 static unsigned
5030 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5031 {
5032   /* We add size to the cost, so that we prefer eliminating ivs
5033      if possible.  */
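  /* For illustration (the exact formula is target dependent): while SIZE
     plus the invariants in use fit in the register file, the estimate
     grows slowly with SIZE; once they no longer fit, it grows roughly by
     the spill cost for each additional register needed.  */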
5034   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5035 					    data->body_includes_call);
5036 }
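
/* Editorial note: estimate_reg_pressure_cost is a target-dependent
   estimate; roughly, the pressure term stays small while SIZE plus the
   invariants counted in regs_used fit in the available hard registers,
   and grows by spill costs once they do not.  The "size +" term above is
   what biases the search toward fewer ivs even when pressure is low.  */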
5037 
5038 /* For each size of the induction variable set determine the penalty.  */
5039 
5040 static void
5041 determine_set_costs (struct ivopts_data *data)
5042 {
5043   unsigned j, n;
5044   gimple phi;
5045   gimple_stmt_iterator psi;
5046   tree op;
5047   struct loop *loop = data->current_loop;
5048   bitmap_iterator bi;
5049 
5050   if (dump_file && (dump_flags & TDF_DETAILS))
5051     {
5052       fprintf (dump_file, "Global costs:\n");
5053       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5054       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5055       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5056       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5057     }
5058 
5059   n = 0;
5060   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5061     {
5062       phi = gsi_stmt (psi);
5063       op = PHI_RESULT (phi);
5064 
5065       if (virtual_operand_p (op))
5066 	continue;
5067 
5068       if (get_iv (data, op))
5069 	continue;
5070 
5071       n++;
5072     }
5073 
5074   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5075     {
5076       struct version_info *info = ver_info (data, j);
5077 
5078       if (info->inv_id && info->has_nonlin_use)
5079 	n++;
5080     }
5081 
5082   data->regs_used = n;
5083   if (dump_file && (dump_flags & TDF_DETAILS))
5084     fprintf (dump_file, "  regs_used %d\n", n);
5085 
5086   if (dump_file && (dump_flags & TDF_DETAILS))
5087     {
5088       fprintf (dump_file, "  cost for size:\n");
5089       fprintf (dump_file, "  ivs\tcost\n");
5090       for (j = 0; j <= 2 * target_avail_regs; j++)
5091 	fprintf (dump_file, "  %d\t%d\n", j,
5092 		 ivopts_global_cost_for_size (data, j));
5093       fprintf (dump_file, "\n");
5094     }
5095 }
5096 
5097 /* Returns true if A is a cheaper cost pair than B.  */
5098 
5099 static bool
5100 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5101 {
5102   int cmp;
5103 
5104   if (!a)
5105     return false;
5106 
5107   if (!b)
5108     return true;
5109 
5110   cmp = compare_costs (a->cost, b->cost);
5111   if (cmp < 0)
5112     return true;
5113 
5114   if (cmp > 0)
5115     return false;
5116 
5117   /* In case the costs are the same, prefer the cheaper candidate.  */
5118   if (a->cand->cost < b->cand->cost)
5119     return true;
5120 
5121   return false;
5122 }
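
/* Editorial example, assuming compare_costs orders first by cost and then
   by complexity: a pair with cost (4, complexity 1) is cheaper than one
   with (5, 0); (4, 1) vs. (4, 2) is decided by the complexity; and only
   when both members compare equal does the candidate's own cost act as
   the final tie-breaker, as coded above.  */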
5123 
5124 
5125 /* Returns the candidate by which USE is expressed in IVS.  */
5126 
5127 static struct cost_pair *
5128 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5129 {
5130   return ivs->cand_for_use[use->id];
5131 }
5132 
5133 /* Computes the cost field of IVS structure.  */
5134 
5135 static void
5136 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5137 {
5138   comp_cost cost = ivs->cand_use_cost;
5139 
5140   cost.cost += ivs->cand_cost;
5141 
5142   cost.cost += ivopts_global_cost_for_size (data,
5143                                             ivs->n_regs + ivs->num_used_inv_expr);
5144 
5145   ivs->cost = cost;
5146 }
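
/* Restated, the cost of an assignment decomposes as

     ivs->cost = cand_use_cost   // summed over all uses
               + cand_cost      // summed over the selected candidates
               + ivopts_global_cost_for_size (data,
                                              n_regs + num_used_inv_expr);

   i.e. each live invariant expression is charged like one more register.  */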
5147 
5148 /* Remove the invariants in set INVS from set IVS.  */
5149 
5150 static void
5151 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5152 {
5153   bitmap_iterator bi;
5154   unsigned iid;
5155 
5156   if (!invs)
5157     return;
5158 
5159   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5160     {
5161       ivs->n_invariant_uses[iid]--;
5162       if (ivs->n_invariant_uses[iid] == 0)
5163         ivs->n_regs--;
5164     }
5165 }
5166 
5167 /* Set USE not to be expressed by any candidate in IVS.  */
5168 
5169 static void
5170 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5171 		 struct iv_use *use)
5172 {
5173   unsigned uid = use->id, cid;
5174   struct cost_pair *cp;
5175 
5176   cp = ivs->cand_for_use[uid];
5177   if (!cp)
5178     return;
5179   cid = cp->cand->id;
5180 
5181   ivs->bad_uses++;
5182   ivs->cand_for_use[uid] = NULL;
5183   ivs->n_cand_uses[cid]--;
5184 
5185   if (ivs->n_cand_uses[cid] == 0)
5186     {
5187       bitmap_clear_bit (ivs->cands, cid);
5188       /* Do not count the pseudocandidates.  */
5189       if (cp->cand->iv)
5190 	ivs->n_regs--;
5191       ivs->n_cands--;
5192       ivs->cand_cost -= cp->cand->cost;
5193 
5194       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5195     }
5196 
5197   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5198 
5199   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5200 
5201   if (cp->inv_expr_id != -1)
5202     {
5203       ivs->used_inv_expr[cp->inv_expr_id]--;
5204       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5205         ivs->num_used_inv_expr--;
5206     }
5207   iv_ca_recount_cost (data, ivs);
5208 }
5209 
5210 /* Add invariants in set INVS to set IVS.  */
5211 
5212 static void
5213 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5214 {
5215   bitmap_iterator bi;
5216   unsigned iid;
5217 
5218   if (!invs)
5219     return;
5220 
5221   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5222     {
5223       ivs->n_invariant_uses[iid]++;
5224       if (ivs->n_invariant_uses[iid] == 1)
5225         ivs->n_regs++;
5226     }
5227 }
5228 
5229 /* Set cost pair for USE in set IVS to CP.  */
5230 
5231 static void
5232 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5233 	      struct iv_use *use, struct cost_pair *cp)
5234 {
5235   unsigned uid = use->id, cid;
5236 
5237   if (ivs->cand_for_use[uid] == cp)
5238     return;
5239 
5240   if (ivs->cand_for_use[uid])
5241     iv_ca_set_no_cp (data, ivs, use);
5242 
5243   if (cp)
5244     {
5245       cid = cp->cand->id;
5246 
5247       ivs->bad_uses--;
5248       ivs->cand_for_use[uid] = cp;
5249       ivs->n_cand_uses[cid]++;
5250       if (ivs->n_cand_uses[cid] == 1)
5251 	{
5252 	  bitmap_set_bit (ivs->cands, cid);
5253 	  /* Do not count the pseudocandidates.  */
5254 	  if (cp->cand->iv)
5255 	    ivs->n_regs++;
5256 	  ivs->n_cands++;
5257 	  ivs->cand_cost += cp->cand->cost;
5258 
5259 	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5260 	}
5261 
5262       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5263       iv_ca_set_add_invariants (ivs, cp->depends_on);
5264 
5265       if (cp->inv_expr_id != -1)
5266         {
5267           ivs->used_inv_expr[cp->inv_expr_id]++;
5268           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5269             ivs->num_used_inv_expr++;
5270         }
5271       iv_ca_recount_cost (data, ivs);
5272     }
5273 }
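
/* Editorial note: iv_ca_set_cp and iv_ca_set_no_cp mirror each other --
   every counter and cost one of them adjusts upward, the other adjusts
   downward -- so applying one after the other restores the set exactly.
   The delta machinery below relies on this invariant.  */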
5274 
5275 /* Extend set IVS by expressing USE by some of the candidates in it
5276    if possible.  The important candidates are considered if
5277    IMPORTANT_CANDIDATES is true; otherwise only the candidates already in IVS.  */
5278 
5279 static void
5280 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5281 	       struct iv_use *use, bool important_candidates)
5282 {
5283   struct cost_pair *best_cp = NULL, *cp;
5284   bitmap_iterator bi;
5285   bitmap cands;
5286   unsigned i;
5287 
5288   gcc_assert (ivs->upto >= use->id);
5289 
5290   if (ivs->upto == use->id)
5291     {
5292       ivs->upto++;
5293       ivs->bad_uses++;
5294     }
5295 
5296   cands = (important_candidates ? data->important_candidates : ivs->cands);
5297   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5298     {
5299       struct iv_cand *cand = iv_cand (data, i);
5300 
5301       cp = get_use_iv_cost (data, use, cand);
5302 
5303       if (cheaper_cost_pair (cp, best_cp))
5304 	best_cp = cp;
5305     }
5306 
5307   iv_ca_set_cp (data, ivs, use, best_cp);
5308 }
5309 
5310 /* Get cost for assignment IVS.  */
5311 
5312 static comp_cost
5313 iv_ca_cost (struct iv_ca *ivs)
5314 {
5315   /* This was a conditional expression but it triggered a bug in
5316      Sun C 5.5.  */
5317   if (ivs->bad_uses)
5318     return infinite_cost;
5319   else
5320     return ivs->cost;
5321 }
5322 
5323 /* Returns true if all dependences of CP are among invariants in IVS.  */
5324 
5325 static bool
5326 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5327 {
5328   unsigned i;
5329   bitmap_iterator bi;
5330 
5331   if (!cp->depends_on)
5332     return true;
5333 
5334   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5335     {
5336       if (ivs->n_invariant_uses[i] == 0)
5337 	return false;
5338     }
5339 
5340   return true;
5341 }
5342 
5343 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
5344    chains it before NEXT_CHANGE.  */
5345 
5346 static struct iv_ca_delta *
5347 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5348 		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5349 {
5350   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5351 
5352   change->use = use;
5353   change->old_cp = old_cp;
5354   change->new_cp = new_cp;
5355   change->next_change = next_change;
5356 
5357   return change;
5358 }
5359 
5360 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5361    are rewritten.  */
5362 
5363 static struct iv_ca_delta *
5364 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5365 {
5366   struct iv_ca_delta *last;
5367 
5368   if (!l2)
5369     return l1;
5370 
5371   if (!l1)
5372     return l2;
5373 
5374   for (last = l1; last->next_change; last = last->next_change)
5375     continue;
5376   last->next_change = l2;
5377 
5378   return l1;
5379 }
5380 
5381 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5382 
5383 static struct iv_ca_delta *
5384 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5385 {
5386   struct iv_ca_delta *act, *next, *prev = NULL;
5387   struct cost_pair *tmp;
5388 
5389   for (act = delta; act; act = next)
5390     {
5391       next = act->next_change;
5392       act->next_change = prev;
5393       prev = act;
5394 
5395       tmp = act->old_cp;
5396       act->old_cp = act->new_cp;
5397       act->new_cp = tmp;
5398     }
5399 
5400   return prev;
5401 }
5402 
5403 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5404    reverted instead.  */
5405 
5406 static void
5407 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5408 		    struct iv_ca_delta *delta, bool forward)
5409 {
5410   struct cost_pair *from, *to;
5411   struct iv_ca_delta *act;
5412 
5413   if (!forward)
5414     delta = iv_ca_delta_reverse (delta);
5415 
5416   for (act = delta; act; act = act->next_change)
5417     {
5418       from = act->old_cp;
5419       to = act->new_cp;
5420       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5421       iv_ca_set_cp (data, ivs, act->use, to);
5422     }
5423 
5424   if (!forward)
5425     iv_ca_delta_reverse (delta);
5426 }
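
/* The deltas enable the "apply, measure, undo" idiom used throughout the
   search below, schematically:

     iv_ca_delta_commit (data, ivs, delta, true);    // apply tentatively
     acost = iv_ca_cost (ivs);                       // measure
     iv_ca_delta_commit (data, ivs, delta, false);   // roll back

   (see e.g. iv_ca_extend).  Reverting works because iv_ca_delta_reverse
   both reverses the chain and swaps the old_cp/new_cp pair in every
   entry.  */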
5427 
5428 /* Returns true if CAND is used in IVS.  */
5429 
5430 static bool
5431 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5432 {
5433   return ivs->n_cand_uses[cand->id] > 0;
5434 }
5435 
5436 /* Returns number of induction variable candidates in the set IVS.  */
5437 
5438 static unsigned
5439 iv_ca_n_cands (struct iv_ca *ivs)
5440 {
5441   return ivs->n_cands;
5442 }
5443 
5444 /* Free the list of changes DELTA.  */
5445 
5446 static void
5447 iv_ca_delta_free (struct iv_ca_delta **delta)
5448 {
5449   struct iv_ca_delta *act, *next;
5450 
5451   for (act = *delta; act; act = next)
5452     {
5453       next = act->next_change;
5454       free (act);
5455     }
5456 
5457   *delta = NULL;
5458 }
5459 
5460 /* Allocates a new iv candidate assignment.  */
5461 
5462 static struct iv_ca *
5463 iv_ca_new (struct ivopts_data *data)
5464 {
5465   struct iv_ca *nw = XNEW (struct iv_ca);
5466 
5467   nw->upto = 0;
5468   nw->bad_uses = 0;
5469   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5470   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5471   nw->cands = BITMAP_ALLOC (NULL);
5472   nw->n_cands = 0;
5473   nw->n_regs = 0;
5474   nw->cand_use_cost = no_cost;
5475   nw->cand_cost = 0;
5476   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5477   nw->cost = no_cost;
5478   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5479   nw->num_used_inv_expr = 0;
5480 
5481   return nw;
5482 }
5483 
5484 /* Free memory occupied by the set IVS.  */
5485 
5486 static void
5487 iv_ca_free (struct iv_ca **ivs)
5488 {
5489   free ((*ivs)->cand_for_use);
5490   free ((*ivs)->n_cand_uses);
5491   BITMAP_FREE ((*ivs)->cands);
5492   free ((*ivs)->n_invariant_uses);
5493   free ((*ivs)->used_inv_expr);
5494   free (*ivs);
5495   *ivs = NULL;
5496 }
5497 
5498 /* Dumps IVS to FILE.  */
5499 
5500 static void
5501 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5502 {
5503   const char *pref = "  invariants ";
5504   unsigned i;
5505   comp_cost cost = iv_ca_cost (ivs);
5506 
5507   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5508   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5509            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5510   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5511 
5512    for (i = 0; i < ivs->upto; i++)
5513     {
5514       struct iv_use *use = iv_use (data, i);
5515       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5516       if (cp)
5517         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5518                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5519       else
5520         fprintf (file, "   use:%d --> ??\n", use->id);
5521     }
5522 
5523   for (i = 1; i <= data->max_inv_id; i++)
5524     if (ivs->n_invariant_uses[i])
5525       {
5526 	fprintf (file, "%s%d", pref, i);
5527 	pref = ", ";
5528       }
5529   fprintf (file, "\n\n");
5530 }
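
/* An editorial sample of the dump produced above (all values invented;
   the candidate bitmap is shown schematically):

     cost: 13 (complexity 1)
     cand_cost: 5
     cand_use_cost: 6 (complexity 1)
     candidates: 1 3
      use:0 --> iv_cand:1, cost=(2,0)
      use:1 --> iv_cand:3, cost=(4,1)
     invariants 2, 5
*/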
5531 
5532 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
5533    of the new set, and store the differences in DELTA.  The number of
5534    induction variables in the new set is stored in N_IVS.  When MIN_NCAND is
5535    true, the function tries to find a solution with a minimal number of ivs.  */
5536 
5537 static comp_cost
5538 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5539 	      struct iv_cand *cand, struct iv_ca_delta **delta,
5540 	      unsigned *n_ivs, bool min_ncand)
5541 {
5542   unsigned i;
5543   comp_cost cost;
5544   struct iv_use *use;
5545   struct cost_pair *old_cp, *new_cp;
5546 
5547   *delta = NULL;
5548   for (i = 0; i < ivs->upto; i++)
5549     {
5550       use = iv_use (data, i);
5551       old_cp = iv_ca_cand_for_use (ivs, use);
5552 
5553       if (old_cp
5554 	  && old_cp->cand == cand)
5555 	continue;
5556 
5557       new_cp = get_use_iv_cost (data, use, cand);
5558       if (!new_cp)
5559 	continue;
5560 
5561       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5562 	continue;
5563 
5564       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5565         continue;
5566 
5567       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5568     }
5569 
5570   iv_ca_delta_commit (data, ivs, *delta, true);
5571   cost = iv_ca_cost (ivs);
5572   if (n_ivs)
5573     *n_ivs = iv_ca_n_cands (ivs);
5574   iv_ca_delta_commit (data, ivs, *delta, false);
5575 
5576   return cost;
5577 }
5578 
5579 /* Try narrowing set IVS by removing CAND.  Return the cost of
5580    the new set and store the differences in DELTA.  */
5581 
5582 static comp_cost
5583 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5584 	      struct iv_cand *cand, struct iv_ca_delta **delta)
5585 {
5586   unsigned i, ci;
5587   struct iv_use *use;
5588   struct cost_pair *old_cp, *new_cp, *cp;
5589   bitmap_iterator bi;
5590   struct iv_cand *cnd;
5591   comp_cost cost;
5592 
5593   *delta = NULL;
5594   for (i = 0; i < n_iv_uses (data); i++)
5595     {
5596       use = iv_use (data, i);
5597 
5598       old_cp = iv_ca_cand_for_use (ivs, use);
5599       if (old_cp->cand != cand)
5600 	continue;
5601 
5602       new_cp = NULL;
5603 
5604       if (data->consider_all_candidates)
5605 	{
5606 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5607 	    {
5608 	      if (ci == cand->id)
5609 		continue;
5610 
5611 	      cnd = iv_cand (data, ci);
5612 
5613 	      cp = get_use_iv_cost (data, use, cnd);
5614 	      if (!cp)
5615 		continue;
5616 
5617 	      if (!iv_ca_has_deps (ivs, cp))
5618                 continue;
5619 
5620 	      if (!cheaper_cost_pair (cp, new_cp))
5621 		continue;
5622 
5623 	      new_cp = cp;
5624 	    }
5625 	}
5626       else
5627 	{
5628 	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5629 	    {
5630 	      if (ci == cand->id)
5631 		continue;
5632 
5633 	      cnd = iv_cand (data, ci);
5634 
5635 	      cp = get_use_iv_cost (data, use, cnd);
5636 	      if (!cp)
5637 		continue;
5638 	      if (!iv_ca_has_deps (ivs, cp))
5639 		continue;
5640 
5641 	      if (!cheaper_cost_pair (cp, new_cp))
5642 		continue;
5643 
5644 	      new_cp = cp;
5645 	    }
5646 	}
5647 
5648       if (!new_cp)
5649 	{
5650 	  iv_ca_delta_free (delta);
5651 	  return infinite_cost;
5652 	}
5653 
5654       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5655     }
5656 
5657   iv_ca_delta_commit (data, ivs, *delta, true);
5658   cost = iv_ca_cost (ivs);
5659   iv_ca_delta_commit (data, ivs, *delta, false);
5660 
5661   return cost;
5662 }
5663 
5664 /* Try optimizing the set of candidates IVS by removing candidates other
5665    than EXCEPT_CAND from it.  Return the cost of the new set, and store the
5666    differences in DELTA.  */
5667 
5668 static comp_cost
5669 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5670 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
5671 {
5672   bitmap_iterator bi;
5673   struct iv_ca_delta *act_delta, *best_delta;
5674   unsigned i;
5675   comp_cost best_cost, acost;
5676   struct iv_cand *cand;
5677 
5678   best_delta = NULL;
5679   best_cost = iv_ca_cost (ivs);
5680 
5681   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5682     {
5683       cand = iv_cand (data, i);
5684 
5685       if (cand == except_cand)
5686 	continue;
5687 
5688       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5689 
5690       if (compare_costs (acost, best_cost) < 0)
5691 	{
5692 	  best_cost = acost;
5693 	  iv_ca_delta_free (&best_delta);
5694 	  best_delta = act_delta;
5695 	}
5696       else
5697 	iv_ca_delta_free (&act_delta);
5698     }
5699 
5700   if (!best_delta)
5701     {
5702       *delta = NULL;
5703       return best_cost;
5704     }
5705 
5706   /* Recurse to possibly remove other unnecessary ivs.  */
5707   iv_ca_delta_commit (data, ivs, best_delta, true);
5708   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5709   iv_ca_delta_commit (data, ivs, best_delta, false);
5710   *delta = iv_ca_delta_join (best_delta, *delta);
5711   return best_cost;
5712 }
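
/* Note that iv_ca_prune recurses after committing the single best removal,
   so whole chains of now-unnecessary candidates can be stripped in one
   call; the joined delta handed back to the caller undoes the entire
   chain at once.  */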
5713 
5714 /* Tries to extend the set IVS in the best possible way in order
5715    to express the USE.  If ORIGINALP is true, prefer candidates from
5716    the original set of IVs, otherwise favor important candidates not
5717    based on any memory object.  */
5718 
5719 static bool
5720 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5721 		  struct iv_use *use, bool originalp)
5722 {
5723   comp_cost best_cost, act_cost;
5724   unsigned i;
5725   bitmap_iterator bi;
5726   struct iv_cand *cand;
5727   struct iv_ca_delta *best_delta = NULL, *act_delta;
5728   struct cost_pair *cp;
5729 
5730   iv_ca_add_use (data, ivs, use, false);
5731   best_cost = iv_ca_cost (ivs);
5732 
5733   cp = iv_ca_cand_for_use (ivs, use);
5734   if (!cp)
5735     {
5736       ivs->upto--;
5737       ivs->bad_uses--;
5738       iv_ca_add_use (data, ivs, use, true);
5739       best_cost = iv_ca_cost (ivs);
5740       cp = iv_ca_cand_for_use (ivs, use);
5741     }
5742   if (cp)
5743     {
5744       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5745       iv_ca_set_no_cp (data, ivs, use);
5746     }
5747 
5748   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5749      first try important candidates not based on any memory object.  Only if
5750      this fails, try the specific ones.  Rationale -- in loops with many
5751      variables the best choice often is to use just one generic biv.  If we
5752      added here many ivs specific to the uses, the optimization algorithm later
5753      would be likely to get stuck in a local minimum, thus causing us to create
5754      too many ivs.  The approach from few ivs to more seems more likely to be
5755      successful -- starting from few ivs, replacing an expensive use by a
5756      specific iv should always be a win.  */
5757   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5758     {
5759       cand = iv_cand (data, i);
5760 
5761       if (originalp && cand->pos != IP_ORIGINAL)
5762 	continue;
5763 
5764       if (!originalp && cand->iv->base_object != NULL_TREE)
5765 	continue;
5766 
5767       if (iv_ca_cand_used_p (ivs, cand))
5768         continue;
5769 
5770       cp = get_use_iv_cost (data, use, cand);
5771       if (!cp)
5772 	continue;
5773 
5774       iv_ca_set_cp (data, ivs, use, cp);
5775       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5776                                true);
5777       iv_ca_set_no_cp (data, ivs, use);
5778       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5779 
5780       if (compare_costs (act_cost, best_cost) < 0)
5781 	{
5782 	  best_cost = act_cost;
5783 
5784 	  iv_ca_delta_free (&best_delta);
5785 	  best_delta = act_delta;
5786 	}
5787       else
5788 	iv_ca_delta_free (&act_delta);
5789     }
5790 
5791   if (infinite_cost_p (best_cost))
5792     {
5793       for (i = 0; i < use->n_map_members; i++)
5794 	{
5795 	  cp = use->cost_map + i;
5796 	  cand = cp->cand;
5797 	  if (!cand)
5798 	    continue;
5799 
5800 	  /* Already tried this.  */
5801 	  if (cand->important)
5802 	    {
5803 	      if (originalp && cand->pos == IP_ORIGINAL)
5804 		continue;
5805 	      if (!originalp && cand->iv->base_object == NULL_TREE)
5806 		continue;
5807 	    }
5808 
5809 	  if (iv_ca_cand_used_p (ivs, cand))
5810 	    continue;
5811 
5812 	  act_delta = NULL;
5813 	  iv_ca_set_cp (data, ivs, use, cp);
5814 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5815 	  iv_ca_set_no_cp (data, ivs, use);
5816 	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5817 				       cp, act_delta);
5818 
5819 	  if (compare_costs (act_cost, best_cost) < 0)
5820 	    {
5821 	      best_cost = act_cost;
5822 
5823 	      if (best_delta)
5824 		iv_ca_delta_free (&best_delta);
5825 	      best_delta = act_delta;
5826 	    }
5827 	  else
5828 	    iv_ca_delta_free (&act_delta);
5829 	}
5830     }
5831 
5832   iv_ca_delta_commit (data, ivs, best_delta, true);
5833   iv_ca_delta_free (&best_delta);
5834 
5835   return !infinite_cost_p (best_cost);
5836 }
5837 
5838 /* Finds an initial assignment of candidates to uses.  */
5839 
5840 static struct iv_ca *
5841 get_initial_solution (struct ivopts_data *data, bool originalp)
5842 {
5843   struct iv_ca *ivs = iv_ca_new (data);
5844   unsigned i;
5845 
5846   for (i = 0; i < n_iv_uses (data); i++)
5847     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5848       {
5849 	iv_ca_free (&ivs);
5850 	return NULL;
5851       }
5852 
5853   return ivs;
5854 }
5855 
5856 /* Tries to improve set of induction variables IVS.  */
5857 
5858 static bool
5859 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5860 {
5861   unsigned i, n_ivs;
5862   comp_cost acost, best_cost = iv_ca_cost (ivs);
5863   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5864   struct iv_cand *cand;
5865 
5866   /* Try extending the set of induction variables by one.  */
5867   for (i = 0; i < n_iv_cands (data); i++)
5868     {
5869       cand = iv_cand (data, i);
5870 
5871       if (iv_ca_cand_used_p (ivs, cand))
5872 	continue;
5873 
5874       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5875       if (!act_delta)
5876 	continue;
5877 
5878       /* If we successfully added the candidate and the set is small enough,
5879 	 try optimizing it by removing other candidates.  */
5880       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5881       	{
5882 	  iv_ca_delta_commit (data, ivs, act_delta, true);
5883 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5884 	  iv_ca_delta_commit (data, ivs, act_delta, false);
5885 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5886 	}
5887 
5888       if (compare_costs (acost, best_cost) < 0)
5889 	{
5890 	  best_cost = acost;
5891 	  iv_ca_delta_free (&best_delta);
5892 	  best_delta = act_delta;
5893 	}
5894       else
5895 	iv_ca_delta_free (&act_delta);
5896     }
5897 
5898   if (!best_delta)
5899     {
5900       /* Try removing the candidates from the set instead.  */
5901       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5902 
5903       /* Nothing more we can do.  */
5904       if (!best_delta)
5905 	return false;
5906     }
5907 
5908   iv_ca_delta_commit (data, ivs, best_delta, true);
5909   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5910   iv_ca_delta_free (&best_delta);
5911   return true;
5912 }
5913 
5914 /* Attempts to find the optimal set of induction variables.  We use a simple
5915    greedy heuristic -- we try to replace at most one candidate in the selected
5916    solution and remove the unused ivs while this improves the cost.  */
5917 
5918 static struct iv_ca *
5919 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5920 {
5921   struct iv_ca *set;
5922 
5923   /* Get the initial solution.  */
5924   set = get_initial_solution (data, originalp);
5925   if (!set)
5926     {
5927       if (dump_file && (dump_flags & TDF_DETAILS))
5928 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5929       return NULL;
5930     }
5931 
5932   if (dump_file && (dump_flags & TDF_DETAILS))
5933     {
5934       fprintf (dump_file, "Initial set of candidates:\n");
5935       iv_ca_dump (data, dump_file, set);
5936     }
5937 
5938   while (try_improve_iv_set (data, set))
5939     {
5940       if (dump_file && (dump_flags & TDF_DETAILS))
5941 	{
5942 	  fprintf (dump_file, "Improved to:\n");
5943 	  iv_ca_dump (data, dump_file, set);
5944 	}
5945     }
5946 
5947   return set;
5948 }
5949 
5950 static struct iv_ca *
5951 find_optimal_iv_set (struct ivopts_data *data)
5952 {
5953   unsigned i;
5954   struct iv_ca *set, *origset;
5955   struct iv_use *use;
5956   comp_cost cost, origcost;
5957 
5958   /* Determine the cost based on a strategy that starts with the original IVs,
5959      then try again using a strategy that prefers candidates not based
5960      on any memory object.  */
5961   origset = find_optimal_iv_set_1 (data, true);
5962   set = find_optimal_iv_set_1 (data, false);
5963 
5964   if (!origset && !set)
5965     return NULL;
5966 
5967   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5968   cost = set ? iv_ca_cost (set) : infinite_cost;
5969 
5970   if (dump_file && (dump_flags & TDF_DETAILS))
5971     {
5972       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5973 	       origcost.cost, origcost.complexity);
5974       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5975 	       cost.cost, cost.complexity);
5976     }
5977 
5978   /* Choose the one with the best cost.  */
5979   if (compare_costs (origcost, cost) <= 0)
5980     {
5981       if (set)
5982 	iv_ca_free (&set);
5983       set = origset;
5984     }
5985   else if (origset)
5986     iv_ca_free (&origset);
5987 
5988   for (i = 0; i < n_iv_uses (data); i++)
5989     {
5990       use = iv_use (data, i);
5991       use->selected = iv_ca_cand_for_use (set, use)->cand;
5992     }
5993 
5994   return set;
5995 }
5996 
5997 /* Creates a new induction variable corresponding to CAND.  */
5998 
5999 static void
6000 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6001 {
6002   gimple_stmt_iterator incr_pos;
6003   tree base;
6004   bool after = false;
6005 
6006   if (!cand->iv)
6007     return;
6008 
6009   switch (cand->pos)
6010     {
6011     case IP_NORMAL:
6012       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6013       break;
6014 
6015     case IP_END:
6016       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6017       after = true;
6018       break;
6019 
6020     case IP_AFTER_USE:
6021       after = true;
6022       /* fall through */
6023     case IP_BEFORE_USE:
6024       incr_pos = gsi_for_stmt (cand->incremented_at);
6025       break;
6026 
6027     case IP_ORIGINAL:
6028       /* Mark that the iv is preserved.  */
6029       name_info (data, cand->var_before)->preserve_biv = true;
6030       name_info (data, cand->var_after)->preserve_biv = true;
6031 
6032       /* Rewrite the increment so that it uses var_before directly.  */
6033       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6034       return;
6035     }
6036 
6037   gimple_add_tmp_var (cand->var_before);
6038 
6039   base = unshare_expr (cand->iv->base);
6040 
6041   create_iv (base, unshare_expr (cand->iv->step),
6042 	     cand->var_before, data->current_loop,
6043 	     &incr_pos, after, &cand->var_before, &cand->var_after);
6044 }
6045 
6046 /* Creates new induction variables described in SET.  */
6047 
6048 static void
6049 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6050 {
6051   unsigned i;
6052   struct iv_cand *cand;
6053   bitmap_iterator bi;
6054 
6055   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6056     {
6057       cand = iv_cand (data, i);
6058       create_new_iv (data, cand);
6059     }
6060 
6061   if (dump_file && (dump_flags & TDF_DETAILS))
6062     {
6063       fprintf (dump_file, "\nSelected IV set: \n");
6064       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6065         {
6066           cand = iv_cand (data, i);
6067           dump_cand (dump_file, cand);
6068         }
6069       fprintf (dump_file, "\n");
6070     }
6071 }
6072 
6073 /* Rewrites USE (definition of iv used in a nonlinear expression)
6074    using candidate CAND.  */
6075 
6076 static void
6077 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6078 			    struct iv_use *use, struct iv_cand *cand)
6079 {
6080   tree comp;
6081   tree op, tgt;
6082   gimple ass;
6083   gimple_stmt_iterator bsi;
6084 
6085   /* An important special case -- if we are asked to express value of
6086      the original iv by itself, just exit; there is no need to
6087      introduce a new computation (that might also need casting the
6088      variable to unsigned and back).  */
6089   if (cand->pos == IP_ORIGINAL
6090       && cand->incremented_at == use->stmt)
6091     {
6092       enum tree_code stmt_code;
6093 
6094       gcc_assert (is_gimple_assign (use->stmt));
6095       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6096 
6097       /* Check whether we may leave the computation unchanged.
6098 	 This is the case only if it does not rely on other
6099 	 computations in the loop -- otherwise, the computation
6100 	 we rely upon may be removed in remove_unused_ivs,
6101 	 thus leading to ICE.  */
6102       stmt_code = gimple_assign_rhs_code (use->stmt);
6103       if (stmt_code == PLUS_EXPR
6104 	  || stmt_code == MINUS_EXPR
6105 	  || stmt_code == POINTER_PLUS_EXPR)
6106 	{
6107 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6108 	    op = gimple_assign_rhs2 (use->stmt);
6109 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6110 	    op = gimple_assign_rhs1 (use->stmt);
6111 	  else
6112 	    op = NULL_TREE;
6113 	}
6114       else
6115 	op = NULL_TREE;
6116 
6117       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6118 	return;
6119     }
6120 
6121   comp = get_computation (data->current_loop, use, cand);
6122   gcc_assert (comp != NULL_TREE);
6123 
6124   switch (gimple_code (use->stmt))
6125     {
6126     case GIMPLE_PHI:
6127       tgt = PHI_RESULT (use->stmt);
6128 
6129       /* If we should keep the biv, do not replace it.  */
6130       if (name_info (data, tgt)->preserve_biv)
6131 	return;
6132 
6133       bsi = gsi_after_labels (gimple_bb (use->stmt));
6134       break;
6135 
6136     case GIMPLE_ASSIGN:
6137       tgt = gimple_assign_lhs (use->stmt);
6138       bsi = gsi_for_stmt (use->stmt);
6139       break;
6140 
6141     default:
6142       gcc_unreachable ();
6143     }
6144 
6145   if (!valid_gimple_rhs_p (comp)
6146       || (gimple_code (use->stmt) != GIMPLE_PHI
6147 	  /* We can't allow re-allocating the stmt as it might be pointed
6148 	     to still.  */
6149 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6150 	      >= gimple_num_ops (gsi_stmt (bsi)))))
6151     {
6152       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6153 				       true, GSI_SAME_STMT);
6154       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6155 	{
6156 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6157 	  /* As this isn't a plain copy we have to reset alignment
6158 	     information.  */
6159 	  if (SSA_NAME_PTR_INFO (comp))
6160 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6161 	}
6162     }
6163 
6164   if (gimple_code (use->stmt) == GIMPLE_PHI)
6165     {
6166       ass = gimple_build_assign (tgt, comp);
6167       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6168 
6169       bsi = gsi_for_stmt (use->stmt);
6170       remove_phi_node (&bsi, false);
6171     }
6172   else
6173     {
6174       gimple_assign_set_rhs_from_tree (&bsi, comp);
6175       use->stmt = gsi_stmt (bsi);
6176     }
6177 }
6178 
6179 /* Performs a peephole optimization to reorder the iv update statement with
6180    a mem ref to enable instruction combining in later phases. The mem ref uses
6181    the iv value before the update, so the reordering transformation requires
6182    adjustment of the offset. CAND is the selected IV_CAND.
6183 
6184    Example:
6185 
6186    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6187    iv2 = iv1 + 1;
6188 
6189    if (t < val)      (1)
6190      goto L;
6191    goto Head;
6192 
6193 
6194    Directly propagating t over to (1) would introduce an overlapping live
6195    range and thus increase register pressure.  This peephole transforms it into:
6196 
6197 
6198    iv2 = iv1 + 1;
6199    t = MEM_REF (base, iv2, 8, 8);
6200    if (t < val)
6201      goto L;
6202    goto Head;
6203 */
6204 
6205 static void
6206 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6207 {
6208   tree var_after;
6209   gimple iv_update, stmt;
6210   basic_block bb;
6211   gimple_stmt_iterator gsi, gsi_iv;
6212 
6213   if (cand->pos != IP_NORMAL)
6214     return;
6215 
6216   var_after = cand->var_after;
6217   iv_update = SSA_NAME_DEF_STMT (var_after);
6218 
6219   bb = gimple_bb (iv_update);
6220   gsi = gsi_last_nondebug_bb (bb);
6221   stmt = gsi_stmt (gsi);
6222 
6223   /* Only handle conditional statements for now.  */
6224   if (gimple_code (stmt) != GIMPLE_COND)
6225     return;
6226 
6227   gsi_prev_nondebug (&gsi);
6228   stmt = gsi_stmt (gsi);
6229   if (stmt != iv_update)
6230     return;
6231 
6232   gsi_prev_nondebug (&gsi);
6233   if (gsi_end_p (gsi))
6234     return;
6235 
6236   stmt = gsi_stmt (gsi);
6237   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6238     return;
6239 
6240   if (stmt != use->stmt)
6241     return;
6242 
6243   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6244     return;
6245 
6246   if (dump_file && (dump_flags & TDF_DETAILS))
6247     {
6248       fprintf (dump_file, "Reordering \n");
6249       print_gimple_stmt (dump_file, iv_update, 0, 0);
6250       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6251       fprintf (dump_file, "\n");
6252     }
6253 
6254   gsi = gsi_for_stmt (use->stmt);
6255   gsi_iv = gsi_for_stmt (iv_update);
6256   gsi_move_before (&gsi_iv, &gsi);
6257 
6258   cand->pos = IP_BEFORE_USE;
6259   cand->incremented_at = use->stmt;
6260 }
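
/* Editorial note: the offset adjustment shown in the example above (16
   becoming 8) is not performed here explicitly.  Moving the update and
   switching CAND to IP_BEFORE_USE make the later rewrite_use_address /
   get_computation_aff recompute the address from the already-incremented
   variable, which is where the adjusted offset falls out.  */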
6261 
6262 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6263 
6264 static void
6265 rewrite_use_address (struct ivopts_data *data,
6266 		     struct iv_use *use, struct iv_cand *cand)
6267 {
6268   aff_tree aff;
6269   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6270   tree base_hint = NULL_TREE;
6271   tree ref, iv;
6272   bool ok;
6273 
6274   adjust_iv_update_pos (cand, use);
6275   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6276   gcc_assert (ok);
6277   unshare_aff_combination (&aff);
6278 
6279   /* To avoid undefined overflow problems, all IV candidates use unsigned
6280      integer types.  The drawback is that this makes it impossible for
6281      create_mem_ref to distinguish an IV that is based on a memory object
6282      from one that represents simply an offset.
6283 
6284      To work around this problem, we pass a hint to create_mem_ref that
6285      indicates which variable (if any) in aff is an IV based on a memory
6286      object.  Note that we only consider the candidate.  If this is not
6287      based on an object, the base of the reference is in some subexpression
6288      of the use -- but these will use pointer types, so they are recognized
6289      by the create_mem_ref heuristics anyway.  */
6290   if (cand->iv->base_object)
6291     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6292 
6293   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6294   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6295 			reference_alias_ptr_type (*use->op_p),
6296 			iv, base_hint, data->speed);
6297   copy_ref_info (ref, *use->op_p);
6298   *use->op_p = ref;
6299 }
6300 
6301 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6302    candidate CAND.  */
6303 
6304 static void
6305 rewrite_use_compare (struct ivopts_data *data,
6306 		     struct iv_use *use, struct iv_cand *cand)
6307 {
6308   tree comp, *var_p, op, bound;
6309   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6310   enum tree_code compare;
6311   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6312   bool ok;
6313 
6314   bound = cp->value;
6315   if (bound)
6316     {
6317       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6318       tree var_type = TREE_TYPE (var);
6319       gimple_seq stmts;
6320 
6321       if (dump_file && (dump_flags & TDF_DETAILS))
6322         {
6323           fprintf (dump_file, "Replacing exit test: ");
6324           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6325         }
6326       compare = cp->comp;
6327       bound = unshare_expr (fold_convert (var_type, bound));
6328       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6329       if (stmts)
6330 	gsi_insert_seq_on_edge_immediate (
6331 		loop_preheader_edge (data->current_loop),
6332 		stmts);
6333 
6334       gimple_cond_set_lhs (use->stmt, var);
6335       gimple_cond_set_code (use->stmt, compare);
6336       gimple_cond_set_rhs (use->stmt, op);
6337       return;
6338     }
6339 
6340   /* The induction variable elimination failed; just express the original
6341      giv.  */
6342   comp = get_computation (data->current_loop, use, cand);
6343   gcc_assert (comp != NULL_TREE);
6344 
6345   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6346   gcc_assert (ok);
6347 
6348   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6349 				     true, GSI_SAME_STMT);
6350 }
6351 
6352 /* Rewrites USE using candidate CAND.  */
6353 
6354 static void
6355 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6356 {
6357   switch (use->type)
6358     {
6359       case USE_NONLINEAR_EXPR:
6360 	rewrite_use_nonlinear_expr (data, use, cand);
6361 	break;
6362 
6363       case USE_ADDRESS:
6364 	rewrite_use_address (data, use, cand);
6365 	break;
6366 
6367       case USE_COMPARE:
6368 	rewrite_use_compare (data, use, cand);
6369 	break;
6370 
6371       default:
6372 	gcc_unreachable ();
6373     }
6374 
6375   update_stmt (use->stmt);
6376 }
6377 
6378 /* Rewrite the uses using the selected induction variables.  */
6379 
6380 static void
6381 rewrite_uses (struct ivopts_data *data)
6382 {
6383   unsigned i;
6384   struct iv_cand *cand;
6385   struct iv_use *use;
6386 
6387   for (i = 0; i < n_iv_uses (data); i++)
6388     {
6389       use = iv_use (data, i);
6390       cand = use->selected;
6391       gcc_assert (cand);
6392 
6393       rewrite_use (data, use, cand);
6394     }
6395 }
6396 
6397 /* Removes the ivs that are not used after rewriting.  */
6398 
6399 static void
6400 remove_unused_ivs (struct ivopts_data *data)
6401 {
6402   unsigned j;
6403   bitmap_iterator bi;
6404   bitmap toremove = BITMAP_ALLOC (NULL);
6405 
6406   /* Figure out an order in which to release SSA DEFs so that we don't
6407      release something that we'd have to propagate into a debug stmt
6408      afterwards.  */
6409   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6410     {
6411       struct version_info *info;
6412 
6413       info = ver_info (data, j);
6414       if (info->iv
6415 	  && !integer_zerop (info->iv->step)
6416 	  && !info->inv_id
6417 	  && !info->iv->have_use_for
6418 	  && !info->preserve_biv)
6419 	{
6420 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6421 
6422 	  tree def = info->iv->ssa_name;
6423 
6424 	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6425 	    {
6426 	      imm_use_iterator imm_iter;
6427 	      use_operand_p use_p;
6428 	      gimple stmt;
6429 	      int count = 0;
6430 
6431 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6432 		{
6433 		  if (!gimple_debug_bind_p (stmt))
6434 		    continue;
6435 
6436 		  /* We just want to determine whether to do nothing
6437 		     (count == 0), to substitute the computed
6438 		     expression into a single use of the SSA DEF by
6439 		     itself (count == 1), or to use a debug temp
6440 		     because the SSA DEF is used multiple times or as
6441 		     part of a larger expression (count > 1). */
6442 		  count++;
6443 		  if (gimple_debug_bind_get_value (stmt) != def)
6444 		    count++;
6445 
6446 		  if (count > 1)
6447 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
6448 		}
6449 
6450 	      if (!count)
6451 		continue;
6452 
6453 	      struct iv_use dummy_use;
6454 	      struct iv_cand *best_cand = NULL, *cand;
6455 	      unsigned i, best_pref = 0, cand_pref;
6456 
6457 	      memset (&dummy_use, 0, sizeof (dummy_use));
6458 	      dummy_use.iv = info->iv;
6459 	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6460 		{
6461 		  cand = iv_use (data, i)->selected;
6462 		  if (cand == best_cand)
6463 		    continue;
6464 		  cand_pref = operand_equal_p (cand->iv->step,
6465 					       info->iv->step, 0)
6466 		    ? 4 : 0;
6467 		  cand_pref
6468 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
6469 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
6470 		    ? 2 : 0;
6471 		  cand_pref
6472 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
6473 		    ? 1 : 0;
6474 		  if (best_cand == NULL || best_pref < cand_pref)
6475 		    {
6476 		      best_cand = cand;
6477 		      best_pref = cand_pref;
6478 		    }
6479 		}
6480 
6481 	      if (!best_cand)
6482 		continue;
6483 
6484 	      tree comp = get_computation_at (data->current_loop,
6485 					      &dummy_use, best_cand,
6486 					      SSA_NAME_DEF_STMT (def));
6487 	      if (!comp)
6488 		continue;
6489 
6490 	      if (count > 1)
6491 		{
6492 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
6493 		  DECL_ARTIFICIAL (vexpr) = 1;
6494 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
6495 		  if (SSA_NAME_VAR (def))
6496 		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6497 		  else
6498 		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6499 		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6500 		  gimple_stmt_iterator gsi;
6501 
6502 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6503 		    gsi = gsi_after_labels (gimple_bb
6504 					    (SSA_NAME_DEF_STMT (def)));
6505 		  else
6506 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6507 
6508 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6509 		  comp = vexpr;
6510 		}
6511 
6512 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6513 		{
6514 		  if (!gimple_debug_bind_p (stmt))
6515 		    continue;
6516 
6517 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6518 		    SET_USE (use_p, comp);
6519 
6520 		  update_stmt (stmt);
6521 		}
6522 	    }
6523 	}
6524     }
6525 
6526   release_defs_bitset (toremove);
6527 
6528   BITMAP_FREE (toremove);
6529 }
6530 
6531 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6532    for pointer_map_traverse.  */
6533 
6534 static bool
6535 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6536                       void *data ATTRIBUTE_UNUSED)
6537 {
6538   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6539 
6540   free (niter);
6541   return true;
6542 }
6543 
6544 /* Frees data allocated by the optimization of a single loop.  */
6545 
6546 static void
6547 free_loop_data (struct ivopts_data *data)
6548 {
6549   unsigned i, j;
6550   bitmap_iterator bi;
6551   tree obj;
6552 
6553   if (data->niters)
6554     {
6555       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6556       pointer_map_destroy (data->niters);
6557       data->niters = NULL;
6558     }
6559 
6560   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6561     {
6562       struct version_info *info;
6563 
6564       info = ver_info (data, i);
6565       free (info->iv);
6566       info->iv = NULL;
6567       info->has_nonlin_use = false;
6568       info->preserve_biv = false;
6569       info->inv_id = 0;
6570     }
6571   bitmap_clear (data->relevant);
6572   bitmap_clear (data->important_candidates);
6573 
6574   for (i = 0; i < n_iv_uses (data); i++)
6575     {
6576       struct iv_use *use = iv_use (data, i);
6577 
6578       free (use->iv);
6579       BITMAP_FREE (use->related_cands);
6580       for (j = 0; j < use->n_map_members; j++)
6581 	if (use->cost_map[j].depends_on)
6582 	  BITMAP_FREE (use->cost_map[j].depends_on);
6583       free (use->cost_map);
6584       free (use);
6585     }
6586   data->iv_uses.truncate (0);
6587 
6588   for (i = 0; i < n_iv_cands (data); i++)
6589     {
6590       struct iv_cand *cand = iv_cand (data, i);
6591 
6592       free (cand->iv);
6593       if (cand->depends_on)
6594 	BITMAP_FREE (cand->depends_on);
6595       free (cand);
6596     }
6597   data->iv_candidates.truncate (0);
6598 
6599   if (data->version_info_size < num_ssa_names)
6600     {
6601       data->version_info_size = 2 * num_ssa_names;
6602       free (data->version_info);
6603       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6604     }
6605 
6606   data->max_inv_id = 0;
6607 
6608   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6609     SET_DECL_RTL (obj, NULL_RTX);
6610 
6611   decl_rtl_to_reset.truncate (0);
6612 
6613   htab_empty (data->inv_expr_tab);
6614   data->inv_expr_id = 0;
6615 }
6616 
6617 /* Finalizes the data structures used by the iv optimization
6618    pass.  */
6619 
6620 static void
6621 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6622 {
6623   free_loop_data (data);
6624   free (data->version_info);
6625   BITMAP_FREE (data->relevant);
6626   BITMAP_FREE (data->important_candidates);
6627 
6628   decl_rtl_to_reset.release ();
6629   data->iv_uses.release ();
6630   data->iv_candidates.release ();
6631   htab_delete (data->inv_expr_tab);
6632 }
6633 
6634 /* Returns true if the loop body BODY includes any function calls.  */
6635 
6636 static bool
6637 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6638 {
6639   gimple_stmt_iterator gsi;
6640   unsigned i;
6641 
6642   for (i = 0; i < num_nodes; i++)
6643     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6644       {
6645 	gimple stmt = gsi_stmt (gsi);
6646 	if (is_gimple_call (stmt)
6647 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6648 	  return true;
6649       }
6650   return false;
6651 }
6652 
6653 /* Optimizes the LOOP.  Returns true if anything changed.  */
6654 
6655 static bool
6656 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6657 {
6658   bool changed = false;
6659   struct iv_ca *iv_ca;
6660   edge exit = single_dom_exit (loop);
6661   basic_block *body;
6662 
6663   gcc_assert (!data->niters);
6664   data->current_loop = loop;
6665   data->speed = optimize_loop_for_speed_p (loop);
6666 
6667   if (dump_file && (dump_flags & TDF_DETAILS))
6668     {
6669       fprintf (dump_file, "Processing loop %d\n", loop->num);
6670 
6671       if (exit)
6672 	{
6673 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6674 		   exit->src->index, exit->dest->index);
6675 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6676 	  fprintf (dump_file, "\n");
6677 	}
6678 
6679       fprintf (dump_file, "\n");
6680     }
6681 
6682   body = get_loop_body (loop);
6683   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6684   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6685   free (body);
6686 
6687   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6688 
6689   /* For each ssa name, determine whether it behaves as an induction variable
6690      in some loop.  */
6691   if (!find_induction_variables (data))
6692     goto finish;
6693 
6694   /* Finds interesting uses (item 1).  */
6695   find_interesting_uses (data);
6696   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6697     goto finish;
6698 
6699   /* Finds candidates for the induction variables (item 2).  */
6700   find_iv_candidates (data);
6701 
6702   /* Calculates the costs (item 3, part 1).  */
6703   determine_iv_costs (data);
6704   determine_use_iv_costs (data);
6705   determine_set_costs (data);
6706 
6707   /* Find the optimal set of induction variables (item 3, part 2).  */
6708   iv_ca = find_optimal_iv_set (data);
6709   if (!iv_ca)
6710     goto finish;
6711   changed = true;
6712 
6713   /* Create the new induction variables (item 4, part 1).  */
6714   create_new_ivs (data, iv_ca);
6715   iv_ca_free (&iv_ca);
6716 
6717   /* Rewrite the uses (item 4, part 2).  */
6718   rewrite_uses (data);
6719 
6720   /* Remove the ivs that are unused after rewriting.  */
6721   remove_unused_ivs (data);
6722 
6723   /* We have changed the structure of induction variables; it might happen
6724      that definitions in the scev database refer to some of them that were
6725      eliminated.  */
6726   scev_reset ();
6727 
6728 finish:
6729   free_loop_data (data);
6730 
6731   return changed;
6732 }
6733 
6734 /* Main entry point.  Optimizes induction variables in loops.  */
6735 
6736 void
6737 tree_ssa_iv_optimize (void)
6738 {
6739   struct loop *loop;
6740   struct ivopts_data data;
6741   loop_iterator li;
6742 
6743   tree_ssa_iv_optimize_init (&data);
6744 
6745   /* Optimize the loops starting with the innermost ones.  */
6746   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6747     {
6748       if (dump_file && (dump_flags & TDF_DETAILS))
6749 	flow_loop_dump (loop, dump_file, NULL, 1);
6750 
6751       tree_ssa_iv_optimize_loop (&data, loop);
6752     }
6753 
6754   tree_ssa_iv_optimize_finalize (&data);
6755 }
6756