/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above
   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The
	 cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset
	 for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
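
/* For illustration only, a rough sketch of the net effect on a simple
   loop, assuming an array of 4-byte elements:

     before:                             after strength reduction and
                                         iv elimination (roughly):
       for (i = 0; i < 100; i++)           for (p = &a[0]; p != &a[100]; p++)
         a[i] = 0;                           *p = 0;

   The address use "a[i]" picks the pointer candidate P, which removes the
   implicit "base + 4 * i" address computation, and the exit test is
   rewritten in terms of P so that I itself can be eliminated.  */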

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier of the corresponding use, if the iv
			   has one.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost = {0, 0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
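
/* For illustration only: address uses that share a stripped base are
   chained as sub uses.  E.g. accesses *p, *(p + 4) and *(p + 8) in a loop
   body would form one group with ADDR_BASE p and ADDR_OFFSETs 0, 4 and 8,
   linked through NEXT in ascending offset order, so that they can be
   costed against a single candidate.  */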

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in the current loop.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory addresses.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider all candidates, rather than just the related and
     important ones, when replacing a use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
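
/* As described in the overview at the top of the file, the total COST of
   an assignment is expected to combine CAND_USE_COST, CAND_COST and a
   register-pressure term derived from N_REGS and the invariants in
   use.  */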

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing the number of iterations determined
   from EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
	 with ssa names that appear in phi nodes on abnormal edges, so that
	 we do not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing the number of iterations determined
   from the single dominating exit of DATA->current_loop, or NULL if
   something goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
		           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
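
/* For example (illustrative): for &a[i] the base object is &a; for a
   pointer P of unknown origin the whole of P is returned; for the
   integer constant 0 (a null pointer) there is no base object.  */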

/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we would stop at any ssa var, but since that is
	 expensive and unlikely to matter, we just do it for the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
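
/* For instance (illustrative), given the loop header

     i_1 = PHI <0 (preheader), i_2 (latch)>;
     ...
     i_2 = i_1 + 1;

   find_bivs records i_1 as an iv with base 0 and step 1; mark_bivs below
   then flags both i_1 and i_2 as bivs.  */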

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we would stop at any ssa var, but since that is
     expensive and unlikely to matter, we just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		    struct iv *iv, gimple *stmt, enum use_type use_type,
		    tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		    unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* The sub use list is maintained in ascending offset order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
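
/* E.g. (illustrative): for "if (i_1 < n_2)" where i_1 is an iv with
   nonzero step and n_2 is loop invariant, CONTROL_VAR is set to point to
   i_1, BOUND to n_2, and true is returned; for "if (n_2 > i_1)" the
   operands are swapped first.  */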

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}

/* Returns the outermost loop in which EXPR is obviously invariant,
   relative to the loop LOOP, i.e. the outermost loop such that all of
   EXPR's operands are defined outside of it.  Returns NULL if EXPR is
   not even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Given expression EXPR which computes inductive values with respect
   to the loop recorded in DATA, this function returns the biv from which
   EXPR is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
1763 
1764 /* Record BIV, its predecessor and successor that they are used in
1765    address type uses.  */
1766 
1767 static void
record_biv_for_address_use(struct ivopts_data * data,struct iv * biv)1768 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1769 {
1770   unsigned i;
1771   tree type, base_1, base_2;
1772   bitmap_iterator bi;
1773 
1774   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1775       || biv->have_address_use || !biv->no_overflow)
1776     return;
1777 
1778   type = TREE_TYPE (biv->base);
1779   if (!INTEGRAL_TYPE_P (type))
1780     return;
1781 
1782   biv->have_address_use = true;
1783   data->bivs_not_used_in_addr--;
1784   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1785   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1786     {
1787       struct iv *iv = ver_info (data, i)->iv;
1788 
1789       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1790 	  || iv->have_address_use || !iv->no_overflow)
1791 	continue;
1792 
1793       if (type != TREE_TYPE (iv->base)
1794 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1795 	continue;
1796 
1797       if (!operand_equal_p (biv->step, iv->step, 0))
1798 	continue;
1799 
1800       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1801       if (operand_equal_p (base_1, iv->base, 0)
1802 	  || operand_equal_p (base_2, biv->base, 0))
1803 	{
1804 	  iv->have_address_use = true;
1805 	  data->bivs_not_used_in_addr--;
1806 	}
1807     }
1808 }
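
/* A hedged sketch of the pairing done above (values hypothetical): two
   bivs with equal steps are treated as predecessor/successor when one
   base equals the other base plus one step, e.g.

     biv1: { base = n_1,     step = 4 }
     biv2: { base = n_1 + 4, step = 4 }

   Here base_1 computed for biv1 equals biv2's base, so marking biv1 as
   having address uses marks biv2 as well, keeping bivs_not_used_in_addr
   consistent for both.  */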
1809 
1810 /* Accumulates the steps of indices into DATA and replaces their values with the
1811    initial ones.  Returns false when the value of the index cannot be determined.
1812    Callback for for_each_index.  */
1813 
1814 struct ifs_ivopts_data
1815 {
1816   struct ivopts_data *ivopts_data;
1817   gimple *stmt;
1818   tree step;
1819 };
1820 
1821 static bool
1822 idx_find_step (tree base, tree *idx, void *data)
1823 {
1824   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1825   struct iv *iv;
1826   bool use_overflow_semantics = false;
1827   tree step, iv_base, iv_step, lbound, off;
1828   struct loop *loop = dta->ivopts_data->current_loop;
1829 
1830   /* If base is a component ref, require that the offset of the reference
1831      be invariant.  */
1832   if (TREE_CODE (base) == COMPONENT_REF)
1833     {
1834       off = component_ref_field_offset (base);
1835       return expr_invariant_in_loop_p (loop, off);
1836     }
1837 
1838   /* If base is an array, first check whether we will be able to move the
1839      reference out of the loop (in order to take its address in strength
1840      reduction).  In order for this to work we need both lower bound
1841      and step to be loop invariants.  */
1842   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1843     {
1844       /* Moreover, for a range, the size needs to be invariant as well.  */
1845       if (TREE_CODE (base) == ARRAY_RANGE_REF
1846 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1847 	return false;
1848 
1849       step = array_ref_element_size (base);
1850       lbound = array_ref_low_bound (base);
1851 
1852       if (!expr_invariant_in_loop_p (loop, step)
1853 	  || !expr_invariant_in_loop_p (loop, lbound))
1854 	return false;
1855     }
1856 
1857   if (TREE_CODE (*idx) != SSA_NAME)
1858     return true;
1859 
1860   iv = get_iv (dta->ivopts_data, *idx);
1861   if (!iv)
1862     return false;
1863 
1864   /* XXX  For a base of *D42 with iv->base being &x[0], we produce
1865 	  *&x[0], which is not folded and does not trigger the
1866 	  ARRAY_REF path below.  */
1867   *idx = iv->base;
1868 
1869   if (integer_zerop (iv->step))
1870     return true;
1871 
1872   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1873     {
1874       step = array_ref_element_size (base);
1875 
1876       /* We only handle addresses whose step is an integer constant.  */
1877       if (TREE_CODE (step) != INTEGER_CST)
1878 	return false;
1879     }
1880   else
1881     /* The step for pointer arithmetic is already 1 byte.  */
1882     step = size_one_node;
1883 
1884   iv_base = iv->base;
1885   iv_step = iv->step;
1886   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1887     use_overflow_semantics = true;
1888 
1889   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1890 			    sizetype, &iv_base, &iv_step, dta->stmt,
1891 			    use_overflow_semantics))
1892     {
1893       /* The index might wrap.  */
1894       return false;
1895     }
1896 
1897   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1898   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1899 
1900   if (dta->ivopts_data->bivs_not_used_in_addr)
1901     {
1902       if (!iv->biv_p)
1903 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1904 
1905       record_biv_for_address_use (dta->ivopts_data, iv);
1906     }
1907   return true;
1908 }
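
/* Editorial sketch of the accumulation (types and sizes hypothetical):
   for a reference a[i][j] with "int a[100][100]", where i steps by 1
   and j steps by 2, the two callback invocations contribute

     dta->step = 0   + 1 * 400      -- row index: 100 ints of 4 bytes
     dta->step = 400 + 2 * 4        -- column index

   so the address of the reference advances by 408 bytes per iteration.  */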
1909 
1910 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1911    object is passed to it in DATA.  */
1912 
1913 static bool
1914 idx_record_use (tree base, tree *idx,
1915 		void *vdata)
1916 {
1917   struct ivopts_data *data = (struct ivopts_data *) vdata;
1918   find_interesting_uses_op (data, *idx);
1919   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1920     {
1921       find_interesting_uses_op (data, array_ref_element_size (base));
1922       find_interesting_uses_op (data, array_ref_low_bound (base));
1923     }
1924   return true;
1925 }
1926 
1927 /* If we can prove that TOP = cst * BOT for some constant cst,
1928    store cst to MUL and return true.  Otherwise return false.
1929    The returned value is always sign-extended, regardless of the
1930    signedness of TOP and BOT.  */
1931 
1932 static bool
1933 constant_multiple_of (tree top, tree bot, widest_int *mul)
1934 {
1935   tree mby;
1936   enum tree_code code;
1937   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1938   widest_int res, p0, p1;
1939 
1940   STRIP_NOPS (top);
1941   STRIP_NOPS (bot);
1942 
1943   if (operand_equal_p (top, bot, 0))
1944     {
1945       *mul = 1;
1946       return true;
1947     }
1948 
1949   code = TREE_CODE (top);
1950   switch (code)
1951     {
1952     case MULT_EXPR:
1953       mby = TREE_OPERAND (top, 1);
1954       if (TREE_CODE (mby) != INTEGER_CST)
1955 	return false;
1956 
1957       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1958 	return false;
1959 
1960       *mul = wi::sext (res * wi::to_widest (mby), precision);
1961       return true;
1962 
1963     case PLUS_EXPR:
1964     case MINUS_EXPR:
1965       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1966 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1967 	return false;
1968 
1969       if (code == MINUS_EXPR)
1970 	p1 = -p1;
1971       *mul = wi::sext (p0 + p1, precision);
1972       return true;
1973 
1974     case INTEGER_CST:
1975       if (TREE_CODE (bot) != INTEGER_CST)
1976 	return false;
1977 
1978       p0 = widest_int::from (top, SIGNED);
1979       p1 = widest_int::from (bot, SIGNED);
1980       if (p1 == 0)
1981 	return false;
1982       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1983       return res == 0;
1984 
1985     default:
1986       return false;
1987     }
1988 }
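
/* A worked example (editorial; n_1 is a hypothetical SSA name): for
   TOP = n_1 * 8 + n_1 * 4 and BOT = n_1, the recursion yields 8 and 4
   for the two PLUS_EXPR operands, so *MUL is set to 12 and true is
   returned.  For TOP = 14 and BOT = 4, the INTEGER_CST case divides
   14 by 4 with remainder 2, so false is returned.  */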
1989 
1990 /* Return true if memory reference REF with step STEP may be unaligned.  */
1991 
1992 static bool
1993 may_be_unaligned_p (tree ref, tree step)
1994 {
1995   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1996      thus they are not misaligned.  */
1997   if (TREE_CODE (ref) == TARGET_MEM_REF)
1998     return false;
1999 
2000   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2001   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2002     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2003 
2004   unsigned HOST_WIDE_INT bitpos;
2005   unsigned int ref_align;
2006   get_object_alignment_1 (ref, &ref_align, &bitpos);
2007   if (ref_align < align
2008       || (bitpos % align) != 0
2009       || (bitpos % BITS_PER_UNIT) != 0)
2010     return true;
2011 
2012   unsigned int trailing_zeros = tree_ctz (step);
2013   if (trailing_zeros < HOST_BITS_PER_INT
2014       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2015     return true;
2016 
2017   return false;
2018 }
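
/* For instance (editorial sketch), the int member of

     struct __attribute__ ((packed)) s { char c; int i; };

   lives at bit position 8, which is not a multiple of int's 32-bit
   alignment, so a reference to it may be unaligned.  Similarly, a step
   whose trailing zero bits give an advance smaller than the required
   alignment -- e.g. a 2-byte step against a 4-byte alignment -- makes
   some iterations misaligned, which the tree_ctz test above detects.  */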
2019 
2020 /* Return true if EXPR may be non-addressable.   */
2021 
2022 bool
2023 may_be_nonaddressable_p (tree expr)
2024 {
2025   switch (TREE_CODE (expr))
2026     {
2027     case TARGET_MEM_REF:
2028       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2029 	 target, thus they are always addressable.  */
2030       return false;
2031 
2032     case MEM_REF:
2033       /* Likewise for MEM_REFs, modulo the storage order.  */
2034       return REF_REVERSE_STORAGE_ORDER (expr);
2035 
2036     case BIT_FIELD_REF:
2037       if (REF_REVERSE_STORAGE_ORDER (expr))
2038 	return true;
2039       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2040 
2041     case COMPONENT_REF:
2042       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2043 	return true;
2044       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2045 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2046 
2047     case ARRAY_REF:
2048     case ARRAY_RANGE_REF:
2049       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2050 	return true;
2051       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2052 
2053     case VIEW_CONVERT_EXPR:
2054       /* View conversions of this kind may wrap non-addressable objects
2055 	 and make them look addressable.  After some processing the
2056 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2057 	 of inappropriate objects to be built.  */
2058       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2059 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2060 	return true;
2061       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2062 
2063     CASE_CONVERT:
2064       return true;
2065 
2066     default:
2067       break;
2068     }
2069 
2070   return false;
2071 }
2072 
2073 static tree
2074 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2075 
2076 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2077    If there is an existing use with the same stripped iv base and step,
2078    this function records this one as a sub use of that use; otherwise it
2079    records it as a normal use.  */
2080 
2081 static struct iv_use *
2082 record_group_use (struct ivopts_data *data, tree *use_p,
2083 		  struct iv *iv, gimple *stmt, enum use_type use_type)
2084 {
2085   unsigned int i;
2086   struct iv_use *use;
2087   tree addr_base;
2088   unsigned HOST_WIDE_INT addr_offset;
2089 
2090   /* Only support sub uses for address type uses, that is, those with a
2091      base object.  */
2092   if (!iv->base_object)
2093     return record_use (data, use_p, iv, stmt, use_type);
2094 
2095   addr_base = strip_offset (iv->base, &addr_offset);
2096   for (i = 0; i < n_iv_uses (data); i++)
2097     {
2098       use = iv_use (data, i);
2099       if (use->type != USE_ADDRESS || !use->iv->base_object)
2100 	continue;
2101 
2102       /* Check if it has the same stripped base and step.  */
2103       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2104 	  && operand_equal_p (iv->step, use->iv->step, 0)
2105 	  && operand_equal_p (addr_base, use->addr_base, 0))
2106 	break;
2107     }
2108 
2109   if (i == n_iv_uses (data))
2110     return record_use (data, use_p, iv, stmt,
2111 		       use_type, addr_base, addr_offset);
2112   else
2113     return record_sub_use (data, use_p, iv, stmt,
2114 			   use_type, addr_base, addr_offset, i);
2115 }
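
/* Editorial example (4-byte elements assumed): the accesses a[i] and
   a[i + 1] strip to the same base with offsets 0 and 4, share the same
   base object and step, and hence end up in one group -- the first as
   the representative use, the second recorded as a sub use of it.  */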
2116 
2117 /* Finds addresses in *OP_P inside STMT.  */
2118 
2119 static void
2120 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2121 			       tree *op_p)
2122 {
2123   tree base = *op_p, step = size_zero_node;
2124   struct iv *civ;
2125   struct ifs_ivopts_data ifs_ivopts_data;
2126 
2127   /* Do not play with volatile memory references.  A bit too conservative,
2128      perhaps, but safe.  */
2129   if (gimple_has_volatile_ops (stmt))
2130     goto fail;
2131 
2132   /* Ignore bitfields for now.  Not really something terribly complicated
2133      to handle.  TODO.  */
2134   if (TREE_CODE (base) == BIT_FIELD_REF)
2135     goto fail;
2136 
2137   base = unshare_expr (base);
2138 
2139   if (TREE_CODE (base) == TARGET_MEM_REF)
2140     {
2141       tree type = build_pointer_type (TREE_TYPE (base));
2142       tree astep;
2143 
2144       if (TMR_BASE (base)
2145 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2146 	{
2147 	  civ = get_iv (data, TMR_BASE (base));
2148 	  if (!civ)
2149 	    goto fail;
2150 
2151 	  TMR_BASE (base) = civ->base;
2152 	  step = civ->step;
2153 	}
2154       if (TMR_INDEX2 (base)
2155 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2156 	{
2157 	  civ = get_iv (data, TMR_INDEX2 (base));
2158 	  if (!civ)
2159 	    goto fail;
2160 
2161 	  TMR_INDEX2 (base) = civ->base;
2162 	  step = civ->step;
2163 	}
2164       if (TMR_INDEX (base)
2165 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2166 	{
2167 	  civ = get_iv (data, TMR_INDEX (base));
2168 	  if (!civ)
2169 	    goto fail;
2170 
2171 	  TMR_INDEX (base) = civ->base;
2172 	  astep = civ->step;
2173 
2174 	  if (astep)
2175 	    {
2176 	      if (TMR_STEP (base))
2177 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2178 
2179 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2180 	    }
2181 	}
2182 
2183       if (integer_zerop (step))
2184 	goto fail;
2185       base = tree_mem_ref_addr (type, base);
2186     }
2187   else
2188     {
2189       ifs_ivopts_data.ivopts_data = data;
2190       ifs_ivopts_data.stmt = stmt;
2191       ifs_ivopts_data.step = size_zero_node;
2192       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2193 	  || integer_zerop (ifs_ivopts_data.step))
2194 	goto fail;
2195       step = ifs_ivopts_data.step;
2196 
2197       /* Check that the base expression is addressable.  This needs
2198 	 to be done after substituting bases of IVs into it.  */
2199       if (may_be_nonaddressable_p (base))
2200 	goto fail;
2201 
2202       /* Moreover, on strict alignment platforms, check that it is
2203 	 sufficiently aligned.  */
2204       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2205 	goto fail;
2206 
2207       base = build_fold_addr_expr (base);
2208 
2209       /* Substituting bases of IVs into the base expression might
2210 	 have caused folding opportunities.  */
2211       if (TREE_CODE (base) == ADDR_EXPR)
2212 	{
2213 	  tree *ref = &TREE_OPERAND (base, 0);
2214 	  while (handled_component_p (*ref))
2215 	    ref = &TREE_OPERAND (*ref, 0);
2216 	  if (TREE_CODE (*ref) == MEM_REF)
2217 	    {
2218 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2219 				      TREE_OPERAND (*ref, 0),
2220 				      TREE_OPERAND (*ref, 1));
2221 	      if (tem)
2222 		*ref = tem;
2223 	    }
2224 	}
2225     }
2226 
2227   civ = alloc_iv (data, base, step);
2228   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2229   return;
2230 
2231 fail:
2232   for_each_index (op_p, idx_record_use, data);
2233 }
2234 
2235 /* Finds and records invariants used in STMT.  */
2236 
2237 static void
2238 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2239 {
2240   ssa_op_iter iter;
2241   use_operand_p use_p;
2242   tree op;
2243 
2244   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2245     {
2246       op = USE_FROM_PTR (use_p);
2247       record_invariant (data, op, false);
2248     }
2249 }
2250 
2251 /* Finds interesting uses of induction variables in the statement STMT.  */
2252 
2253 static void
2254 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2255 {
2256   struct iv *iv;
2257   tree op, *lhs, *rhs;
2258   ssa_op_iter iter;
2259   use_operand_p use_p;
2260   enum tree_code code;
2261 
2262   find_invariants_stmt (data, stmt);
2263 
2264   if (gimple_code (stmt) == GIMPLE_COND)
2265     {
2266       find_interesting_uses_cond (data, stmt);
2267       return;
2268     }
2269 
2270   if (is_gimple_assign (stmt))
2271     {
2272       lhs = gimple_assign_lhs_ptr (stmt);
2273       rhs = gimple_assign_rhs1_ptr (stmt);
2274 
2275       if (TREE_CODE (*lhs) == SSA_NAME)
2276 	{
2277 	  /* If the statement defines an induction variable, the uses are not
2278 	     interesting by themselves.  */
2279 
2280 	  iv = get_iv (data, *lhs);
2281 
2282 	  if (iv && !integer_zerop (iv->step))
2283 	    return;
2284 	}
2285 
2286       code = gimple_assign_rhs_code (stmt);
2287       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2288 	  && (REFERENCE_CLASS_P (*rhs)
2289 	      || is_gimple_val (*rhs)))
2290 	{
2291 	  if (REFERENCE_CLASS_P (*rhs))
2292 	    find_interesting_uses_address (data, stmt, rhs);
2293 	  else
2294 	    find_interesting_uses_op (data, *rhs);
2295 
2296 	  if (REFERENCE_CLASS_P (*lhs))
2297 	    find_interesting_uses_address (data, stmt, lhs);
2298 	  return;
2299 	}
2300       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2301 	{
2302 	  find_interesting_uses_cond (data, stmt);
2303 	  return;
2304 	}
2305 
2306       /* TODO -- we should also handle address uses of type
2307 
2308 	 memory = call (whatever);
2309 
2310 	 and
2311 
2312 	 call (memory).  */
2313     }
2314 
2315   if (gimple_code (stmt) == GIMPLE_PHI
2316       && gimple_bb (stmt) == data->current_loop->header)
2317     {
2318       iv = get_iv (data, PHI_RESULT (stmt));
2319 
2320       if (iv && !integer_zerop (iv->step))
2321 	return;
2322     }
2323 
2324   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2325     {
2326       op = USE_FROM_PTR (use_p);
2327 
2328       if (TREE_CODE (op) != SSA_NAME)
2329 	continue;
2330 
2331       iv = get_iv (data, op);
2332       if (!iv)
2333 	continue;
2334 
2335       find_interesting_uses_op (data, op);
2336     }
2337 }
2338 
2339 /* Finds interesting uses of induction variables outside of loops
2340    on loop exit edge EXIT.  */
2341 
2342 static void
2343 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2344 {
2345   gphi *phi;
2346   gphi_iterator psi;
2347   tree def;
2348 
2349   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2350     {
2351       phi = psi.phi ();
2352       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2353       if (!virtual_operand_p (def))
2354         find_interesting_uses_op (data, def);
2355     }
2356 }
2357 
2358 /* Finds uses of the induction variables that are interesting.  */
2359 
2360 static void
2361 find_interesting_uses (struct ivopts_data *data)
2362 {
2363   basic_block bb;
2364   gimple_stmt_iterator bsi;
2365   basic_block *body = get_loop_body (data->current_loop);
2366   unsigned i;
2367   struct version_info *info;
2368   edge e;
2369 
2370   if (dump_file && (dump_flags & TDF_DETAILS))
2371     fprintf (dump_file, "Uses:\n\n");
2372 
2373   for (i = 0; i < data->current_loop->num_nodes; i++)
2374     {
2375       edge_iterator ei;
2376       bb = body[i];
2377 
2378       FOR_EACH_EDGE (e, ei, bb->succs)
2379 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2380 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2381 	  find_interesting_uses_outside (data, e);
2382 
2383       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2384 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2385       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2386 	if (!is_gimple_debug (gsi_stmt (bsi)))
2387 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2388     }
2389 
2390   if (dump_file && (dump_flags & TDF_DETAILS))
2391     {
2392       bitmap_iterator bi;
2393 
2394       fprintf (dump_file, "\n");
2395 
2396       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2397 	{
2398 	  info = ver_info (data, i);
2399 	  if (info->inv_id)
2400 	    {
2401 	      fprintf (dump_file, "  ");
2402 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
2403 	      fprintf (dump_file, " is invariant (%d)%s\n",
2404 		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2405 	    }
2406 	}
2407 
2408       fprintf (dump_file, "\n");
2409     }
2410 
2411   free (body);
2412 }
2413 
2414 /* Compute the maximum offset supported by the [base + offset] addressing
2415    mode for the memory reference represented by USE.  */
2416 
2417 static HOST_WIDE_INT
2418 compute_max_addr_offset (struct iv_use *use)
2419 {
2420   int width;
2421   rtx reg, addr;
2422   HOST_WIDE_INT i, off;
2423   unsigned list_index, num;
2424   addr_space_t as;
2425   machine_mode mem_mode, addr_mode;
2426   static vec<HOST_WIDE_INT> max_offset_list;
2427 
2428   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2429   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2430 
2431   num = max_offset_list.length ();
2432   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2433   if (list_index >= num)
2434     {
2435       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2436       for (; num < max_offset_list.length (); num++)
2437 	max_offset_list[num] = -1;
2438     }
2439 
2440   off = max_offset_list[list_index];
2441   if (off != -1)
2442     return off;
2443 
2444   addr_mode = targetm.addr_space.address_mode (as);
2445   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2446   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2447 
2448   width = GET_MODE_BITSIZE (addr_mode) - 1;
2449   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2450     width = HOST_BITS_PER_WIDE_INT - 1;
2451 
2452   for (i = width; i > 0; i--)
2453     {
2454       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2455       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2456       if (memory_address_addr_space_p (mem_mode, addr, as))
2457 	break;
2458 
2459       /* For some strict-alignment targets, the offset must be naturally
2460 	 aligned.  Try an aligned offset if mem_mode is not QImode.  */
2461       off = ((unsigned HOST_WIDE_INT) 1 << i);
2462       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2463 	{
2464 	  off -= GET_MODE_SIZE (mem_mode);
2465 	  XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2466 	  if (memory_address_addr_space_p (mem_mode, addr, as))
2467 	    break;
2468 	}
2469     }
2470   if (i == 0)
2471     off = 0;
2472 
2473   max_offset_list[list_index] = off;
2474   return off;
2475 }
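
/* Editorial sketch of the probe above: offsets of the form (1 << i) - 1
   are tried from the widest sensible bit width downwards, asking the
   backend whether "reg + offset" is a legitimate address.  On a
   hypothetical target accepting 16-bit signed displacements, the first
   successful probe is i = 15, so 32767 is cached in max_offset_list for
   this (address space, memory mode) pair and reused on later queries.  */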
2476 
2477 /* Check if all small groups should be split.  Return true if and
2478    only if:
2479 
2480      1) At least one group contains two uses with different offsets.
2481      2) No group contains more than two uses with different offsets.
2482 
2483    Return false otherwise.  We want to split such groups because:
2484 
2485      1) Small groups don't have much benefit and may interfere with
2486 	general candidate selection.
2487      2) The problem size with only small groups is usually small, and
2488 	the general algorithm can handle it well.
2489 
2490    TODO -- The above claims may not hold when auto-increment is supported.  */
2491 
2492 static bool
2493 split_all_small_groups (struct ivopts_data *data)
2494 {
2495   bool split_p = false;
2496   unsigned int i, n, distinct;
2497   struct iv_use *pre, *use;
2498 
2499   n = n_iv_uses (data);
2500   for (i = 0; i < n; i++)
2501     {
2502       use = iv_use (data, i);
2503       if (!use->next)
2504 	continue;
2505 
2506       distinct = 1;
2507       gcc_assert (use->type == USE_ADDRESS);
2508       for (pre = use, use = use->next; use; pre = use, use = use->next)
2509 	{
2510 	  if (pre->addr_offset != use->addr_offset)
2511 	    distinct++;
2512 
2513 	  if (distinct > 2)
2514 	    return false;
2515 	}
2516       if (distinct == 2)
2517 	split_p = true;
2518     }
2519 
2520   return split_p;
2521 }
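
/* For instance (editorial): a group with offsets {0, 0, 4} has two
   distinct offsets and votes for splitting, while a group with offsets
   {0, 4, 8} has three, which disables splitting for the whole loop --
   groups that large are assumed to be worth keeping together.  */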
2522 
2523 /* For each group of address type uses, this function further groups
2524    these uses according to the maximum offset supported by the target's
2525    [base + offset] addressing mode.  */
2526 
2527 static void
2528 group_address_uses (struct ivopts_data *data)
2529 {
2530   HOST_WIDE_INT max_offset = -1;
2531   unsigned int i, n, sub_id;
2532   struct iv_use *pre, *use;
2533   unsigned HOST_WIDE_INT addr_offset_first;
2534 
2535   /* Reset max offset to split all small groups.  */
2536   if (split_all_small_groups (data))
2537     max_offset = 0;
2538 
2539   n = n_iv_uses (data);
2540   for (i = 0; i < n; i++)
2541     {
2542       use = iv_use (data, i);
2543       if (!use->next)
2544 	continue;
2545 
2546       gcc_assert (use->type == USE_ADDRESS);
2547       if (max_offset != 0)
2548 	max_offset = compute_max_addr_offset (use);
2549 
2550       while (use)
2551 	{
2552 	  sub_id = 0;
2553 	  addr_offset_first = use->addr_offset;
2554 	  /* Only uses whose offset relative to the first use fits in the
2555 	     offset part of the addressing mode can be grouped together.  */
2556 	  for (pre = use, use = use->next;
2557 	       use && (use->addr_offset - addr_offset_first
2558 		       <= (unsigned HOST_WIDE_INT) max_offset);
2559 	       pre = use, use = use->next)
2560 	    {
2561 	      use->id = pre->id;
2562 	      use->sub_id = ++sub_id;
2563 	    }
2564 
2565 	  /* Break the list and create a new group.  */
2566 	  if (use)
2567 	    {
2568 	      pre->next = NULL;
2569 	      use->id = n_iv_uses (data);
2570 	      use->related_cands = BITMAP_ALLOC (NULL);
2571 	      data->iv_uses.safe_push (use);
2572 	    }
2573 	}
2574     }
2575 
2576   if (dump_file && (dump_flags & TDF_DETAILS))
2577     dump_uses (dump_file, data);
2578 }
2579 
2580 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2581    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2582    we are at the top-level of the processed address.  */
2583 
2584 static tree
2585 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2586 		HOST_WIDE_INT *offset)
2587 {
2588   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2589   enum tree_code code;
2590   tree type, orig_type = TREE_TYPE (expr);
2591   HOST_WIDE_INT off0, off1, st;
2592   tree orig_expr = expr;
2593 
2594   STRIP_NOPS (expr);
2595 
2596   type = TREE_TYPE (expr);
2597   code = TREE_CODE (expr);
2598   *offset = 0;
2599 
2600   switch (code)
2601     {
2602     case INTEGER_CST:
2603       if (!cst_and_fits_in_hwi (expr)
2604 	  || integer_zerop (expr))
2605 	return orig_expr;
2606 
2607       *offset = int_cst_value (expr);
2608       return build_int_cst (orig_type, 0);
2609 
2610     case POINTER_PLUS_EXPR:
2611     case PLUS_EXPR:
2612     case MINUS_EXPR:
2613       op0 = TREE_OPERAND (expr, 0);
2614       op1 = TREE_OPERAND (expr, 1);
2615 
2616       op0 = strip_offset_1 (op0, false, false, &off0);
2617       op1 = strip_offset_1 (op1, false, false, &off1);
2618 
2619       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2620       if (op0 == TREE_OPERAND (expr, 0)
2621 	  && op1 == TREE_OPERAND (expr, 1))
2622 	return orig_expr;
2623 
2624       if (integer_zerop (op1))
2625 	expr = op0;
2626       else if (integer_zerop (op0))
2627 	{
2628 	  if (code == MINUS_EXPR)
2629 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2630 	  else
2631 	    expr = op1;
2632 	}
2633       else
2634 	expr = fold_build2 (code, type, op0, op1);
2635 
2636       return fold_convert (orig_type, expr);
2637 
2638     case MULT_EXPR:
2639       op1 = TREE_OPERAND (expr, 1);
2640       if (!cst_and_fits_in_hwi (op1))
2641 	return orig_expr;
2642 
2643       op0 = TREE_OPERAND (expr, 0);
2644       op0 = strip_offset_1 (op0, false, false, &off0);
2645       if (op0 == TREE_OPERAND (expr, 0))
2646 	return orig_expr;
2647 
2648       *offset = off0 * int_cst_value (op1);
2649       if (integer_zerop (op0))
2650 	expr = op0;
2651       else
2652 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2653 
2654       return fold_convert (orig_type, expr);
2655 
2656     case ARRAY_REF:
2657     case ARRAY_RANGE_REF:
2658       if (!inside_addr)
2659 	return orig_expr;
2660 
2661       step = array_ref_element_size (expr);
2662       if (!cst_and_fits_in_hwi (step))
2663 	break;
2664 
2665       st = int_cst_value (step);
2666       op1 = TREE_OPERAND (expr, 1);
2667       op1 = strip_offset_1 (op1, false, false, &off1);
2668       *offset = off1 * st;
2669 
2670       if (top_compref
2671 	  && integer_zerop (op1))
2672 	{
2673 	  /* Strip the component reference completely.  */
2674 	  op0 = TREE_OPERAND (expr, 0);
2675 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2676 	  *offset += off0;
2677 	  return op0;
2678 	}
2679       break;
2680 
2681     case COMPONENT_REF:
2682       {
2683 	tree field;
2684 
2685 	if (!inside_addr)
2686 	  return orig_expr;
2687 
2688 	tmp = component_ref_field_offset (expr);
2689 	field = TREE_OPERAND (expr, 1);
2690 	if (top_compref
2691 	    && cst_and_fits_in_hwi (tmp)
2692 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2693 	  {
2694 	    HOST_WIDE_INT boffset, abs_off;
2695 
2696 	    /* Strip the component reference completely.  */
2697 	    op0 = TREE_OPERAND (expr, 0);
2698 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2699 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2700 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2701 	    if (boffset < 0)
2702 	      abs_off = -abs_off;
2703 
2704 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2705 	    return op0;
2706 	  }
2707       }
2708       break;
2709 
2710     case ADDR_EXPR:
2711       op0 = TREE_OPERAND (expr, 0);
2712       op0 = strip_offset_1 (op0, true, true, &off0);
2713       *offset += off0;
2714 
2715       if (op0 == TREE_OPERAND (expr, 0))
2716 	return orig_expr;
2717 
2718       expr = build_fold_addr_expr (op0);
2719       return fold_convert (orig_type, expr);
2720 
2721     case MEM_REF:
2722       /* ???  Offset operand?  */
2723       inside_addr = false;
2724       break;
2725 
2726     default:
2727       return orig_expr;
2728     }
2729 
2730   /* Default handling of expressions for which we want to recurse into
2731      the first operand.  */
2732   op0 = TREE_OPERAND (expr, 0);
2733   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2734   *offset += off0;
2735 
2736   if (op0 == TREE_OPERAND (expr, 0)
2737       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2738     return orig_expr;
2739 
2740   expr = copy_node (expr);
2741   TREE_OPERAND (expr, 0) = op0;
2742   if (op1)
2743     TREE_OPERAND (expr, 1) = op1;
2744 
2745   /* Inside an address, we might strip the top-level component references,
2746      thus changing the type of the expression.  The handling of ADDR_EXPR
2747      will fix that.  */
2748   expr = fold_convert (orig_type, expr);
2749 
2750   return expr;
2751 }
2752 
2753 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2754 
2755 static tree
2756 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2757 {
2758   HOST_WIDE_INT off;
2759   tree core = strip_offset_1 (expr, false, false, &off);
2760   *offset = off;
2761   return core;
2762 }
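
/* Editorial examples: for EXPR = &a[i + 3] with 4-byte elements, the
   returned core is &a[i] and *OFFSET is set to 12; for
   EXPR = p_1 + 10 (a POINTER_PLUS_EXPR, p_1 hypothetical), the core is
   p_1 and *OFFSET is 10.  */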
2763 
2764 /* Returns a variant of TYPE that can be used as the base type for different
2765    uses.  We return an unsigned type with the same precision, which avoids
2766    problems with overflows.  */
2767 
2768 static tree
2769 generic_type_for (tree type)
2770 {
2771   if (POINTER_TYPE_P (type))
2772     return unsigned_type_for (type);
2773 
2774   if (TYPE_UNSIGNED (type))
2775     return type;
2776 
2777   return unsigned_type_for (type);
2778 }
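
/* E.g. generic_type_for (int) is unsigned int, and for a pointer type
   it is an unsigned integer type of the same precision, so candidate
   arithmetic is done in types whose overflow is well defined.  */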
2779 
2780 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2781    the bitmap in which we should store them.  */
2782 
2783 static struct ivopts_data *fd_ivopts_data;
2784 static tree
2785 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2786 {
2787   bitmap *depends_on = (bitmap *) data;
2788   struct version_info *info;
2789 
2790   if (TREE_CODE (*expr_p) != SSA_NAME)
2791     return NULL_TREE;
2792   info = name_info (fd_ivopts_data, *expr_p);
2793 
2794   if (!info->inv_id || info->has_nonlin_use)
2795     return NULL_TREE;
2796 
2797   if (!*depends_on)
2798     *depends_on = BITMAP_ALLOC (NULL);
2799   bitmap_set_bit (*depends_on, info->inv_id);
2800 
2801   return NULL_TREE;
2802 }
2803 
2804 /* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
2805    and the position to POS.  If USE is not NULL, the candidate is set as
2806    related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
2807    for the replacement of the final value of the iv by a direct computation.  */
2808 
2809 static struct iv_cand *
2810 add_candidate_1 (struct ivopts_data *data,
2811 		 tree base, tree step, bool important, enum iv_position pos,
2812 		 struct iv_use *use, gimple *incremented_at,
2813 		 struct iv *orig_iv = NULL)
2814 {
2815   unsigned i;
2816   struct iv_cand *cand = NULL;
2817   tree type, orig_type;
2818 
2819   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2820      live, but the ivopts code may replace a real pointer with one
2821      pointing before or after the memory block that is then adjusted
2822      into the memory block during the loop.  FIXME: It would likely be
2823      better to actually force the pointer live and still use ivopts;
2824      for example, it would be enough to write the pointer into memory
2825      and keep it there until after the loop.  */
2826   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2827     return NULL;
2828 
2829   /* For non-original variables, make sure their values are computed in a type
2830      that does not invoke undefined behavior on overflows (since in general,
2831      we cannot prove that these induction variables are non-wrapping).  */
2832   if (pos != IP_ORIGINAL)
2833     {
2834       orig_type = TREE_TYPE (base);
2835       type = generic_type_for (orig_type);
2836       if (type != orig_type)
2837 	{
2838 	  base = fold_convert (type, base);
2839 	  step = fold_convert (type, step);
2840 	}
2841     }
2842 
2843   for (i = 0; i < n_iv_cands (data); i++)
2844     {
2845       cand = iv_cand (data, i);
2846 
2847       if (cand->pos != pos)
2848 	continue;
2849 
2850       if (cand->incremented_at != incremented_at
2851 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2852 	      && cand->ainc_use != use))
2853 	continue;
2854 
2855       if (!cand->iv)
2856 	{
2857 	  if (!base && !step)
2858 	    break;
2859 
2860 	  continue;
2861 	}
2862 
2863       if (!base && !step)
2864 	continue;
2865 
2866       if (operand_equal_p (base, cand->iv->base, 0)
2867 	  && operand_equal_p (step, cand->iv->step, 0)
2868           && (TYPE_PRECISION (TREE_TYPE (base))
2869               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2870 	break;
2871     }
2872 
2873   if (i == n_iv_cands (data))
2874     {
2875       cand = XCNEW (struct iv_cand);
2876       cand->id = i;
2877 
2878       if (!base && !step)
2879 	cand->iv = NULL;
2880       else
2881 	cand->iv = alloc_iv (data, base, step);
2882 
2883       cand->pos = pos;
2884       if (pos != IP_ORIGINAL && cand->iv)
2885 	{
2886 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2887 	  cand->var_after = cand->var_before;
2888 	}
2889       cand->important = important;
2890       cand->incremented_at = incremented_at;
2891       data->iv_candidates.safe_push (cand);
2892 
2893       if (step
2894 	  && TREE_CODE (step) != INTEGER_CST)
2895 	{
2896 	  fd_ivopts_data = data;
2897 	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2898 	}
2899 
2900       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2901 	cand->ainc_use = use;
2902       else
2903 	cand->ainc_use = NULL;
2904 
2905       cand->orig_iv = orig_iv;
2906       if (dump_file && (dump_flags & TDF_DETAILS))
2907 	dump_cand (dump_file, cand);
2908     }
2909 
2910   if (important && !cand->important)
2911     {
2912       cand->important = true;
2913       if (dump_file && (dump_flags & TDF_DETAILS))
2914 	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2915     }
2916 
2917   if (use)
2918     {
2919       bitmap_set_bit (use->related_cands, i);
2920       if (dump_file && (dump_flags & TDF_DETAILS))
2921 	fprintf (dump_file, "Candidate %d is related to use %d\n",
2922 		 cand->id, use->id);
2923     }
2924 
2925   return cand;
2926 }
2927 
2928 /* Returns true if incrementing the induction variable at the end of the LOOP
2929    is allowed.
2930 
2931    The purpose is to avoid splitting the latch edge with a biv increment,
2932    thus creating a jump, possibly confusing other optimization passes and
2933    leaving less freedom to the scheduler.  So we allow IP_END_POS only if
2934    IP_NORMAL_POS is not available (so we do not have a better alternative),
2935    or if the latch edge is already nonempty.  */
2936 
2937 static bool
2938 allow_ip_end_pos_p (struct loop *loop)
2939 {
2940   if (!ip_normal_pos (loop))
2941     return true;
2942 
2943   if (!empty_block_p (ip_end_pos (loop)))
2944     return true;
2945 
2946   return false;
2947 }
2948 
2949 /* If possible, adds autoincrement candidates BASE + STEP * i based on use
2950    USE.  The important field is set to IMPORTANT.  */
2951 
2952 static void
2953 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2954 			bool important, struct iv_use *use)
2955 {
2956   basic_block use_bb = gimple_bb (use->stmt);
2957   machine_mode mem_mode;
2958   unsigned HOST_WIDE_INT cstepi;
2959 
2960   /* If we insert the increment in any position other than the standard
2961      ones, we must ensure that it is incremented once per iteration.
2962      It must not be in an inner nested loop, or one side of an if
2963      statement.  */
2964   if (use_bb->loop_father != data->current_loop
2965       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2966       || stmt_could_throw_p (use->stmt)
2967       || !cst_and_fits_in_hwi (step))
2968     return;
2969 
2970   cstepi = int_cst_value (step);
2971 
2972   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2973   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2974 	|| USE_STORE_PRE_INCREMENT (mem_mode))
2975        && GET_MODE_SIZE (mem_mode) == cstepi)
2976       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2977 	   || USE_STORE_PRE_DECREMENT (mem_mode))
2978 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2979     {
2980       enum tree_code code = MINUS_EXPR;
2981       tree new_base;
2982       tree new_step = step;
2983 
2984       if (POINTER_TYPE_P (TREE_TYPE (base)))
2985 	{
2986 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2987 	  code = POINTER_PLUS_EXPR;
2988 	}
2989       else
2990 	new_step = fold_convert (TREE_TYPE (base), new_step);
2991       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2992       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2993 		       use->stmt);
2994     }
2995   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2996 	|| USE_STORE_POST_INCREMENT (mem_mode))
2997        && GET_MODE_SIZE (mem_mode) == cstepi)
2998       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2999 	   || USE_STORE_POST_DECREMENT (mem_mode))
3000 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
3001     {
3002       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3003 		       use->stmt);
3004     }
3005 }
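
/* Editorial example: on a target with post-increment addressing, a use
   *p where p steps by exactly the access size gets an IP_AFTER_USE
   candidate {base, step}, letting the increment fold into the access
   like a "*p++" load; with pre-increment support, the IP_BEFORE_USE
   candidate instead starts one step earlier, at base - step.  */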
3006 
3007 /* Adds a candidate BASE + STEP * i.  The important field is set to
3008    IMPORTANT and the position to POS.  If USE is not NULL, the candidate is
3009    set as related to it.  The candidate computation is scheduled before the
3010    exit condition and at the end of the loop.  */
3011 
3012 static void
3013 add_candidate (struct ivopts_data *data,
3014 	       tree base, tree step, bool important, struct iv_use *use,
3015 	       struct iv *orig_iv = NULL)
3016 {
3017   gcc_assert (use == NULL || use->sub_id == 0);
3018 
3019   if (ip_normal_pos (data->current_loop))
3020     add_candidate_1 (data, base, step, important,
3021 		     IP_NORMAL, use, NULL, orig_iv);
3022   if (ip_end_pos (data->current_loop)
3023       && allow_ip_end_pos_p (data->current_loop))
3024     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3025 }
3026 
3027 /* Adds standard iv candidates.  */
3028 
3029 static void
3030 add_standard_iv_candidates (struct ivopts_data *data)
3031 {
3032   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3033 
3034   /* The same for the long type if it is still fast enough.  */
3035   if (TYPE_PRECISION
3036         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3037       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3038     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3039 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3040 
3041   /* The same for the long long type if it is still fast enough.  */
3042   if (TYPE_PRECISION
3043         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3044       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3045     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3046 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3047 }
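
/* On an LP64 target, for example, this adds the candidate {0, +1} in
   int and, since long is wider yet still fits in a word, {0, +1} in
   long; long long is then skipped there because it has the same
   precision as long.  */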
3048 
3049 
3050 /* Adds candidates based on the old induction variable IV.  */
3051 
3052 static void
3053 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3054 {
3055   gimple *phi;
3056   tree def;
3057   struct iv_cand *cand;
3058 
3059   /* Check if this biv is used in address type uses.  */
3060   if (iv->no_overflow  && iv->have_address_use
3061       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3062       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3063     {
3064       tree base = fold_convert (sizetype, iv->base);
3065       tree step = fold_convert (sizetype, iv->step);
3066 
3067       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3068       add_candidate (data, base, step, true, NULL, iv);
3069       /* Add iv cand of the original type only if it has nonlinear use.  */
3070       if (iv->have_use_for)
3071 	add_candidate (data, iv->base, iv->step, true, NULL);
3072     }
3073   else
3074     add_candidate (data, iv->base, iv->step, true, NULL);
3075 
3076   /* The same, but with initial value zero.  */
3077   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3078     add_candidate (data, size_int (0), iv->step, true, NULL);
3079   else
3080     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3081 		   iv->step, true, NULL);
3082 
3083   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3084   if (gimple_code (phi) == GIMPLE_PHI)
3085     {
3086       /* Additionally record the possibility of leaving the original iv
3087 	 untouched.  */
3088       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3089       /* Don't add candidate if it's from another PHI node because
3090 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3091       phi = SSA_NAME_DEF_STMT (def);
3092       if (gimple_code (phi) != GIMPLE_PHI)
3093 	{
3094 	  cand = add_candidate_1 (data,
3095 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3096 				  SSA_NAME_DEF_STMT (def));
3097 	  if (cand)
3098 	    {
3099 	      cand->var_before = iv->ssa_name;
3100 	      cand->var_after = def;
3101 	    }
3102 	}
3103       else
3104 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3105     }
3106 }
3107 
3108 /* Adds candidates based on the old induction variables.  */
3109 
3110 static void
3111 add_iv_candidate_for_bivs (struct ivopts_data *data)
3112 {
3113   unsigned i;
3114   struct iv *iv;
3115   bitmap_iterator bi;
3116 
3117   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3118     {
3119       iv = ver_info (data, i)->iv;
3120       if (iv && iv->biv_p && !integer_zerop (iv->step))
3121 	add_iv_candidate_for_biv (data, iv);
3122     }
3123 }
3124 
3125 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3126 
3127 static void
3128 record_common_cand (struct ivopts_data *data, tree base,
3129 		    tree step, struct iv_use *use)
3130 {
3131   struct iv_common_cand ent;
3132   struct iv_common_cand **slot;
3133 
3134   gcc_assert (use != NULL);
3135 
3136   ent.base = base;
3137   ent.step = step;
3138   ent.hash = iterative_hash_expr (base, 0);
3139   ent.hash = iterative_hash_expr (step, ent.hash);
3140 
3141   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3142   if (*slot == NULL)
3143     {
3144       *slot = new iv_common_cand ();
3145       (*slot)->base = base;
3146       (*slot)->step = step;
3147       (*slot)->uses.create (8);
3148       (*slot)->hash = ent.hash;
3149       data->iv_common_cands.safe_push ((*slot));
3150     }
3151   (*slot)->uses.safe_push (use);
3152   return;
3153 }
3154 
3155 /* Comparison function used to sort common candidates.  */
3156 
3157 static int
3158 common_cand_cmp (const void *p1, const void *p2)
3159 {
3160   unsigned n1, n2;
3161   const struct iv_common_cand *const *const ccand1
3162     = (const struct iv_common_cand *const *)p1;
3163   const struct iv_common_cand *const *const ccand2
3164     = (const struct iv_common_cand *const *)p2;
3165 
3166   n1 = (*ccand1)->uses.length ();
3167   n2 = (*ccand2)->uses.length ();
3168   return n2 - n1;
3169 }
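
/* Note (editorial): returning n2 - n1 sorts the vector by decreasing
   number of deriving uses, so add_iv_candidate_derived_from_uses below
   can stop scanning at the first entry with at most one use.  */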
3170 
3171 /* Adds IV candidates based on the common candidates recorded.  */
3172 
3173 static void
3174 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3175 {
3176   unsigned i, j;
3177   struct iv_cand *cand_1, *cand_2;
3178 
3179   data->iv_common_cands.qsort (common_cand_cmp);
3180   for (i = 0; i < data->iv_common_cands.length (); i++)
3181     {
3182       struct iv_common_cand *ptr = data->iv_common_cands[i];
3183 
3184       /* Only add IV candidate if it's derived from multiple uses.  */
3185       if (ptr->uses.length () <= 1)
3186 	break;
3187 
3188       cand_1 = NULL;
3189       cand_2 = NULL;
3190       if (ip_normal_pos (data->current_loop))
3191 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3192 				  false, IP_NORMAL, NULL, NULL);
3193 
3194       if (ip_end_pos (data->current_loop)
3195 	  && allow_ip_end_pos_p (data->current_loop))
3196 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3197 				  false, IP_END, NULL, NULL);
3198 
3199       /* Bind deriving uses and the new candidates.  */
3200       for (j = 0; j < ptr->uses.length (); j++)
3201 	{
3202 	  struct iv_use *use = ptr->uses[j];
3203 	  if (cand_1)
3204 	    bitmap_set_bit (use->related_cands, cand_1->id);
3205 	  if (cand_2)
3206 	    bitmap_set_bit (use->related_cands, cand_2->id);
3207 	}
3208     }
3209 
3210   /* Release the data, since it is no longer needed from this point.  */
3211   data->iv_common_cand_tab->empty ();
3212   data->iv_common_cands.truncate (0);
3213 }
3214 
3215 /* Adds candidates based on the value of USE's iv.  */
3216 
3217 static void
3218 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3219 {
3220   unsigned HOST_WIDE_INT offset;
3221   tree base;
3222   tree basetype;
3223   struct iv *iv = use->iv;
3224 
3225   add_candidate (data, iv->base, iv->step, false, use);
3226 
3227   /* Record common candidate for use in case it can be shared by others.  */
3228   record_common_cand (data, iv->base, iv->step, use);
3229 
3230   /* Record common candidate with initial value zero.  */
3231   basetype = TREE_TYPE (iv->base);
3232   if (POINTER_TYPE_P (basetype))
3233     basetype = sizetype;
3234   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3235 
3236   /* Record a common candidate with the constant offset stripped from the
3237      base.  As with the use itself, we also add a candidate for it directly.  */
3238   base = strip_offset (iv->base, &offset);
3239   if (offset || base != iv->base)
3240     {
3241       record_common_cand (data, base, iv->step, use);
3242       add_candidate (data, base, iv->step, false, use);
3243     }
3244 
3245   /* Record a common candidate with base_object removed from the base.  */
3246   if (iv->base_object != NULL)
3247     {
3248       unsigned i;
3249       aff_tree aff_base;
3250       tree step, base_object = iv->base_object;
3251 
3252       base = iv->base;
3253       step = iv->step;
3254       STRIP_NOPS (base);
3255       STRIP_NOPS (step);
3256       STRIP_NOPS (base_object);
3257       tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3258       for (i = 0; i < aff_base.n; i++)
3259 	{
3260 	  if (aff_base.elts[i].coef != 1)
3261 	    continue;
3262 
3263 	  if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3264 	    break;
3265 	}
3266       if (i < aff_base.n)
3267 	{
3268 	  aff_combination_remove_elt (&aff_base, i);
3269 	  base = aff_combination_to_tree (&aff_base);
3270 	  basetype = TREE_TYPE (base);
3271 	  if (POINTER_TYPE_P (basetype))
3272 	    basetype = sizetype;
3273 
3274 	  step = fold_convert (basetype, step);
3275 	  record_common_cand (data, base, step, use);
3276 	  /* Also record common candidate with offset stripped.  */
3277 	  base = strip_offset (base, &offset);
3278 	  if (offset)
3279 	    record_common_cand (data, base, step, use);
3280 	}
3281     }
3282 
3283   /* Finally, add auto-increment candidates.  Make such variables
3284      important, since other iv uses with the same base object may be
3285      based on them.  */
3286   if (use != NULL && use->type == USE_ADDRESS)
3287     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3288 }
3289 
3290 /* Adds candidates based on the uses.  */
3291 
3292 static void
3293 add_iv_candidate_for_uses (struct ivopts_data *data)
3294 {
3295   unsigned i;
3296 
3297   for (i = 0; i < n_iv_uses (data); i++)
3298     {
3299       struct iv_use *use = iv_use (data, i);
3300 
3301       if (!use)
3302 	continue;
3303 
3304       switch (use->type)
3305 	{
3306 	case USE_NONLINEAR_EXPR:
3307 	case USE_COMPARE:
3308 	case USE_ADDRESS:
3309 	  /* Just add the ivs based on the value of the iv used here.  */
3310 	  add_iv_candidate_for_use (data, use);
3311 	  break;
3312 
3313 	default:
3314 	  gcc_unreachable ();
3315 	}
3316     }
3317   add_iv_candidate_derived_from_uses (data);
3318 }
3319 
3320 /* Record important candidates and add them to related_cands bitmaps.  */
3321 
3322 static void
3323 record_important_candidates (struct ivopts_data *data)
3324 {
3325   unsigned i;
3326   struct iv_use *use;
3327 
3328   for (i = 0; i < n_iv_cands (data); i++)
3329     {
3330       struct iv_cand *cand = iv_cand (data, i);
3331 
3332       if (cand->important)
3333 	bitmap_set_bit (data->important_candidates, i);
3334     }
3335 
3336   data->consider_all_candidates = (n_iv_cands (data)
3337 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3338 
3339   /* Add important candidates to uses' related_cands bitmaps.  */
3340   for (i = 0; i < n_iv_uses (data); i++)
3341     {
3342       use = iv_use (data, i);
3343       bitmap_ior_into (use->related_cands, data->important_candidates);
3344     }
3345 }
3346 
3347 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3348    If consider_all_candidates is true, we use a two-dimensional array;
3349    otherwise we allocate a simple hash table for every use.  */
3350 
3351 static void
3352 alloc_use_cost_map (struct ivopts_data *data)
3353 {
3354   unsigned i, size, s;
3355 
3356   for (i = 0; i < n_iv_uses (data); i++)
3357     {
3358       struct iv_use *use = iv_use (data, i);
3359 
3360       if (data->consider_all_candidates)
3361 	size = n_iv_cands (data);
3362       else
3363 	{
3364 	  s = bitmap_count_bits (use->related_cands);
3365 
3366 	  /* Round up to a power of two, so that computing the modulus by it is fast.  */
3367 	  size = s ? (1 << ceil_log2 (s)) : 1;
3368 	}
3369 
3370       use->n_map_members = size;
3371       use->cost_map = XCNEWVEC (struct cost_pair, size);
3372     }
3373 }
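
/* For example (editorial): a use related to 5 candidates gets a map of
   size 8 == 1 << ceil_log2 (5), and the table is then probed starting
   at index cand->id & 7 -- see set_use_iv_cost and get_use_iv_cost
   below.  */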
3374 
3375 /* Returns a description of the computation cost of an expression whose
3376    runtime cost is RUNTIME and whose complexity is COMPLEXITY.  */
3377 
3378 static comp_cost
3379 new_cost (unsigned runtime, unsigned complexity)
3380 {
3381   comp_cost cost;
3382 
3383   cost.cost = runtime;
3384   cost.complexity = complexity;
3385 
3386   return cost;
3387 }
3388 
3389 /* Returns true if COST is infinite.  */
3390 
3391 static bool
3392 infinite_cost_p (comp_cost cost)
3393 {
3394   return cost.cost == INFTY;
3395 }
3396 
3397 /* Adds costs COST1 and COST2.  */
3398 
3399 static comp_cost
3400 add_costs (comp_cost cost1, comp_cost cost2)
3401 {
3402   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3403     return infinite_cost;
3404 
3405   cost1.cost += cost2.cost;
3406   cost1.complexity += cost2.complexity;
3407 
3408   return cost1;
3409 }
3410 /* Subtracts cost COST2 from COST1.  */
3411 
3412 static comp_cost
3413 sub_costs (comp_cost cost1, comp_cost cost2)
3414 {
3415   cost1.cost -= cost2.cost;
3416   cost1.complexity -= cost2.complexity;
3417 
3418   return cost1;
3419 }
3420 
3421 /* Returns a negative number if COST1 < COST2, a positive number if
3422    COST1 > COST2, and 0 if COST1 = COST2.  */
3423 
3424 static int
3425 compare_costs (comp_cost cost1, comp_cost cost2)
3426 {
3427   if (cost1.cost == cost2.cost)
3428     return cost1.complexity - cost2.complexity;
3429 
3430   return cost1.cost - cost2.cost;
3431 }
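
/* For example (editorial): {cost = 4, complexity = 2} compares less
   than {cost = 5, complexity = 0} but greater than
   {cost = 4, complexity = 1} -- the runtime cost dominates, and
   complexity only breaks ties.  */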
3432 
3433 /* Sets the cost of the (USE, CANDIDATE) pair to COST, records that it
3434    depends on the invariants DEPENDS_ON, that the value used in expressing
3435    it is VALUE, and that for iv elimination the comparison operator is COMP.  */
3436 
3437 static void
3438 set_use_iv_cost (struct ivopts_data *data,
3439 		 struct iv_use *use, struct iv_cand *cand,
3440 		 comp_cost cost, bitmap depends_on, tree value,
3441 		 enum tree_code comp, int inv_expr_id)
3442 {
3443   unsigned i, s;
3444 
3445   if (infinite_cost_p (cost))
3446     {
3447       BITMAP_FREE (depends_on);
3448       return;
3449     }
3450 
3451   if (data->consider_all_candidates)
3452     {
3453       use->cost_map[cand->id].cand = cand;
3454       use->cost_map[cand->id].cost = cost;
3455       use->cost_map[cand->id].depends_on = depends_on;
3456       use->cost_map[cand->id].value = value;
3457       use->cost_map[cand->id].comp = comp;
3458       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3459       return;
3460     }
3461 
3462   /* n_map_members is a power of two, so this computes modulo.  */
3463   s = cand->id & (use->n_map_members - 1);
3464   for (i = s; i < use->n_map_members; i++)
3465     if (!use->cost_map[i].cand)
3466       goto found;
3467   for (i = 0; i < s; i++)
3468     if (!use->cost_map[i].cand)
3469       goto found;
3470 
3471   gcc_unreachable ();
3472 
3473 found:
3474   use->cost_map[i].cand = cand;
3475   use->cost_map[i].cost = cost;
3476   use->cost_map[i].depends_on = depends_on;
3477   use->cost_map[i].value = value;
3478   use->cost_map[i].comp = comp;
3479   use->cost_map[i].inv_expr_id = inv_expr_id;
3480 }
3481 
3482 /* Gets cost of (USE, CANDIDATE) pair.  */
3483 
3484 static struct cost_pair *
3485 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3486 		 struct iv_cand *cand)
3487 {
3488   unsigned i, s;
3489   struct cost_pair *ret;
3490 
3491   if (!cand)
3492     return NULL;
3493 
3494   if (data->consider_all_candidates)
3495     {
3496       ret = use->cost_map + cand->id;
3497       if (!ret->cand)
3498 	return NULL;
3499 
3500       return ret;
3501     }
3502 
3503   /* n_map_members is a power of two, so this computes modulo.  */
3504   s = cand->id & (use->n_map_members - 1);
3505   for (i = s; i < use->n_map_members; i++)
3506     if (use->cost_map[i].cand == cand)
3507       return use->cost_map + i;
3508     else if (use->cost_map[i].cand == NULL)
3509       return NULL;
3510   for (i = 0; i < s; i++)
3511     if (use->cost_map[i].cand == cand)
3512       return use->cost_map + i;
3513     else if (use->cost_map[i].cand == NULL)
3514       return NULL;
3515 
3516   return NULL;
3517 }
3518 
3519 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
3520 static rtx
3521 produce_memory_decl_rtl (tree obj, int *regno)
3522 {
3523   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3524   machine_mode address_mode = targetm.addr_space.address_mode (as);
3525   rtx x;
3526 
3527   gcc_assert (obj);
3528   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3529     {
3530       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3531       x = gen_rtx_SYMBOL_REF (address_mode, name);
3532       SET_SYMBOL_REF_DECL (x, obj);
3533       x = gen_rtx_MEM (DECL_MODE (obj), x);
3534       set_mem_addr_space (x, as);
3535       targetm.encode_section_info (obj, x, true);
3536     }
3537   else
3538     {
3539       x = gen_raw_REG (address_mode, (*regno)++);
3540       x = gen_rtx_MEM (DECL_MODE (obj), x);
3541       set_mem_addr_space (x, as);
3542     }
3543 
3544   return x;
3545 }
3546 
3547 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
3548    walk_tree.  DATA contains the actual fake register number.  */
3549 
3550 static tree
3551 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3552 {
3553   tree obj = NULL_TREE;
3554   rtx x = NULL_RTX;
3555   int *regno = (int *) data;
3556 
3557   switch (TREE_CODE (*expr_p))
3558     {
3559     case ADDR_EXPR:
3560       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3561 	   handled_component_p (*expr_p);
3562 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3563 	continue;
3564       obj = *expr_p;
3565       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3566         x = produce_memory_decl_rtl (obj, regno);
3567       break;
3568 
3569     case SSA_NAME:
3570       *ws = 0;
3571       obj = SSA_NAME_VAR (*expr_p);
3572       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3573       if (!obj)
3574 	return NULL_TREE;
3575       if (!DECL_RTL_SET_P (obj))
3576 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3577       break;
3578 
3579     case VAR_DECL:
3580     case PARM_DECL:
3581     case RESULT_DECL:
3582       *ws = 0;
3583       obj = *expr_p;
3584 
3585       if (DECL_RTL_SET_P (obj))
3586 	break;
3587 
3588       if (DECL_MODE (obj) == BLKmode)
3589 	x = produce_memory_decl_rtl (obj, regno);
3590       else
3591 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3592 
3593       break;
3594 
3595     default:
3596       break;
3597     }
3598 
3599   if (x)
3600     {
3601       decl_rtl_to_reset.safe_push (obj);
3602       SET_DECL_RTL (obj, x);
3603     }
3604 
3605   return NULL_TREE;
3606 }
3607 
3608 /* Determines cost of the computation of EXPR.  */
3609 
3610 static unsigned
3611 computation_cost (tree expr, bool speed)
3612 {
3613   rtx_insn *seq;
3614   rtx rslt;
3615   tree type = TREE_TYPE (expr);
3616   unsigned cost;
3617   /* Avoid using hard regs in ways which may be unsupported.  */
3618   int regno = LAST_VIRTUAL_REGISTER + 1;
3619   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3620   enum node_frequency real_frequency = node->frequency;
3621 
3622   node->frequency = NODE_FREQUENCY_NORMAL;
3623   crtl->maybe_hot_insn_p = speed;
3624   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3625   start_sequence ();
3626   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3627   seq = get_insns ();
3628   end_sequence ();
3629   default_rtl_profile ();
3630   node->frequency = real_frequency;
3631 
3632   cost = seq_cost (seq, speed);
3633   if (MEM_P (rslt))
3634     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3635 			  TYPE_ADDR_SPACE (type), speed);
3636   else if (!REG_P (rslt))
3637     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3638 
3639   return cost;
3640 }
3641 
3642 /* Returns variable containing the value of candidate CAND at statement AT.  */
3643 
3644 static tree
3645 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3646 {
3647   if (stmt_after_increment (loop, cand, stmt))
3648     return cand->var_after;
3649   else
3650     return cand->var_before;
3651 }
3652 
3653 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3654    same precision that is at least as wide as the precision of TYPE, stores
3655    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3656    type of A and B.  */
3657 
3658 static tree
3659 determine_common_wider_type (tree *a, tree *b)
3660 {
3661   tree wider_type = NULL;
3662   tree suba, subb;
3663   tree atype = TREE_TYPE (*a);
3664 
3665   if (CONVERT_EXPR_P (*a))
3666     {
3667       suba = TREE_OPERAND (*a, 0);
3668       wider_type = TREE_TYPE (suba);
3669       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3670 	return atype;
3671     }
3672   else
3673     return atype;
3674 
3675   if (CONVERT_EXPR_P (*b))
3676     {
3677       subb = TREE_OPERAND (*b, 0);
3678       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3679 	return atype;
3680     }
3681   else
3682     return atype;
3683 
3684   *a = suba;
3685   *b = subb;
3686   return wider_type;
3687 }
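
/* A hypothetical example: if *A is (int) a64 and *B is (int) b64, where a64
   and b64 are 64-bit values and TYPE is 32 bits wide, the conversions are
   stripped, a64 and b64 are stored back through the pointers, and the
   64-bit type is returned, so the caller can compute the difference in the
   wider type where it folds better.  */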
3688 
3689 /* Determines the expression by which USE is expressed from induction variable
3690    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3691    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3692 
3693 static bool
3694 get_computation_aff (struct loop *loop,
3695 		     struct iv_use *use, struct iv_cand *cand, gimple *at,
3696 		     struct aff_tree *aff)
3697 {
3698   tree ubase = use->iv->base;
3699   tree ustep = use->iv->step;
3700   tree cbase = cand->iv->base;
3701   tree cstep = cand->iv->step, cstep_common;
3702   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3703   tree common_type, var;
3704   tree uutype;
3705   aff_tree cbase_aff, var_aff;
3706   widest_int rat;
3707 
3708   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3709     {
3710       /* We do not have enough precision to express the values of USE.  */
3711       return false;
3712     }
3713 
3714   var = var_at_stmt (loop, cand, at);
3715   uutype = unsigned_type_for (utype);
3716 
3717   /* If the conversion is not noop, perform it.  */
3718   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3719     {
3720       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3721 	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3722 	{
3723 	  tree inner_base, inner_step, inner_type;
3724 	  inner_base = TREE_OPERAND (cbase, 0);
3725 	  if (CONVERT_EXPR_P (cstep))
3726 	    inner_step = TREE_OPERAND (cstep, 0);
3727 	  else
3728 	    inner_step = cstep;
3729 
3730 	  inner_type = TREE_TYPE (inner_base);
3731 	  /* If the candidate was added from a biv whose type is smaller than
3732 	     ctype, we know that neither the candidate nor the biv overflows.
3733 	     In this case, it's safe to skip the conversion in the candidate.
3734 	     As an example, (unsigned short)((unsigned long)A) equals
3735 	     (unsigned short)A, if A has a type no larger than short.  */
3736 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3737 	    {
3738 	      cbase = inner_base;
3739 	      cstep = inner_step;
3740 	    }
3741 	}
3742       cstep = fold_convert (uutype, cstep);
3743       cbase = fold_convert (uutype, cbase);
3744       var = fold_convert (uutype, var);
3745     }
3746 
3747   /* Ratio is 1 when computing the value of biv cand by itself.
3748      We can't rely on constant_multiple_of in this case because the
3749      use is created after the original biv is selected.  The call
3750      could fail because of inconsistent fold behavior.  See PR68021
3751      for more information.  */
3752   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3753     {
3754       gcc_assert (is_gimple_assign (use->stmt));
3755       gcc_assert (use->iv->ssa_name == cand->var_after);
3756       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3757       rat = 1;
3758     }
3759   else if (!constant_multiple_of (ustep, cstep, &rat))
3760     return false;
3761 
3762   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3763      type, we achieve better folding by computing their difference in this
3764      wider type, and casting the result to UUTYPE.  We do not need to worry
3765      about overflows, as all the arithmetic will in the end be performed in
3766      UUTYPE anyway.  */
3767   common_type = determine_common_wider_type (&ubase, &cbase);
3768 
3769   /* use = ubase - ratio * cbase + ratio * var.  */
3770   tree_to_aff_combination (ubase, common_type, aff);
3771   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3772   tree_to_aff_combination (var, uutype, &var_aff);
3773 
3774   /* We need to shift the value if we are after the increment.  */
3775   if (stmt_after_increment (loop, cand, at))
3776     {
3777       aff_tree cstep_aff;
3778 
3779       if (common_type != uutype)
3780 	cstep_common = fold_convert (common_type, cstep);
3781       else
3782 	cstep_common = cstep;
3783 
3784       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3785       aff_combination_add (&cbase_aff, &cstep_aff);
3786     }
3787 
3788   aff_combination_scale (&cbase_aff, -rat);
3789   aff_combination_add (aff, &cbase_aff);
3790   if (common_type != uutype)
3791     aff_combination_convert (aff, uutype);
3792 
3793   aff_combination_scale (&var_aff, rat);
3794   aff_combination_add (aff, &var_aff);
3795 
3796   return true;
3797 }
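
/* A worked example of the formula above (illustrative numbers only): for a
   use with IV {base = 0, step = 4} expressed from a candidate with IV
   {base = 0, step = 2}, constant_multiple_of gives rat == 2 and the result
   decomposes as

     use = 0 - 2 * 0 + 2 * var == 2 * var

   where var is the candidate's value at statement AT.  */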
3798 
3799 /* Return the type of USE.  */
3800 
3801 static tree
3802 get_use_type (struct iv_use *use)
3803 {
3804   tree base_type = TREE_TYPE (use->iv->base);
3805   tree type;
3806 
3807   if (use->type == USE_ADDRESS)
3808     {
3809       /* The base_type may be a void pointer.  Create a pointer type based on
3810 	 the mem_ref instead.  */
3811       type = build_pointer_type (TREE_TYPE (*use->op_p));
3812       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3813 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3814     }
3815   else
3816     type = base_type;
3817 
3818   return type;
3819 }
3820 
3821 /* Determines the expression by which USE is expressed from induction variable
3822    CAND at statement AT in LOOP.  The computation is unshared.  */
3823 
3824 static tree
3825 get_computation_at (struct loop *loop,
3826 		    struct iv_use *use, struct iv_cand *cand, gimple *at)
3827 {
3828   aff_tree aff;
3829   tree type = get_use_type (use);
3830 
3831   if (!get_computation_aff (loop, use, cand, at, &aff))
3832     return NULL_TREE;
3833   unshare_aff_combination (&aff);
3834   return fold_convert (type, aff_combination_to_tree (&aff));
3835 }
3836 
3837 /* Determines the expression by which USE is expressed from induction variable
3838    CAND in LOOP.  The computation is unshared.  */
3839 
3840 static tree
3841 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3842 {
3843   return get_computation_at (loop, use, cand, use->stmt);
3844 }
3845 
3846 /* Adjust the cost COST for being in loop setup rather than loop body.
3847    If we're optimizing for space, the loop setup overhead is constant;
3848    if we're optimizing for speed, amortize it over the expected iteration count.  */
3849 static unsigned
3850 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3851 {
3852   if (cost == INFTY)
3853     return cost;
3854   else if (optimize_loop_for_speed_p (data->current_loop))
3855     return cost / avg_loop_niter (data->current_loop);
3856   else
3857     return cost;
3858 }
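
/* For instance (illustrative numbers): a setup cost of 10 in a loop with
   avg_loop_niter == 5 is accounted as 10 / 5 == 2 when optimizing for
   speed, since the setup executes once while the body runs ~5 times; when
   optimizing for size it stays 10, as it is code emitted exactly once.  */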
3859 
3860 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3861    validity for a memory reference accessing memory of mode MODE in
3862    address space AS.  */
3863 
3864 
3865 bool
3866 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3867 				 addr_space_t as)
3868 {
3869 #define MAX_RATIO 128
3870   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3871   static vec<sbitmap> valid_mult_list;
3872   sbitmap valid_mult;
3873 
3874   if (data_index >= valid_mult_list.length ())
3875     valid_mult_list.safe_grow_cleared (data_index + 1);
3876 
3877   valid_mult = valid_mult_list[data_index];
3878   if (!valid_mult)
3879     {
3880       machine_mode address_mode = targetm.addr_space.address_mode (as);
3881       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3882       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3883       rtx addr, scaled;
3884       HOST_WIDE_INT i;
3885 
3886       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3887       bitmap_clear (valid_mult);
3888       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3889       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3890       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3891 	{
3892 	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3893 	  if (memory_address_addr_space_p (mode, addr, as)
3894 	      || memory_address_addr_space_p (mode, scaled, as))
3895 	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
3896 	}
3897 
3898       if (dump_file && (dump_flags & TDF_DETAILS))
3899 	{
3900 	  fprintf (dump_file, "  allowed multipliers:");
3901 	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3902 	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3903 	      fprintf (dump_file, " %d", (int) i);
3904 	  fprintf (dump_file, "\n");
3905 	  fprintf (dump_file, "\n");
3906 	}
3907 
3908       valid_mult_list[data_index] = valid_mult;
3909     }
3910 
3911   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3912     return false;
3913 
3914   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3915 }
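
/* For example (illustrative and target-dependent): on a target with
   x86-style scaled index addressing, base + 4 * index is a valid address,
   so RATIO == 4 would be accepted, while RATIO == 3 would be rejected if
   base + 3 * index is not directly encodable.  The probe results are
   cached in VALID_MULT_LIST per (address space, mode) pair.  */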
3916 
3917 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3918    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3919    variable is omitted.  Compute the cost for a memory reference that accesses
3920    a memory location of mode MEM_MODE in address space AS.
3921 
3922    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3923    size of MEM_MODE / RATIO) is available.  To make this determination, we
3924    look at the size of the increment to be made, which is given in CSTEP.
3925    CSTEP may be zero if the step is unknown.
3926    STMT_AFTER_INC is true iff the statement we're looking at is after the
3927    increment of the original biv.
3928 
3929    TODO -- there must be some better way.  This all is quite crude.  */
3930 
3931 enum ainc_type
3932 {
3933   AINC_PRE_INC,		/* Pre increment.  */
3934   AINC_PRE_DEC,		/* Pre decrement.  */
3935   AINC_POST_INC,	/* Post increment.  */
3936   AINC_POST_DEC,	/* Post decrement.  */
3937   AINC_NONE		/* Also the number of auto increment types.  */
3938 };
3939 
3940 struct address_cost_data
3941 {
3942   HOST_WIDE_INT min_offset, max_offset;
3943   unsigned costs[2][2][2][2];
3944   unsigned ainc_costs[AINC_NONE];
3945 };
3946 
3947 
3948 static comp_cost
3949 get_address_cost (bool symbol_present, bool var_present,
3950 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3951 		  HOST_WIDE_INT cstep, machine_mode mem_mode,
3952 		  addr_space_t as, bool speed,
3953 		  bool stmt_after_inc, bool *may_autoinc)
3954 {
3955   machine_mode address_mode = targetm.addr_space.address_mode (as);
3956   static vec<address_cost_data *> address_cost_data_list;
3957   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3958   address_cost_data *data;
3959   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3960   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3961   unsigned cost, acost, complexity;
3962   enum ainc_type autoinc_type;
3963   bool offset_p, ratio_p, autoinc;
3964   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3965   unsigned HOST_WIDE_INT mask;
3966   unsigned bits;
3967 
3968   if (data_index >= address_cost_data_list.length ())
3969     address_cost_data_list.safe_grow_cleared (data_index + 1);
3970 
3971   data = address_cost_data_list[data_index];
3972   if (!data)
3973     {
3974       HOST_WIDE_INT i;
3975       HOST_WIDE_INT rat, off = 0;
3976       int old_cse_not_expected, width;
3977       unsigned sym_p, var_p, off_p, rat_p, add_c;
3978       rtx_insn *seq;
3979       rtx addr, base;
3980       rtx reg0, reg1;
3981 
3982       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3983 
3984       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3985 
3986       width = GET_MODE_BITSIZE (address_mode) - 1;
3987       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3988 	width = HOST_BITS_PER_WIDE_INT - 1;
3989       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3990 
3991       for (i = width; i >= 0; i--)
3992 	{
3993 	  off = -((unsigned HOST_WIDE_INT) 1 << i);
3994 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3995 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3996 	    break;
3997 	}
3998       data->min_offset = (i == -1? 0 : off);
3999 
4000       for (i = width; i >= 0; i--)
4001 	{
4002 	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4003 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4005 	    break;
4006 	  /* For some strict-alignment targets, the offset must be naturally
4007 	     aligned.  Try an aligned offset if mem_mode is not QImode.  */
4008 	  off = mem_mode != QImode
4009 		? ((unsigned HOST_WIDE_INT) 1 << i)
4010 		    - GET_MODE_SIZE (mem_mode)
4011 		: 0;
4012 	  if (off > 0)
4013 	    {
4014 	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
4015 	      if (memory_address_addr_space_p (mem_mode, addr, as))
4016 		break;
4017 	    }
4018 	}
4019       if (i == -1)
4020         off = 0;
4021       data->max_offset = off;
4022 
4023       if (dump_file && (dump_flags & TDF_DETAILS))
4024 	{
4025 	  fprintf (dump_file, "get_address_cost:\n");
4026 	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4027 		   GET_MODE_NAME (mem_mode),
4028 		   data->min_offset);
4029 	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4030 		   GET_MODE_NAME (mem_mode),
4031 		   data->max_offset);
4032 	}
4033 
4034       rat = 1;
4035       for (i = 2; i <= MAX_RATIO; i++)
4036 	if (multiplier_allowed_in_address_p (i, mem_mode, as))
4037 	  {
4038 	    rat = i;
4039 	    break;
4040 	  }
4041 
4042       /* Compute the cost of various addressing modes.  */
4043       acost = 0;
4044       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4045       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4046 
4047       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4048 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4049 	{
4050 	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
4051 	  has_predec[mem_mode]
4052 	    = memory_address_addr_space_p (mem_mode, addr, as);
4053 
4054 	  if (has_predec[mem_mode])
4055 	    data->ainc_costs[AINC_PRE_DEC]
4056 	      = address_cost (addr, mem_mode, as, speed);
4057 	}
4058       if (USE_LOAD_POST_DECREMENT (mem_mode)
4059 	  || USE_STORE_POST_DECREMENT (mem_mode))
4060 	{
4061 	  addr = gen_rtx_POST_DEC (address_mode, reg0);
4062 	  has_postdec[mem_mode]
4063 	    = memory_address_addr_space_p (mem_mode, addr, as);
4064 
4065 	  if (has_postdec[mem_mode])
4066 	    data->ainc_costs[AINC_POST_DEC]
4067 	      = address_cost (addr, mem_mode, as, speed);
4068 	}
4069       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4070 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4071 	{
4072 	  addr = gen_rtx_PRE_INC (address_mode, reg0);
4073 	  has_preinc[mem_mode]
4074 	    = memory_address_addr_space_p (mem_mode, addr, as);
4075 
4076 	  if (has_preinc[mem_mode])
4077 	    data->ainc_costs[AINC_PRE_INC]
4078 	      = address_cost (addr, mem_mode, as, speed);
4079 	}
4080       if (USE_LOAD_POST_INCREMENT (mem_mode)
4081 	  || USE_STORE_POST_INCREMENT (mem_mode))
4082 	{
4083 	  addr = gen_rtx_POST_INC (address_mode, reg0);
4084 	  has_postinc[mem_mode]
4085 	    = memory_address_addr_space_p (mem_mode, addr, as);
4086 
4087 	  if (has_postinc[mem_mode])
4088 	    data->ainc_costs[AINC_POST_INC]
4089 	      = address_cost (addr, mem_mode, as, speed);
4090 	}
4091       for (i = 0; i < 16; i++)
4092 	{
4093 	  sym_p = i & 1;
4094 	  var_p = (i >> 1) & 1;
4095 	  off_p = (i >> 2) & 1;
4096 	  rat_p = (i >> 3) & 1;
4097 
4098 	  addr = reg0;
4099 	  if (rat_p)
4100 	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4101 				   gen_int_mode (rat, address_mode));
4102 
4103 	  if (var_p)
4104 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4105 
4106 	  if (sym_p)
4107 	    {
4108 	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4109 	      /* ??? We can run into trouble with some backends by presenting
4110 		 them with symbols which haven't been properly passed through
4111 		 targetm.encode_section_info.  By setting the local bit, we
4112 		 enhance the probability of things working.  */
4113 	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4114 
4115 	      if (off_p)
4116 		base = gen_rtx_fmt_e (CONST, address_mode,
4117 				      gen_rtx_fmt_ee
4118 					(PLUS, address_mode, base,
4119 					 gen_int_mode (off, address_mode)));
4120 	    }
4121 	  else if (off_p)
4122 	    base = gen_int_mode (off, address_mode);
4123 	  else
4124 	    base = NULL_RTX;
4125 
4126 	  if (base)
4127 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4128 
4129 	  start_sequence ();
4130 	  /* To avoid splitting addressing modes, pretend that no cse will
4131 	     follow.  */
4132 	  old_cse_not_expected = cse_not_expected;
4133 	  cse_not_expected = true;
4134 	  addr = memory_address_addr_space (mem_mode, addr, as);
4135 	  cse_not_expected = old_cse_not_expected;
4136 	  seq = get_insns ();
4137 	  end_sequence ();
4138 
4139 	  acost = seq_cost (seq, speed);
4140 	  acost += address_cost (addr, mem_mode, as, speed);
4141 
4142 	  if (!acost)
4143 	    acost = 1;
4144 	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
4145 	}
4146 
4147       /* On some targets, it is quite expensive to load a symbol into a
4148 	 register, which makes addresses that contain symbols look much more
4149 	 expensive.  However, the symbol will have to be loaded in any case
4150 	 before the loop (and quite likely we have it in a register already),
4151 	 so it does not make much sense to penalize them too heavily.  So make
4152 	 some final tweaks for the SYMBOL_PRESENT modes:
4153 
4154 	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4155 	 var is cheaper, use this mode with a small penalty.
4156 	 If VAR_PRESENT is true, try whether the mode with
4157 	 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
4158 	 and if this is the case, use it.  */
4159       add_c = add_cost (speed, address_mode);
4160       for (i = 0; i < 8; i++)
4161 	{
4162 	  var_p = i & 1;
4163 	  off_p = (i >> 1) & 1;
4164 	  rat_p = (i >> 2) & 1;
4165 
4166 	  acost = data->costs[0][1][off_p][rat_p] + 1;
4167 	  if (var_p)
4168 	    acost += add_c;
4169 
4170 	  if (acost < data->costs[1][var_p][off_p][rat_p])
4171 	    data->costs[1][var_p][off_p][rat_p] = acost;
4172 	}
4173 
4174       if (dump_file && (dump_flags & TDF_DETAILS))
4175 	{
4176 	  fprintf (dump_file, "Address costs:\n");
4177 
4178 	  for (i = 0; i < 16; i++)
4179 	    {
4180 	      sym_p = i & 1;
4181 	      var_p = (i >> 1) & 1;
4182 	      off_p = (i >> 2) & 1;
4183 	      rat_p = (i >> 3) & 1;
4184 
4185 	      fprintf (dump_file, "  ");
4186 	      if (sym_p)
4187 		fprintf (dump_file, "sym + ");
4188 	      if (var_p)
4189 		fprintf (dump_file, "var + ");
4190 	      if (off_p)
4191 		fprintf (dump_file, "cst + ");
4192 	      if (rat_p)
4193 		fprintf (dump_file, "rat * ");
4194 
4195 	      acost = data->costs[sym_p][var_p][off_p][rat_p];
4196 	      fprintf (dump_file, "index costs %d\n", acost);
4197 	    }
4198 	  if (has_predec[mem_mode] || has_postdec[mem_mode]
4199 	      || has_preinc[mem_mode] || has_postinc[mem_mode])
4200 	    fprintf (dump_file, "  May include autoinc/dec\n");
4201 	  fprintf (dump_file, "\n");
4202 	}
4203 
4204       address_cost_data_list[data_index] = data;
4205     }
4206 
4207   bits = GET_MODE_BITSIZE (address_mode);
4208   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4209   offset &= mask;
4210   if ((offset >> (bits - 1) & 1))
4211     offset |= ~mask;
4212   s_offset = offset;
4213 
4214   autoinc = false;
4215   autoinc_type = AINC_NONE;
4216   msize = GET_MODE_SIZE (mem_mode);
4217   autoinc_offset = offset;
4218   if (stmt_after_inc)
4219     autoinc_offset += ratio * cstep;
4220   if (symbol_present || var_present || ratio != 1)
4221     autoinc = false;
4222   else
4223     {
4224       if (has_postinc[mem_mode] && autoinc_offset == 0
4225 	  && msize == cstep)
4226 	autoinc_type = AINC_POST_INC;
4227       else if (has_postdec[mem_mode] && autoinc_offset == 0
4228 	       && msize == -cstep)
4229 	autoinc_type = AINC_POST_DEC;
4230       else if (has_preinc[mem_mode] && autoinc_offset == msize
4231 	       && msize == cstep)
4232 	autoinc_type = AINC_PRE_INC;
4233       else if (has_predec[mem_mode] && autoinc_offset == -msize
4234 	       && msize == -cstep)
4235 	autoinc_type = AINC_PRE_DEC;
4236 
4237       if (autoinc_type != AINC_NONE)
4238 	autoinc = true;
4239     }
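
  /* Illustrative example (not from the original sources): stepping through
     a char array with cstep == 1 and mem_mode == QImode gives msize == 1;
     an access with autoinc_offset == 0 then matches AINC_POST_INC and one
     with autoinc_offset == msize matches AINC_PRE_INC, assuming the target
     supports those forms; negative steps map to the _DEC variants
     symmetrically.  */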
4240 
4241   cost = 0;
4242   offset_p = (s_offset != 0
4243 	      && data->min_offset <= s_offset
4244 	      && s_offset <= data->max_offset);
4245   ratio_p = (ratio != 1
4246 	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4247 
4248   if (ratio != 1 && !ratio_p)
4249     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4250 
4251   if (s_offset && !offset_p && !symbol_present)
4252     cost += add_cost (speed, address_mode);
4253 
4254   if (may_autoinc)
4255     *may_autoinc = autoinc;
4256   if (autoinc)
4257     acost = data->ainc_costs[autoinc_type];
4258   else
4259     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4260   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4261   return new_cost (cost + acost, complexity);
4262 }
4263 
4264 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4265    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4266    calculating the operands of EXPR.  Returns true if successful, and returns
4267    the cost in COST.  */
4268 
4269 static bool
4270 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4271                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4272 {
4273   comp_cost res;
4274   tree op1 = TREE_OPERAND (expr, 1);
4275   tree cst = TREE_OPERAND (mult, 1);
4276   tree multop = TREE_OPERAND (mult, 0);
4277   int m = exact_log2 (int_cst_value (cst));
4278   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4279   int as_cost, sa_cost;
4280   bool mult_in_op1;
4281 
4282   if (!(m >= 0 && m < maxm))
4283     return false;
4284 
4285   STRIP_NOPS (op1);
4286   mult_in_op1 = operand_equal_p (op1, mult, 0);
4287 
4288   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4289 
4290   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4291      use that in preference to a shift insn followed by an add insn.  */
4292   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4293              ? shiftadd_cost (speed, mode, m)
4294              : (mult_in_op1
4295                 ? shiftsub1_cost (speed, mode, m)
4296                 : shiftsub0_cost (speed, mode, m)));
4297 
4298   res = new_cost (MIN (as_cost, sa_cost), 0);
4299   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4300 
4301   STRIP_NOPS (multop);
4302   if (!is_gimple_val (multop))
4303     res = add_costs (res, force_expr_to_var_cost (multop, speed));
4304 
4305   *cost = res;
4306   return true;
4307 }
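
/* A hypothetical example of the above: for EXPR == a + b * 8, the constant
   8 gives m == 3, so the choice is between add_cost + shift_cost (shift b
   left by 3, then add a) and shiftadd_cost (a single shift-and-add insn);
   the cheaper alternative is taken and the operand costs are added on
   top.  */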
4308 
4309 /* Estimates cost of forcing expression EXPR into a variable.  */
4310 
4311 static comp_cost
4312 force_expr_to_var_cost (tree expr, bool speed)
4313 {
4314   static bool costs_initialized = false;
4315   static unsigned integer_cost [2];
4316   static unsigned symbol_cost [2];
4317   static unsigned address_cost [2];
4318   tree op0, op1;
4319   comp_cost cost0, cost1, cost;
4320   machine_mode mode;
4321 
4322   if (!costs_initialized)
4323     {
4324       tree type = build_pointer_type (integer_type_node);
4325       tree var, addr;
4326       rtx x;
4327       int i;
4328 
4329       var = create_tmp_var_raw (integer_type_node, "test_var");
4330       TREE_STATIC (var) = 1;
4331       x = produce_memory_decl_rtl (var, NULL);
4332       SET_DECL_RTL (var, x);
4333 
4334       addr = build1 (ADDR_EXPR, type, var);
4335 
4336 
4337       for (i = 0; i < 2; i++)
4338 	{
4339 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4340 							     2000), i);
4341 
4342 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4343 
4344 	  address_cost[i]
4345 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4346 	  if (dump_file && (dump_flags & TDF_DETAILS))
4347 	    {
4348 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4349 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4350 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4351 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4352 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4353 	      fprintf (dump_file, "\n");
4354 	    }
4355 	}
4356 
4357       costs_initialized = true;
4358     }
4359 
4360   STRIP_NOPS (expr);
4361 
4362   if (SSA_VAR_P (expr))
4363     return no_cost;
4364 
4365   if (is_gimple_min_invariant (expr))
4366     {
4367       if (TREE_CODE (expr) == INTEGER_CST)
4368 	return new_cost (integer_cost [speed], 0);
4369 
4370       if (TREE_CODE (expr) == ADDR_EXPR)
4371 	{
4372 	  tree obj = TREE_OPERAND (expr, 0);
4373 
4374 	  if (TREE_CODE (obj) == VAR_DECL
4375 	      || TREE_CODE (obj) == PARM_DECL
4376 	      || TREE_CODE (obj) == RESULT_DECL)
4377 	    return new_cost (symbol_cost [speed], 0);
4378 	}
4379 
4380       return new_cost (address_cost [speed], 0);
4381     }
4382 
4383   switch (TREE_CODE (expr))
4384     {
4385     case POINTER_PLUS_EXPR:
4386     case PLUS_EXPR:
4387     case MINUS_EXPR:
4388     case MULT_EXPR:
4389       op0 = TREE_OPERAND (expr, 0);
4390       op1 = TREE_OPERAND (expr, 1);
4391       STRIP_NOPS (op0);
4392       STRIP_NOPS (op1);
4393       break;
4394 
4395     CASE_CONVERT:
4396     case NEGATE_EXPR:
4397       op0 = TREE_OPERAND (expr, 0);
4398       STRIP_NOPS (op0);
4399       op1 = NULL_TREE;
4400       break;
4401 
4402     default:
4403       /* Just an arbitrary value, FIXME.  */
4404       return new_cost (target_spill_cost[speed], 0);
4405     }
4406 
4407   if (op0 == NULL_TREE
4408       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4409     cost0 = no_cost;
4410   else
4411     cost0 = force_expr_to_var_cost (op0, speed);
4412 
4413   if (op1 == NULL_TREE
4414       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4415     cost1 = no_cost;
4416   else
4417     cost1 = force_expr_to_var_cost (op1, speed);
4418 
4419   mode = TYPE_MODE (TREE_TYPE (expr));
4420   switch (TREE_CODE (expr))
4421     {
4422     case POINTER_PLUS_EXPR:
4423     case PLUS_EXPR:
4424     case MINUS_EXPR:
4425     case NEGATE_EXPR:
4426       cost = new_cost (add_cost (speed, mode), 0);
4427       if (TREE_CODE (expr) != NEGATE_EXPR)
4428         {
4429           tree mult = NULL_TREE;
4430           comp_cost sa_cost;
4431           if (TREE_CODE (op1) == MULT_EXPR)
4432             mult = op1;
4433           else if (TREE_CODE (op0) == MULT_EXPR)
4434             mult = op0;
4435 
4436           if (mult != NULL_TREE
4437               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4438               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4439                                     speed, &sa_cost))
4440             return sa_cost;
4441         }
4442       break;
4443 
4444     CASE_CONVERT:
4445       {
4446 	tree inner_mode, outer_mode;
4447 	outer_mode = TREE_TYPE (expr);
4448 	inner_mode = TREE_TYPE (op0);
4449 	cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4450 				       TYPE_MODE (inner_mode), speed), 0);
4451       }
4452       break;
4453 
4454     case MULT_EXPR:
4455       if (cst_and_fits_in_hwi (op0))
4456 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4457 					     mode, speed), 0);
4458       else if (cst_and_fits_in_hwi (op1))
4459 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4460 					     mode, speed), 0);
4461       else
4462 	return new_cost (target_spill_cost [speed], 0);
4463       break;
4464 
4465     default:
4466       gcc_unreachable ();
4467     }
4468 
4469   cost = add_costs (cost, cost0);
4470   cost = add_costs (cost, cost1);
4471 
4472   /* Bound the cost by target_spill_cost.  The parts of complicated
4473      computations often are either loop invariant or at least can
4474      be shared between several iv uses, so letting this grow without
4475      limits would not give reasonable results.  */
4476   if (cost.cost > (int) target_spill_cost [speed])
4477     cost.cost = target_spill_cost [speed];
4478 
4479   return cost;
4480 }
4481 
4482 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4483    invariants the computation depends on.  */
4484 
4485 static comp_cost
4486 force_var_cost (struct ivopts_data *data,
4487 		tree expr, bitmap *depends_on)
4488 {
4489   if (depends_on)
4490     {
4491       fd_ivopts_data = data;
4492       walk_tree (&expr, find_depends, depends_on, NULL);
4493     }
4494 
4495   return force_expr_to_var_cost (expr, data->speed);
4496 }
4497 
4498 /* Estimates cost of expressing address ADDR as var + symbol + offset.  The
4499    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4500    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4501    invariants the computation depends on.  */
4502 
4503 static comp_cost
4504 split_address_cost (struct ivopts_data *data,
4505 		    tree addr, bool *symbol_present, bool *var_present,
4506 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4507 {
4508   tree core;
4509   HOST_WIDE_INT bitsize;
4510   HOST_WIDE_INT bitpos;
4511   tree toffset;
4512   machine_mode mode;
4513   int unsignedp, reversep, volatilep;
4514 
4515   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4516 			      &unsignedp, &reversep, &volatilep, false);
4517 
4518   if (toffset != 0
4519       || bitpos % BITS_PER_UNIT != 0
4520       || reversep
4521       || TREE_CODE (core) != VAR_DECL)
4522     {
4523       *symbol_present = false;
4524       *var_present = true;
4525       fd_ivopts_data = data;
4526       if (depends_on)
4527 	walk_tree (&addr, find_depends, depends_on, NULL);
4528 
4529       return new_cost (target_spill_cost[data->speed], 0);
4530     }
4531 
4532   *offset += bitpos / BITS_PER_UNIT;
4533   if (TREE_STATIC (core)
4534       || DECL_EXTERNAL (core))
4535     {
4536       *symbol_present = true;
4537       *var_present = false;
4538       return no_cost;
4539     }
4540 
4541   *symbol_present = false;
4542   *var_present = true;
4543   return no_cost;
4544 }
4545 
4546 /* Estimates cost of expressing difference of addresses E1 - E2 as
4547    var + symbol + offset.  The value of offset is added to OFFSET,
4548    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4549    part is missing.  DEPENDS_ON is a set of the invariants the computation
4550    depends on.  */
4551 
4552 static comp_cost
4553 ptr_difference_cost (struct ivopts_data *data,
4554 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
4555 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4556 {
4557   HOST_WIDE_INT diff = 0;
4558   aff_tree aff_e1, aff_e2;
4559   tree type;
4560 
4561   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4562 
4563   if (ptr_difference_const (e1, e2, &diff))
4564     {
4565       *offset += diff;
4566       *symbol_present = false;
4567       *var_present = false;
4568       return no_cost;
4569     }
4570 
4571   if (integer_zerop (e2))
4572     return split_address_cost (data, TREE_OPERAND (e1, 0),
4573 			       symbol_present, var_present, offset, depends_on);
4574 
4575   *symbol_present = false;
4576   *var_present = true;
4577 
4578   type = signed_type_for (TREE_TYPE (e1));
4579   tree_to_aff_combination (e1, type, &aff_e1);
4580   tree_to_aff_combination (e2, type, &aff_e2);
4581   aff_combination_scale (&aff_e2, -1);
4582   aff_combination_add (&aff_e1, &aff_e2);
4583 
4584   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4585 }
4586 
4587 /* Estimates cost of expressing difference E1 - E2 as
4588    var + symbol + offset.  The value of offset is added to OFFSET,
4589    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4590    part is missing.  DEPENDS_ON is a set of the invariants the computation
4591    depends on.  */
4592 
4593 static comp_cost
4594 difference_cost (struct ivopts_data *data,
4595 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
4596 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4597 {
4598   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4599   unsigned HOST_WIDE_INT off1, off2;
4600   aff_tree aff_e1, aff_e2;
4601   tree type;
4602 
4603   e1 = strip_offset (e1, &off1);
4604   e2 = strip_offset (e2, &off2);
4605   *offset += off1 - off2;
4606 
4607   STRIP_NOPS (e1);
4608   STRIP_NOPS (e2);
4609 
4610   if (TREE_CODE (e1) == ADDR_EXPR)
4611     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4612 				offset, depends_on);
4613   *symbol_present = false;
4614 
4615   if (operand_equal_p (e1, e2, 0))
4616     {
4617       *var_present = false;
4618       return no_cost;
4619     }
4620 
4621   *var_present = true;
4622 
4623   if (integer_zerop (e2))
4624     return force_var_cost (data, e1, depends_on);
4625 
4626   if (integer_zerop (e1))
4627     {
4628       comp_cost cost = force_var_cost (data, e2, depends_on);
4629       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4630       return cost;
4631     }
4632 
4633   type = signed_type_for (TREE_TYPE (e1));
4634   tree_to_aff_combination (e1, type, &aff_e1);
4635   tree_to_aff_combination (e2, type, &aff_e2);
4636   aff_combination_scale (&aff_e2, -1);
4637   aff_combination_add (&aff_e1, &aff_e2);
4638 
4639   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4640 }
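
/* For instance (illustrative): for E1 == p + 16 and E2 == p + 4,
   strip_offset reduces both to p, *OFFSET grows by 12, *VAR_PRESENT ends up
   false and no_cost is returned, as the difference folds to a compile-time
   constant.  */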
4641 
4642 /* Returns true if AFF1 and AFF2 are identical.  */
4643 
4644 static bool
4645 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4646 {
4647   unsigned i;
4648 
4649   if (aff1->n != aff2->n)
4650     return false;
4651 
4652   for (i = 0; i < aff1->n; i++)
4653     {
4654       if (aff1->elts[i].coef != aff2->elts[i].coef)
4655         return false;
4656 
4657       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4658         return false;
4659     }
4660   return true;
4661 }
4662 
4663 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4664 
4665 static int
4666 get_expr_id (struct ivopts_data *data, tree expr)
4667 {
4668   struct iv_inv_expr_ent ent;
4669   struct iv_inv_expr_ent **slot;
4670 
4671   ent.expr = expr;
4672   ent.hash = iterative_hash_expr (expr, 0);
4673   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4674   if (*slot)
4675     return (*slot)->id;
4676 
4677   *slot = XNEW (struct iv_inv_expr_ent);
4678   (*slot)->expr = expr;
4679   (*slot)->hash = ent.hash;
4680   (*slot)->id = data->inv_expr_id++;
4681   return (*slot)->id;
4682 }
4683 
4684 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4685    requires a new compiler-generated temporary.  Returns -1 otherwise.
4686    ADDRESS_P is a flag indicating if the expression is for address
4687    computation.  */
4688 
4689 static int
4690 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4691                             tree cbase, HOST_WIDE_INT ratio,
4692                             bool address_p)
4693 {
4694   aff_tree ubase_aff, cbase_aff;
4695   tree expr, ub, cb;
4696 
4697   STRIP_NOPS (ubase);
4698   STRIP_NOPS (cbase);
4699   ub = ubase;
4700   cb = cbase;
4701 
4702   if ((TREE_CODE (ubase) == INTEGER_CST)
4703       && (TREE_CODE (cbase) == INTEGER_CST))
4704     return -1;
4705 
4706   /* Strips the constant part. */
4707   if (TREE_CODE (ubase) == PLUS_EXPR
4708       || TREE_CODE (ubase) == MINUS_EXPR
4709       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4710     {
4711       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4712         ubase = TREE_OPERAND (ubase, 0);
4713     }
4714 
4715   /* Strips the constant part. */
4716   if (TREE_CODE (cbase) == PLUS_EXPR
4717       || TREE_CODE (cbase) == MINUS_EXPR
4718       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4719     {
4720       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4721         cbase = TREE_OPERAND (cbase, 0);
4722     }
4723 
4724   if (address_p)
4725     {
4726       if (((TREE_CODE (ubase) == SSA_NAME)
4727            || (TREE_CODE (ubase) == ADDR_EXPR
4728                && is_gimple_min_invariant (ubase)))
4729           && (TREE_CODE (cbase) == INTEGER_CST))
4730         return -1;
4731 
4732       if (((TREE_CODE (cbase) == SSA_NAME)
4733            || (TREE_CODE (cbase) == ADDR_EXPR
4734                && is_gimple_min_invariant (cbase)))
4735           && (TREE_CODE (ubase) == INTEGER_CST))
4736         return -1;
4737     }
4738 
4739   if (ratio == 1)
4740     {
4741       if (operand_equal_p (ubase, cbase, 0))
4742         return -1;
4743 
4744       if (TREE_CODE (ubase) == ADDR_EXPR
4745           && TREE_CODE (cbase) == ADDR_EXPR)
4746         {
4747           tree usym, csym;
4748 
4749           usym = TREE_OPERAND (ubase, 0);
4750           csym = TREE_OPERAND (cbase, 0);
4751           if (TREE_CODE (usym) == ARRAY_REF)
4752             {
4753               tree ind = TREE_OPERAND (usym, 1);
4754               if (TREE_CODE (ind) == INTEGER_CST
4755                   && tree_fits_shwi_p (ind)
4756                   && tree_to_shwi (ind) == 0)
4757                 usym = TREE_OPERAND (usym, 0);
4758             }
4759           if (TREE_CODE (csym) == ARRAY_REF)
4760             {
4761               tree ind = TREE_OPERAND (csym, 1);
4762               if (TREE_CODE (ind) == INTEGER_CST
4763                   && tree_fits_shwi_p (ind)
4764                   && tree_to_shwi (ind) == 0)
4765                 csym = TREE_OPERAND (csym, 0);
4766             }
4767           if (operand_equal_p (usym, csym, 0))
4768             return -1;
4769         }
4770       /* Now do a more complex comparison.  */
4771       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4772       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4773       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4774         return -1;
4775     }
4776 
4777   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4778   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4779 
4780   aff_combination_scale (&cbase_aff, -1 * ratio);
4781   aff_combination_add (&ubase_aff, &cbase_aff);
4782   expr = aff_combination_to_tree (&ubase_aff);
4783   return get_expr_id (data, expr);
4784 }
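
/* Hypothetical examples of the above: with RATIO == 1, UBASE == &a[i] and
   CBASE == &a[i] compare equal, so -1 is returned and no new temporary is
   needed; with UBASE == &a[0] + x and CBASE == &b[0] + x the difference
   does not fold away, so the combined affine expression is built and its
   inv_expr_id is returned.  */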
4785 
4786 
4787 
4788 /* Determines the cost of the computation by which USE is expressed
4789    from induction variable CAND.  If ADDRESS_P is true, we just need
4790    to create an address from it, otherwise we want to get it into a
4791    register.  A set of invariants we depend on is stored in
4792    DEPENDS_ON.  AT is the statement at which the value is computed.
4793    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4794    addressing is likely.  */
4795 
4796 static comp_cost
4797 get_computation_cost_at (struct ivopts_data *data,
4798 			 struct iv_use *use, struct iv_cand *cand,
4799 			 bool address_p, bitmap *depends_on, gimple *at,
4800 			 bool *can_autoinc,
4801                          int *inv_expr_id)
4802 {
4803   tree ubase = use->iv->base, ustep = use->iv->step;
4804   tree cbase, cstep;
4805   tree utype = TREE_TYPE (ubase), ctype;
4806   unsigned HOST_WIDE_INT cstepi, offset = 0;
4807   HOST_WIDE_INT ratio, aratio;
4808   bool var_present, symbol_present, stmt_is_after_inc;
4809   comp_cost cost;
4810   widest_int rat;
4811   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4812   machine_mode mem_mode = (address_p
4813 				? TYPE_MODE (TREE_TYPE (*use->op_p))
4814 				: VOIDmode);
4815 
4816   if (depends_on)
4817     *depends_on = NULL;
4818 
4819   /* Only consider real candidates.  */
4820   if (!cand->iv)
4821     return infinite_cost;
4822 
4823   cbase = cand->iv->base;
4824   cstep = cand->iv->step;
4825   ctype = TREE_TYPE (cbase);
4826 
4827   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4828     {
4829       /* We do not have enough precision to express the values of USE.  */
4830       return infinite_cost;
4831     }
4832 
4833   if (address_p
4834       || (use->iv->base_object
4835 	  && cand->iv->base_object
4836 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4837 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4838     {
4839       /* Do not try to express address of an object with computation based
4840 	 on address of a different object.  This may cause problems in rtl
4841 	 level alias analysis (that does not expect this to be happening,
4842 	 as this is illegal in C), and would be unlikely to be useful
4843 	 anyway.  */
4844       if (use->iv->base_object
4845 	  && cand->iv->base_object
4846 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4847 	return infinite_cost;
4848     }
4849 
4850   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4851     {
4852       /* TODO -- add direct handling of this case.  */
4853       goto fallback;
4854     }
4855 
4856   /* CSTEPI is removed from the offset in case the statement is after the
4857      increment.  If the step is not constant, we use zero instead.
4858      This is a bit imprecise (there is an extra addition), but
4859      redundancy elimination is likely to transform the code so that
4860      it uses the value of the variable before the increment anyway,
4861      so this is not all that unrealistic.  */
4862   if (cst_and_fits_in_hwi (cstep))
4863     cstepi = int_cst_value (cstep);
4864   else
4865     cstepi = 0;
4866 
4867   if (!constant_multiple_of (ustep, cstep, &rat))
4868     return infinite_cost;
4869 
4870   if (wi::fits_shwi_p (rat))
4871     ratio = rat.to_shwi ();
4872   else
4873     return infinite_cost;
4874 
4875   STRIP_NOPS (cbase);
4876   ctype = TREE_TYPE (cbase);
4877 
4878   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4879 
4880   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4881      or ratio == 1, it is better to handle this like
4882 
4883      ubase - ratio * cbase + ratio * var
4884 
4885      (this also holds in the case ratio == -1, TODO).  */
4886 
4887   if (cst_and_fits_in_hwi (cbase))
4888     {
4889       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4890       cost = difference_cost (data,
4891 			      ubase, build_int_cst (utype, 0),
4892 			      &symbol_present, &var_present, &offset,
4893 			      depends_on);
4894       cost.cost /= avg_loop_niter (data->current_loop);
4895     }
4896   else if (ratio == 1)
4897     {
4898       tree real_cbase = cbase;
4899 
4900       /* Check to see if any adjustment is needed.  */
4901       if (cstepi == 0 && stmt_is_after_inc)
4902         {
4903           aff_tree real_cbase_aff;
4904           aff_tree cstep_aff;
4905 
4906           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4907                                    &real_cbase_aff);
4908           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4909 
4910           aff_combination_add (&real_cbase_aff, &cstep_aff);
4911           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4912         }
4913 
4914       cost = difference_cost (data,
4915 			      ubase, real_cbase,
4916 			      &symbol_present, &var_present, &offset,
4917 			      depends_on);
4918       cost.cost /= avg_loop_niter (data->current_loop);
4919     }
4920   else if (address_p
4921 	   && !POINTER_TYPE_P (ctype)
4922 	   && multiplier_allowed_in_address_p
4923 		(ratio, mem_mode,
4924 			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4925     {
4926       if (cstepi == 0 && stmt_is_after_inc)
4927 	{
4928 	  if (POINTER_TYPE_P (ctype))
4929 	    cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4930 	  else
4931 	    cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4932 	}
4933       cbase
4934 	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4935       cost = difference_cost (data,
4936 			      ubase, cbase,
4937 			      &symbol_present, &var_present, &offset,
4938 			      depends_on);
4939       cost.cost /= avg_loop_niter (data->current_loop);
4940     }
4941   else
4942     {
4943       cost = force_var_cost (data, cbase, depends_on);
4944       cost = add_costs (cost,
4945 			difference_cost (data,
4946 					 ubase, build_int_cst (utype, 0),
4947 					 &symbol_present, &var_present,
4948 					 &offset, depends_on));
4949       cost.cost /= avg_loop_niter (data->current_loop);
4950       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4951     }
4952 
4953   /* Record the setup cost in the scratch field.  */
4954   cost.scratch = cost.cost;
4955   /* The set of invariants depended on by a sub-use has already been
4956      computed for the first use in the group.  */
4957   if (use->sub_id)
4958     {
4959       cost.cost = 0;
4960       if (depends_on && *depends_on)
4961 	bitmap_clear (*depends_on);
4962     }
4963   else if (inv_expr_id)
4964     {
4965       *inv_expr_id =
4966           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4967       /* Clear depends on.  */
4968       if (*inv_expr_id != -1 && depends_on && *depends_on)
4969         bitmap_clear (*depends_on);
4970     }
4971 
4972   /* If we are after the increment, the value of the candidate is higher by
4973      one iteration.  */
4974   if (stmt_is_after_inc)
4975     offset -= ratio * cstepi;
4976 
4977   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4978      (symbol/var1/const parts may be omitted).  If we are looking for an
4979      address, find the cost of addressing this.  */
4980   if (address_p)
4981     return add_costs (cost,
4982 		      get_address_cost (symbol_present, var_present,
4983 					offset, ratio, cstepi,
4984 					mem_mode,
4985 					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4986 					speed, stmt_is_after_inc,
4987 					can_autoinc));
4988 
4989   /* Otherwise estimate the costs for computing the expression.  */
4990   if (!symbol_present && !var_present && !offset)
4991     {
4992       if (ratio != 1)
4993 	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4994       return cost;
4995     }
4996 
4997   /* Symbol + offset should be compile-time computable, so consider that
4998      they are added once to the variable, if present.  */
4999   if (var_present && (symbol_present || offset))
5000     cost.cost += adjust_setup_cost (data,
5001 				    add_cost (speed, TYPE_MODE (ctype)));
5002 
5003   /* Having offset does not affect runtime cost in case it is added to
5004      symbol, but it increases complexity.  */
5005   if (offset)
5006     cost.complexity++;
5007 
5008   cost.cost += add_cost (speed, TYPE_MODE (ctype));
5009 
5010   aratio = ratio > 0 ? ratio : -ratio;
5011   if (aratio != 1)
5012     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5013   return cost;
5014 
5015 fallback:
5016   if (can_autoinc)
5017     *can_autoinc = false;
5018 
5019   {
5020     /* Just get the expression, expand it and measure the cost.  */
5021     tree comp = get_computation_at (data->current_loop, use, cand, at);
5022 
5023     if (!comp)
5024       return infinite_cost;
5025 
5026     if (address_p)
5027       comp = build_simple_mem_ref (comp);
5028 
5029     cost = new_cost (computation_cost (comp, speed), 0);
5030     cost.scratch = 0;
5031     return cost;
5032   }
5033 }
5034 
5035 /* Determines the cost of the computation by which USE is expressed
5036    from induction variable CAND.  If ADDRESS_P is true, we just need
5037    to create an address from it, otherwise we want to get it into a
5038    register.  A set of invariants we depend on is stored in
5039    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
5040    autoinc addressing is likely.  */
5041 
5042 static comp_cost
5043 get_computation_cost (struct ivopts_data *data,
5044 		      struct iv_use *use, struct iv_cand *cand,
5045 		      bool address_p, bitmap *depends_on,
5046                       bool *can_autoinc, int *inv_expr_id)
5047 {
5048   return get_computation_cost_at (data,
5049 				  use, cand, address_p, depends_on, use->stmt,
5050 				  can_autoinc, inv_expr_id);
5051 }
5052 
5053 /* Determines cost of basing replacement of USE on CAND in a generic
5054    expression.  */
5055 
5056 static bool
5057 determine_use_iv_cost_generic (struct ivopts_data *data,
5058 			       struct iv_use *use, struct iv_cand *cand)
5059 {
5060   bitmap depends_on;
5061   comp_cost cost;
5062   int inv_expr_id = -1;
5063 
5064   /* The simple case first -- if we need to express the value of the
5065      preserved original biv, the cost is 0.  This also prevents us from
5066      counting the cost of the increment twice -- once at this use and once
5067      in the cost of the candidate.  */
5068   if (cand->pos == IP_ORIGINAL
5069       && cand->incremented_at == use->stmt)
5070     {
5071       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5072                        ERROR_MARK, -1);
5073       return true;
5074     }
5075 
5076   cost = get_computation_cost (data, use, cand, false, &depends_on,
5077                                NULL, &inv_expr_id);
5078 
5079   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5080                    inv_expr_id);
5081 
5082   return !infinite_cost_p (cost);
5083 }
5084 
/* Determines the cost of basing the replacement of USE on CAND in an
   address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc, first;
  int inv_expr_id = -1;
  struct iv_use *sub_use;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
					 &can_autoinc, &inv_expr_id);
  comp_cost sub_cost = cost;

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
	cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	cost = infinite_cost;
    }

  if (!infinite_cost_p (cost) && use->next)
    {
      first = true;
      sub_use = use->next;
      /* We don't want to add setup cost for sub-uses.  */
      sub_cost.cost -= sub_cost.scratch;
      /* Add cost for sub uses in group.  */
      do
	{
	  /* Compute the cost for the first sub use with an offset different
	     from the main use's, and add it afterwards.  Costs for these uses
	     could be quite different.  Given the below uses in a group:
	       use 0  : {base + A + offset_0, step}
	       use 0.1: {base + A + offset_0, step}
	       use 0.2: {base + A + offset_1, step}
	       use 0.3: {base + A + offset_2, step}
	     when we need to compute costs with candidate:
	       cand 1 : {base + B + offset_0, step}

	     The first sub use with a different offset is use 0.2; its cost
	     is larger than the cost of use 0/0.1 because we need to compute:
	       A - B + offset_1 - offset_0
	     rather than:
	       A - B.  */
	  if (first && use->addr_offset != sub_use->addr_offset)
	    {
	      first = false;
	      sub_cost = get_computation_cost (data, sub_use, cand, true,
					       NULL, &can_autoinc, NULL);
	      if (infinite_cost_p (sub_cost))
		{
		  cost = infinite_cost;
		  break;
		}
	    }

	  cost = add_costs (cost, sub_cost);
	  sub_use = sub_use->next;
	}
      while (sub_use);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Computes the value of candidate CAND at position AT in iteration NITER
   (i.e., BASE + NITER * STEP, evaluated in an unsigned type, plus one
   extra STEP if AT is after the increment of CAND) and stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  /* Compute in an unsigned type so that the arithmetic cannot invoke
     undefined overflow; unsigned_type_for also handles pointer types.  */
  tree steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}

/* Returns the period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* The period of the iv is lcm (step, type_range) / step - 1, i.e.,
     N * type_range / step - 1 with N == step >> num_ending_zeros (step).
     Since type_range is a power of two, the final result is

       (type_range >> num_ending_zeros (step)) - 1.  */
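  /* E.g., for a 32-bit unsigned iv with step 12 = 4 * 3, num_ending_zeros
     is 2 and the period is (1 << 30) - 1; indeed,
     lcm (12, 2^32) / 12 - 1 = 3 * 2^32 / 12 - 1 = 2^30 - 1.  */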
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_to_uhwi (pow2div)));

  return period;
}

/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

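  /* If the exit is taken when the condition is true, the eliminated
     condition tests the candidate for equality with the bound, so the
     loop exits once CAND == BOUND; otherwise it keeps running while
     CAND != BOUND.  */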
  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in a non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.  */
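/* E.g., in the scenario handled by iv_elimination_compare_lt below, BASE
   is p_0 = base + a and OFFSET is a (for a step of 1): the affine
   expansion of the second operand of the addition cancels against
   -OFFSET, which proves that BASE - OFFSET = base cannot overflow.  */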

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as a deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
				  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
				  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
	return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
				      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}

/* Tries to replace the loop exit with one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loop.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether the candidate is increasing or
     decreasing in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* The expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}

/* Check whether it is possible to express the condition in USE by a
   comparison of candidate CAND.  If so, store the value compared with to
   BOUND, and the comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now this works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside the loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
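  /* E.g., an 8-bit unsigned iv with step 1 has period 255, so it can only
     be used to test an exit that is taken after at most 255 iterations;
     for a longer-running loop the iv would wrap around and reach the
     bound value too early.  */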
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound
	     information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction
     variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}

/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines the cost of basing the replacement of USE on CAND in a
   condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr_id = get_expr_id (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

/* Determines the cost of basing the replacement of USE on CAND.  Returns
   false if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
			       &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */
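/* For instance, a use of *p immediately followed by p = p + 1 in the same
   basic block may be combined into a post-increment address on targets
   that support such addressing modes.  */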

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;

      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);

	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
	    continue;

	  if (uid < gimple_uid (cand->incremented_at)
	      && (closest_before == NULL
		  || uid > gimple_uid (closest_before->stmt)))
	    closest_before = use;

	  if (uid > gimple_uid (cand->incremented_at)
	      && (closest_after == NULL
		  || uid < gimple_uid (closest_after->stmt)))
	    closest_after = use;
	}

      if (closest_before != NULL
	  && autoinc_possible_for_pair (data, closest_before, cand))
	cand->ainc_use = closest_before;
      else if (closest_after != NULL
	       && autoinc_possible_for_pair (data, closest_after, cand))
	cand->ainc_use = closest_after;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_uses (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines the costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d",
			 use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}

/* Determines the cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we give it only a small weight.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will be at least a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing them.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into the latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines the costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates the cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
					    data->body_includes_call);
}

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n",
	       target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n",
	       target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n",
	       target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}


/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure, i.e., the sum of the use
   costs, the candidate costs, and the global penalty for the number of
   registers and invariant expressions used.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs
					    + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= use->id);
  ivs->upto++;
  ivs->bad_uses++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      cp = get_use_iv_cost (data, use, cand);
      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  cp = get_use_iv_cost (data, use, cand);
	  if (cheaper_cost_pair (cp, best_cp))
	    best_cp = cp;
	}
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      std::swap (act->old_cp, act->new_cp);
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns the number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost,
	   ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing the candidate in IVS to CAND for each use.  Return the cost
   of the new set, and store the differences in DELTA.  The number of
   induction variables in the new set is stored to N_IVS.  MIN_NCAND is a
   flag: when it is true, the function tries to find a solution with a
   minimal number of iv candidates.  */
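/* Note that the changes are committed only temporarily here, so that the
   cost of the extended set can be measured; the caller applies DELTA for
   real through iv_ca_delta_commit if it decides to keep it.  */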

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore the old cp for this use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return the cost of the new set, and store
   the differences in DELTA.  */
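/* This works greedily: iv_ca_narrow removes one candidate at a time, and
   we then recurse on the improved set, so several unnecessary ivs may be
   eliminated in a single call.  */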

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Check if CAND_IDX is a candidate other than OLD_CAND that has a
   cheaper local cost for USE than BEST_CP.  If so, return a pointer to
   the corresponding cost_pair; otherwise just return BEST_CP.  */

static struct cost_pair *
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
			unsigned int cand_idx, struct iv_cand *old_cand,
			struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = iv_cand (data, cand_idx);
  cp = get_use_iv_cost (data, use, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}

/* Try breaking the local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv use.  For each of those candidates,
   this function tries to represent the iv uses under that candidate using
   other candidates with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   the candidate replacement in the list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent the uses under the current candidate using other
	 candidates with lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  use = iv_use (data, j);
	  old_cp = iv_ca_cand_for_use (ivs, use);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < n_iv_cands (data); k++)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
	}
      /* No need for further pruning.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (compare_costs (acost, orig_cost) < 0)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
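      /* The preferred candidates gave no finite cost; fall back to
	 scanning every candidate recorded in the use's cost map.  */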
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break a locally optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

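  /* Three kinds of moves are attempted, in turn: extending the set by one
     candidate (pruning afterwards if the set is small enough), pruning the
     current set, and, as a last resort, replacing candidates as described
     in the comment before iv_ca_replace.  */
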
  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate-selecting algorithm tends to choose fewer
	     IVs, which handles loops with many variables for which the best
	     choice is often a single general biv.  Its weakness is the
	     opposite case, in which different candidates should be chosen
	     for different uses.  To handle that, we replace candidates in
	     the manner described in the comment before iv_ca_replace, thus
	     giving the general algorithm a chance to break the locally
	     optimal fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with the original
     IVs, then try again using a strategy that prefers candidates not based
     on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
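  /* For instance (an illustrative, made-up GIMPLE sketch): if the use is
     the original biv increment i_7 = i_4 + 1 and the candidate chosen for
     it is the original biv itself, rewriting would only reproduce the same
     statement, possibly with extra casts.  */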
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

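  /* The transformation applies only when the block ends with exactly the
     pattern from the comment above: the use statement, immediately followed
     by the iv update, immediately followed by the exit test.  */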
  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address_1 (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
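  /* If the alignment of the original reference differs from the default
     alignment of its type, build an explicitly aligned variant of the type
     for the new memory reference.  */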
  tree type = TREE_TYPE (*use->op_p);
  unsigned int align = get_object_alignment (*use->op_p);
  if (align != TYPE_ALIGN (type))
    type = build_aligned_type (type, align);
  ref = create_mem_ref (&bsi, type, &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
   first use of a group, rewrites sub uses in the group too.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  struct iv_use *next;

  gcc_assert (use->sub_id == 0);
  rewrite_use_address_1 (data, use, cand);
  update_stmt (use->stmt);

  for (next = use->next; next != NULL; next = next->next)
    {
      rewrite_use_address_1 (data, next, cand);
      update_stmt (next->stmt);
    }

  return;
}

/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

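  /* If elimination of the comparison was found possible during cost
     computation, CP->value holds the bound to compare the candidate
     against; in that case, rewrite the exit test directly.  */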
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
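	      /* Pick the candidate likely to give the simplest expression
		 of DEF, using a small preference score: a step equal to the
		 removed IV's counts most (4), a matching mode counts next
		 (2), and a constant base counts least (1).  Only the first
		 64 uses are scanned, to bound compile time.  */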
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

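	      /* DEF is bound more than once, or is used as part of a larger
		 expression; introduce a debug temporary so the replacement
		 computation is emitted only once.  */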
	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct iv_use *pre = use, *sub = use->next;

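      /* Sub uses of an address group carry no cost data of their own; the
	 group leader owns the related_cands bitmap and the cost map (as the
	 asserts below check), so the sub uses just need to be freed.  */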
      while (sub)
	{
	  gcc_assert (sub->related_cands == NULL);
	  gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);

	  pre = sub;
	  sub = sub->next;
	  free (pre);
	}

      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  group_address_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}