/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  A small example of the overall effect of the
   pass follows.  */
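
/* As an illustration (not from an actual dump), in a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass finds the address use a[i] and the compare use i < n.
   Strength reduction may introduce a pointer candidate advancing by
   sizeof (a[0]) each iteration for the address use, and induction
   variable elimination may then rewrite the exit test in terms of that
   pointer, making i itself dead:

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;  */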
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if HAVE_USE_FOR is
                           true.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For the original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in the step of
                           the biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, we always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists a number X such that X * B = A, counting
   modulo 2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
        HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
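  /* This works since B is odd here and hence a unit in the ring of
     integers modulo 2^bits; the group of units has 2^(bits - 1) elements,
     so by Lagrange's theorem b^(2^(bits - 1)) = 1 and therefore
     b^(2^(bits - 1) - 1) = b^(-1).  The loop below computes this power by
     repeated squaring: after the I-th iteration, INV = b^(2^I - 1) and
     EX = b^(2^I).  For example, for bits = 4 and b = 3 we get
     inv = 3^7 = 2187 = 11 (mod 16), and indeed 3 * 11 = 33 = 1 (mod 16).  */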
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns a tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  struct tree_niter_desc desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
                                   htab_hash_pointer (exit),
                                   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;

      /* Try to determine the number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && zero_p (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        nfe_desc->niter = desc.niter;
      else
        nfe_desc->niter = NULL_TREE;
    }
  else
    nfe_desc = *slot;

  return nfe_desc->niter;
}

/* Returns a tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
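/* For instance, the base object of &a[i] is &a, and the base object of
   p + 4 for a pointer p is p itself; an expression without pointer type
   has no base object.  (Illustration only; the exact result depends on
   the shape of the trees involved.)  */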

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, to avoid showing it in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */
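/* For instance, a test i < n where i is an iv with nonzero step and n is
   loop invariant is recorded as a compare use, since iv elimination may
   later replace it with a test on another iv; if both operands are ivs
   with nonzero step, or an operand is neither an iv nor invariant, the
   operands are only recorded as ordinary uses (an illustration of the
   conditions checked below).  */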

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we may not be able to do
         any useful induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating a condition based on two ivs would be nontrivial.
         ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a job for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
             && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              if (step)
                step = fold_build2 (PLUS_EXPR, type, step, astep);
              else
                step = astep;
            }
        }

      if (zero_p (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || zero_p (step))
        goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !zero_p (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || zero_p (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (zero_p (op1))
        expr = op0;
      else if (zero_p (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && zero_p (op1))
        {
          /* Strip the array reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */
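/* For instance, for an array A of 4-byte elements, &A[5] would be
   decomposed into &A[0] with *OFFSET set to 20.  (Illustration only;
   the exact trees depend on how the address was built.)  */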

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */
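/* For instance, for a 32-bit int this returns a 32-bit unsigned integer
   type, and for a pointer an unsigned integer type of the same width
   (assuming the usual behavior of unsigned_type_for).  */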

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
   for the replacement of the final value of the iv by a direct computation
   of the value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          if (step)
            step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
        continue;

      if (zero_p (cand->iv->step))
        {
          if (zero_p (step))
            break;
        }
      else
        {
          if (step && operand_equal_p (step, cand->iv->step, 0))
            break;
        }
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
2080
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2082 is allowed.
2083
2084 The purpose is to avoid splitting latch edge with a biv increment, thus
2085 creating a jump, possibly confusing other optimization passes and leaving
2086 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2087 is not available (so we do not have a better alternative), or if the latch
2088 edge is already nonempty. */
2089
2090 static bool
allow_ip_end_pos_p(struct loop * loop)2091 allow_ip_end_pos_p (struct loop *loop)
2092 {
2093 if (!ip_normal_pos (loop))
2094 return true;
2095
2096 if (!empty_block_p (ip_end_pos (loop)))
2097 return true;
2098
2099 return false;
2100 }
2101
2102 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2103 position to POS. If USE is not NULL, the candidate is set as related to
2104 it. The candidate computation is scheduled on all available positions. */
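/* Roughly speaking, IP_NORMAL places the increment right before the exit
   test of the loop, while IP_END places it at the end of the latch block;
   a candidate added here is tried in each position that is usable.  */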

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}


/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
	add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough that many uses may be based on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}

/* Records important candidates and adds them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array;
   otherwise we allocate a smaller hash table for each use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to a power of two, so that computing the modulo by it
	     is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}

/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */
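/* A sketch of the scheme used below: when not all candidates are
   considered, cost_map is an open-addressed hash table whose size is a
   power of two.  A pair is placed at slot "cand->id & (n_map_members - 1)"
   (the modulo); on a collision the following slots are probed linearly,
   wrapping around to the beginning of the table.  */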

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
		 bitmap depends_on, tree value)
{
  unsigned i, s;

  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}

/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET);
      else
	cost++;
    }

  return cost;
}

/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
    }
  else
    x = gen_raw_REG (Pmode, (*regno)++);

  return gen_rtx_MEM (DECL_MODE (obj), x);
}

/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Returns the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
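/* For example, for a constant of an 8-bit type, the bit examined is bit 7,
   so the result is 1 for (signed char) -1 (bit pattern 0xff) and 0 for
   (signed char) 1.  */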

int
tree_int_cst_sign_bit (tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
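/* For instance, with TOP = n * 4 and BOT = n, 4 is stored to *MUL; with
   TOP = 12 and BOT = 4, 3 is stored; TOP = 13 and BOT = 4 fails because
   the division leaves a remainder.  */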

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      /* Divide TOP by BOT; the quotient is the multiple.  */
      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR,
						  &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

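/* The functions below manipulate affine combinations.  A combination COMB
   represents the value

     COMB.offset + sum_i COMB.coefs[i] * COMB.elts[i] + COMB.rest

   computed modulo COMB.mask + 1, i.e. in the precision of COMB.type.
   For instance, "4 * x + y - 7" in a 32-bit type is represented with
   n = 2, elts = {x, y}, coefs = {4, 1} and offset = -7 & mask; REST
   collects whatever does not fit into the MAX_AFF_ELTS tracked
   elements.  */
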
/* Sets COMB to CST.  */

static void
aff_combination_const (struct affine_tree_combination *comb, tree type,
		       unsigned HOST_WIDE_INT cst)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 0;
  comb->rest = NULL_TREE;
  comb->offset = cst & comb->mask;
}

/* Sets COMB to single element ELT.  */

static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 1;
  comb->elts[0] = elt;
  comb->coefs[0] = 1;
  comb->rest = NULL_TREE;
  comb->offset = 0;
}

/* Scales COMB by SCALE.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
		       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  if (scale == 1)
    return;

  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
	j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      if (comb->n < MAX_AFF_ELTS)
	{
	  comb->coefs[comb->n] = scale;
	  comb->elts[comb->n] = comb->rest;
	  comb->rest = NULL_TREE;
	  comb->n++;
	}
      else
	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
				  build_int_cst_type (comb->type, scale));
    }
}

/* Adds ELT * SCALE to COMB.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
			 unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
	if (comb->coefs[i])
	  return;

	comb->n--;
	comb->coefs[i] = comb->coefs[comb->n];
	comb->elts[i] = comb->elts[comb->n];

	if (comb->rest)
	  {
	    gcc_assert (comb->n == MAX_AFF_ELTS - 1);
	    comb->coefs[comb->n] = 1;
	    comb->elts[comb->n] = comb->rest;
	    comb->rest = NULL_TREE;
	    comb->n++;
	  }
	return;
      }
  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
		       fold_convert (comb->type, elt),
		       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}

/* Adds COMB2 to COMB1.  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
		     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  for (i = 0; i < comb2->n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}

/* Converts COMB to TYPE.  */

static void
aff_combination_convert (tree type, struct affine_tree_combination *comb)
{
  unsigned prec = TYPE_PRECISION (type);
  unsigned i;

  /* If the precision of both types is the same, it suffices to change the
     type of the whole combination -- the elements are allowed to have
     another type equivalent w.r.t. STRIP_NOPS.  */
  if (prec == TYPE_PRECISION (comb->type))
    {
      comb->type = type;
      return;
    }

  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
  comb->offset = comb->offset & comb->mask;

  /* The type of the elements can be different from comb->type only as
     much as what STRIP_NOPS would remove.  We can just directly cast
     to TYPE.  */
  for (i = 0; i < comb->n; i++)
    comb->elts[i] = fold_convert (type, comb->elts[i]);
  if (comb->rest)
    comb->rest = fold_convert (type, comb->rest);

  comb->type = type;
}

/* Splits EXPR into an affine combination of parts.  */

static void
tree_to_aff_combination (tree expr, tree type,
			 struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      if (code == MINUS_EXPR)
	aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
	break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
				  &toffset, &mode, &unsignedp, &volatilep,
				  false);
      if (bitpos % BITS_PER_UNIT != 0)
	break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
	aff_combination_add_elt (comb, core, 1);
      else
	{
	  tree_to_aff_combination (core, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      if (toffset)
	{
	  tree_to_aff_combination (toffset, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      return;

    default:
      break;
    }

  aff_combination_elt (comb, type, expr);
}

/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for the width of
   TYPE.  */
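/* SCALE is interpreted modulo MASK + 1, so e.g. with a 32-bit MASK the
   value 0xffffffff stands for -1 and produces EXPR - ELT below.  */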

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
		 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
	return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  if (scale == mask)
    {
      if (!expr)
	return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
			build_int_cst_type (type, scale));

  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
		     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}

/* Copies the tree elements of COMB to ensure that they are not shared.  */

static void
unshare_aff_combination (struct affine_tree_combination *comb)
{
  unsigned i;

  for (i = 0; i < comb->n; i++)
    comb->elts[i] = unshare_expr (comb->elts[i]);
  if (comb->rest)
    comb->rest = unshare_expr (comb->rest);
}

/* Makes tree from the affine combination COMB.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  if (comb->n == 0 && comb->offset == 0)
    {
      if (expr)
	{
	  /* Handle the special case produced by get_computation_aff when
	     the type does not fit in HOST_WIDE_INT.  */
	  return fold_convert (type, expr);
	}
      else
	return build_int_cst (type, 0);
    }

  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
			    comb->mask);

  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
			  comb->mask);
}

/* Folds EXPR using the affine expressions framework.  */
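/* For instance, "(a + 4) - a" decomposes into the element "a" with
   coefficients 1 and -1, which cancel, plus the offset 4, so it folds to
   the constant 4 -- a simplification that plain fold may miss for more
   convoluted forms of the same expression.  */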

static tree
fold_affine_expr (tree expr)
{
  tree type = TREE_TYPE (expr);
  struct affine_tree_combination comb;

  if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
    return expr;

  tree_to_aff_combination (expr, type, &comb);
  return aff_combination_to_tree (&comb);
}

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if ((TREE_CODE (*a) == NOP_EXPR
       || TREE_CODE (*a) == CONVERT_EXPR))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if ((TREE_CODE (*b) == NOP_EXPR
       || TREE_CODE (*b) == CONVERT_EXPR))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, tree at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type;
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;
  tree cstep_orig = cstep, ustep_orig = ustep;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have enough precision to express the values of the use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);

      /* If the conversion is not a noop, we must take it into account when
	 considering the value of the step.  */
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
	cstep_orig = cstep;
    }

  if (cst_and_fits_in_hwi (cstep_orig)
      && cst_and_fits_in_hwi (ustep_orig))
    {
      ustepi = int_cst_value (ustep_orig);
      cstepi = int_cst_value (cstep_orig);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
	{
	  /* TODO maybe consider the case when ustep divides cstep and the
	     ratio is a power of 2 (so that the division is fast to
	     execute)?  We would need to be much more careful with overflows
	     etc. then.  */
	  return false;
	}

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
	return false;
      ratio = double_int_to_tree (uutype, rat);

      /* Ratioi is only used to detect special cases when the multiplicative
	 factor is 1 or -1, so if rat does not fit into HOST_WIDE_INT, we may
	 set it to 0.  */
      if (double_int_fits_in_shwi_p (rat))
	ratioi = double_int_to_shwi (rat);
      else
	ratioi = 0;
    }

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and casting the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetics will in the end be performed in
     UUTYPE anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      if (uutype != common_type)
	cstep = fold_convert (common_type, cstep);
      cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
    }

  /* use = ubase - ratio * cbase + ratio * var.

     In the general case ubase + ratio * (var - cbase) could be better (one
     less multiplication), but often it is possible to eliminate redundant
     parts of computations from the (ubase - ratio * cbase) term, and if that
     does not happen, fold is able to apply the distributive law to obtain
     this form anyway.  */
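  /* As a concrete example, for a use with base 0 and step 4 based on a
     candidate with base 0 and step 1, the ratio is 4 and the use is
     rebuilt as 0 - 4 * 0 + 4 * var, i.e. simply 4 * var.  */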

  if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This
	 case should not be very common, and fold itself is not that bad
	 either, so making the aff. functions more complicated to handle
	 this case is not that urgent.  */
      if (ratioi == 1)
	{
	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
	  if (uutype != common_type)
	    delta = fold_convert (uutype, delta);
	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
	}
      else if (ratioi == -1)
	{
	  delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
	  if (uutype != common_type)
	    delta = fold_convert (uutype, delta);
	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
	}
      else
	{
	  delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
	  if (uutype != common_type)
	    delta = fold_convert (uutype, delta);
	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
	}

      aff->type = uutype;
      aff->n = 0;
      aff->offset = 0;
      aff->mask = 0;
      aff->rest = expr;
      return true;
    }

  /* If we got here, the type fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (uutype, aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  struct affine_tree_combination aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};

/* Computes hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = entry1;
  const struct mbc_entry *e2 = entry2;

  return (e1->mode == e2->mode
	  && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}

/* Returns true if multiplying by RATIO is allowed in an address.  */
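/* For instance, on targets with scaled indexed addressing such as i386,
   the usual valid multipliers are 1, 2, 4 and 8; the probing below
   discovers this by asking memory_address_p about each candidate ratio.  */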

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (memory_address_p (Pmode, addr))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}

/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, the symbol is omitted.  If VAR_PRESENT is
   false, the variable is omitted.  The memory access is assumed to be
   in Pmode.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  static bool initialized = false;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  bool offset_p, ratio_p;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      int old_cse_not_expected;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);

      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
				   gen_int_mode (rat, Pmode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	      if (off_p)
		base = gen_rtx_fmt_e (CONST, Pmode,
				      gen_rtx_fmt_ee (PLUS, Pmode,
						      base,
						      gen_int_mode (off,
								    Pmode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, Pmode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address (Pmode, addr);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq);
	  acost += address_cost (addr, Pmode);

	  if (!acost)
	    acost = 1;
	  costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load a symbol to a
	 register, which makes addresses that contain symbols look much more
	 expensive.  However, the symbol will have to be loaded in any case
	 before the loop (and quite likely we have it in a register already),
	 so it does not make much sense to penalize them too heavily.  So
	 make some final tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with a small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with the cost of the
	 addition, and if this is the case, use it.  */
      add_c = add_cost (Pmode);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < costs[1][var_p][off_p][rat_p])
	    costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  fprintf (dump_file, "\n");
	}
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  return cost + acost;
}

/* Estimates cost of forcing expression EXPR into a variable.  */
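/* A rough sketch of the costs computed below: an SSA name costs 0, an
   integer constant costs integer_cost, and taking the address of a
   declared variable costs symbol_cost.  For instance, "a * 4" with "a"
   already available costs multiply_by_cost (4, mode), and "a + b" costs
   add_cost (mode); everything is capped by target_spill_cost.  */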
3563
3564 unsigned
force_expr_to_var_cost(tree expr)3565 force_expr_to_var_cost (tree expr)
3566 {
3567 static bool costs_initialized = false;
3568 static unsigned integer_cost;
3569 static unsigned symbol_cost;
3570 static unsigned address_cost;
3571 tree op0, op1;
3572 unsigned cost0, cost1, cost;
3573 enum machine_mode mode;
3574
3575 if (!costs_initialized)
3576 {
3577 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3578 rtx x = gen_rtx_MEM (DECL_MODE (var),
3579 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3580 tree addr;
3581 tree type = build_pointer_type (integer_type_node);
3582
3583 integer_cost = computation_cost (build_int_cst (integer_type_node,
3584 2000));
3585
3586 SET_DECL_RTL (var, x);
3587 TREE_STATIC (var) = 1;
3588 addr = build1 (ADDR_EXPR, type, var);
3589 symbol_cost = computation_cost (addr) + 1;
3590
3591 address_cost
3592 = computation_cost (build2 (PLUS_EXPR, type,
3593 addr,
3594 build_int_cst (type, 2000))) + 1;
3595 if (dump_file && (dump_flags & TDF_DETAILS))
3596 {
3597 fprintf (dump_file, "force_expr_to_var_cost:\n");
3598 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3599 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3600 fprintf (dump_file, " address %d\n", (int) address_cost);
3601 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3602 fprintf (dump_file, "\n");
3603 }
3604
3605 costs_initialized = true;
3606 }
3607
3608 STRIP_NOPS (expr);
3609
3610 if (SSA_VAR_P (expr))
3611 return 0;
3612
3613 if (TREE_INVARIANT (expr))
3614 {
3615 if (TREE_CODE (expr) == INTEGER_CST)
3616 return integer_cost;
3617
3618 if (TREE_CODE (expr) == ADDR_EXPR)
3619 {
3620 tree obj = TREE_OPERAND (expr, 0);
3621
3622 if (TREE_CODE (obj) == VAR_DECL
3623 || TREE_CODE (obj) == PARM_DECL
3624 || TREE_CODE (obj) == RESULT_DECL)
3625 return symbol_cost;
3626 }
3627
3628 return address_cost;
3629 }
3630
3631 switch (TREE_CODE (expr))
3632 {
3633 case PLUS_EXPR:
3634 case MINUS_EXPR:
3635 case MULT_EXPR:
3636 op0 = TREE_OPERAND (expr, 0);
3637 op1 = TREE_OPERAND (expr, 1);
3638 STRIP_NOPS (op0);
3639 STRIP_NOPS (op1);
3640
3641 if (is_gimple_val (op0))
3642 cost0 = 0;
3643 else
3644 cost0 = force_expr_to_var_cost (op0);
3645
3646 if (is_gimple_val (op1))
3647 cost1 = 0;
3648 else
3649 cost1 = force_expr_to_var_cost (op1);
3650
3651 break;
3652
3653 default:
3654 /* Just an arbitrary value, FIXME. */
3655 return target_spill_cost;
3656 }
3657
3658 mode = TYPE_MODE (TREE_TYPE (expr));
3659 switch (TREE_CODE (expr))
3660 {
3661 case PLUS_EXPR:
3662 case MINUS_EXPR:
3663 cost = add_cost (mode);
3664 break;
3665
3666 case MULT_EXPR:
3667 if (cst_and_fits_in_hwi (op0))
3668 cost = multiply_by_cost (int_cst_value (op0), mode);
3669 else if (cst_and_fits_in_hwi (op1))
3670 cost = multiply_by_cost (int_cst_value (op1), mode);
3671 else
3672 return target_spill_cost;
3673 break;
3674
3675 default:
3676 gcc_unreachable ();
3677 }
3678
3679 cost += cost0;
3680 cost += cost1;
3681
3682 /* Bound the cost by target_spill_cost. The parts of complicated
3683 computations often are either loop invariant or at least can
3684 be shared between several iv uses, so letting this grow without
3685 limits would not give reasonable results. */
3686 return cost < target_spill_cost ? cost : target_spill_cost;
3687 }
3688
3689 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3690 invariants the computation depends on. */
3691
3692 static unsigned
force_var_cost(struct ivopts_data * data,tree expr,bitmap * depends_on)3693 force_var_cost (struct ivopts_data *data,
3694 tree expr, bitmap *depends_on)
3695 {
3696 if (depends_on)
3697 {
3698 fd_ivopts_data = data;
3699 walk_tree (&expr, find_depends, depends_on, NULL);
3700 }
3701
3702 return force_expr_to_var_cost (expr);
3703 }
3704
3705 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3706 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3707 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3708 invariants the computation depends on. */
3709
3710 static unsigned
split_address_cost(struct ivopts_data * data,tree addr,bool * symbol_present,bool * var_present,unsigned HOST_WIDE_INT * offset,bitmap * depends_on)3711 split_address_cost (struct ivopts_data *data,
3712 tree addr, bool *symbol_present, bool *var_present,
3713 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3714 {
3715 tree core;
3716 HOST_WIDE_INT bitsize;
3717 HOST_WIDE_INT bitpos;
3718 tree toffset;
3719 enum machine_mode mode;
3720 int unsignedp, volatilep;
3721
3722 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3723 &unsignedp, &volatilep, false);
3724
3725 if (toffset != 0
3726 || bitpos % BITS_PER_UNIT != 0
3727 || TREE_CODE (core) != VAR_DECL)
3728 {
3729 *symbol_present = false;
3730 *var_present = true;
3731 fd_ivopts_data = data;
3732 walk_tree (&addr, find_depends, depends_on, NULL);
3733 return target_spill_cost;
3734 }
3735
3736 *offset += bitpos / BITS_PER_UNIT;
3737 if (TREE_STATIC (core)
3738 || DECL_EXTERNAL (core))
3739 {
3740 *symbol_present = true;
3741 *var_present = false;
3742 return 0;
3743 }
3744
3745 *symbol_present = false;
3746 *var_present = true;
3747 return 0;
3748 }
3749
3750 /* Estimates cost of expressing difference of addresses E1 - E2 as
3751 var + symbol + offset. The value of offset is added to OFFSET,
3752 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3753 part is missing. DEPENDS_ON is a set of the invariants the computation
3754 depends on. */
3755
3756 static unsigned
ptr_difference_cost(struct ivopts_data * data,tree e1,tree e2,bool * symbol_present,bool * var_present,unsigned HOST_WIDE_INT * offset,bitmap * depends_on)3757 ptr_difference_cost (struct ivopts_data *data,
3758 tree e1, tree e2, bool *symbol_present, bool *var_present,
3759 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3760 {
3761 HOST_WIDE_INT diff = 0;
3762 unsigned cost;
3763
3764 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3765
3766 if (ptr_difference_const (e1, e2, &diff))
3767 {
3768 *offset += diff;
3769 *symbol_present = false;
3770 *var_present = false;
3771 return 0;
3772 }
3773
3774 if (e2 == integer_zero_node)
3775 return split_address_cost (data, TREE_OPERAND (e1, 0),
3776 symbol_present, var_present, offset, depends_on);
3777
3778 *symbol_present = false;
3779 *var_present = true;
3780
3781 cost = force_var_cost (data, e1, depends_on);
3782 cost += force_var_cost (data, e2, depends_on);
3783 cost += add_cost (Pmode);
3784
3785 return cost;
3786 }
3787
3788 /* Estimates cost of expressing difference E1 - E2 as
3789 var + symbol + offset. The value of offset is added to OFFSET,
3790 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3791 part is missing. DEPENDS_ON is a set of the invariants the computation
3792 depends on. */
3793
3794 static unsigned
difference_cost(struct ivopts_data * data,tree e1,tree e2,bool * symbol_present,bool * var_present,unsigned HOST_WIDE_INT * offset,bitmap * depends_on)3795 difference_cost (struct ivopts_data *data,
3796 tree e1, tree e2, bool *symbol_present, bool *var_present,
3797 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3798 {
3799 unsigned cost;
3800 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3801 unsigned HOST_WIDE_INT off1, off2;
3802
3803 e1 = strip_offset (e1, &off1);
3804 e2 = strip_offset (e2, &off2);
3805 *offset += off1 - off2;
3806
3807 STRIP_NOPS (e1);
3808 STRIP_NOPS (e2);
3809
3810 if (TREE_CODE (e1) == ADDR_EXPR)
3811 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3812 depends_on);
3813 *symbol_present = false;
3814
3815 if (operand_equal_p (e1, e2, 0))
3816 {
3817 *var_present = false;
3818 return 0;
3819 }
3820 *var_present = true;
3821 if (zero_p (e2))
3822 return force_var_cost (data, e1, depends_on);
3823
3824 if (zero_p (e1))
3825 {
3826 cost = force_var_cost (data, e2, depends_on);
3827 cost += multiply_by_cost (-1, mode);
3828
3829 return cost;
3830 }
3831
3832 cost = force_var_cost (data, e1, depends_on);
3833 cost += force_var_cost (data, e2, depends_on);
3834 cost += add_cost (mode);
3835
3836 return cost;
3837 }
3838
3839 /* Determines the cost of the computation by that USE is expressed
3840 from induction variable CAND. If ADDRESS_P is true, we just need
3841 to create an address from it, otherwise we want to get it into
3842 register. A set of invariants we depend on is stored in
3843 DEPENDS_ON. AT is the statement at that the value is computed. */
3844
3845 static unsigned
get_computation_cost_at(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand,bool address_p,bitmap * depends_on,tree at)3846 get_computation_cost_at (struct ivopts_data *data,
3847 struct iv_use *use, struct iv_cand *cand,
3848 bool address_p, bitmap *depends_on, tree at)
3849 {
3850 tree ubase = use->iv->base, ustep = use->iv->step;
3851 tree cbase, cstep;
3852 tree utype = TREE_TYPE (ubase), ctype;
3853 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3854 HOST_WIDE_INT ratio, aratio;
3855 bool var_present, symbol_present;
3856 unsigned cost = 0, n_sums;
3857
3858 *depends_on = NULL;
3859
3860 /* Only consider real candidates. */
3861 if (!cand->iv)
3862 return INFTY;
3863
3864 cbase = cand->iv->base;
3865 cstep = cand->iv->step;
3866 ctype = TREE_TYPE (cbase);
3867
3868 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3869 {
3870 /* We do not have a precision to express the values of use. */
3871 return INFTY;
3872 }
3873
3874 if (address_p)
3875 {
3876 /* Do not try to express address of an object with computation based
3877 on address of a different object. This may cause problems in rtl
3878 level alias analysis (that does not expect this to be happening,
3879 as this is illegal in C), and would be unlikely to be useful
3880 anyway. */
3881 if (use->iv->base_object
3882 && cand->iv->base_object
3883 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3884 return INFTY;
3885 }
3886
3887 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3888 {
3889 /* TODO -- add direct handling of this case. */
3890 goto fallback;
3891 }
3892
3893 /* CSTEPI is removed from the offset in case statement is after the
3894 increment. If the step is not constant, we use zero instead.
3895 This is a bit imprecise (there is the extra addition), but
3896 redundancy elimination is likely to transform the code so that
3897 it uses value of the variable before increment anyway,
3898 so it is not that much unrealistic. */
3899 if (cst_and_fits_in_hwi (cstep))
3900 cstepi = int_cst_value (cstep);
3901 else
3902 cstepi = 0;
3903
3904 if (cst_and_fits_in_hwi (ustep)
3905 && cst_and_fits_in_hwi (cstep))
3906 {
3907 ustepi = int_cst_value (ustep);
3908
3909 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3910 return INFTY;
3911 }
3912 else
3913 {
3914 double_int rat;
3915
3916 if (!constant_multiple_of (ustep, cstep, &rat))
3917 return INFTY;
3918
3919 if (double_int_fits_in_shwi_p (rat))
3920 ratio = double_int_to_shwi (rat);
3921 else
3922 return INFTY;
3923 }
3924
3925 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3926 or ratio == 1, it is better to handle this like
3927
3928 ubase - ratio * cbase + ratio * var
3929
3930 (also holds in the case ratio == -1, TODO. */
3931
  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
                               ubase, cbase,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else
    {
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is of the form symbol + var1 + const + ratio * var2
     (the symbol/var/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}

/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static unsigned
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt);
}

/* Determines the cost of basing the replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;

  /* The simple case first -- if we need to express the value of the
     preserved original biv, the cost is 0.  This also prevents us from
     counting the cost of the increment twice -- once at this use and once
     in the cost of the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}

/* Determines the cost of basing the replacement of USE on CAND in an
   address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}

/* Computes the value of induction variable IV in iteration NITER.  */

static tree
iv_value (struct iv *iv, tree niter)
{
  tree val;
  tree type = TREE_TYPE (iv->base);

  niter = fold_convert (type, niter);
  val = fold_build2 (MULT_EXPR, type, iv->step, niter);

  return fold_build2 (PLUS_EXPR, type, iv->base, val);
}

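/* For example, an iv with base 10 and step 3 has the value 10 + 3 * NITER
   in iteration NITER; the folding above is done in the type of the base, so
   for an unsigned 32-bit iv the arithmetic wraps modulo 2^32 as expected.  */
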
/* Computes the value of candidate CAND at the position AT in iteration
   NITER.  */

static tree
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
{
  tree val = iv_value (cand->iv, niter);
  tree type = TREE_TYPE (cand->iv->base);

  if (stmt_after_increment (loop, cand, at))
    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);

  return val;
}

/* Returns the period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* The period of the iv is gcd (step, type range).  Since the type range
     is a power of two, it suffices to determine the maximum power of two
     that divides the step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}

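/* As a concrete illustration (the numbers are made up): for step == 12 in a
   32-bit type, num_ending_zeros is 2, so the mask built above has 30 low
   bits set and the returned period is 2^30 - 1.  The candidate therefore
   does not revisit a value for that many iterations, which is what makes
   the elimination test in may_eliminate_iv below safe.  */
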
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = bb_for_stmt (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Checks whether it is possible to express the condition in USE by a
   comparison of candidate CAND.  If so, store the value compared against
   in BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, nit_type;
  tree wider_type, period, per_type;
  struct loop *loop = data->current_loop;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now this works only for exits that dominate the loop latch.
     TODO -- extend it for other conditions inside the loop body.  */
  ex_bb = bb_for_stmt (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || TREE_CODE (use->stmt) != COND_EXPR)
    return false;
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit);
  if (!nit)
    return false;

  nit_type = TREE_TYPE (nit);

  /* Determine whether we may use the variable to test whether niter
     iterations elapsed.  This is the case iff the period of the induction
     variable is greater than the number of iterations.  */
  period = iv_period (cand->iv);
  if (!period)
    return false;
  per_type = TREE_TYPE (period);

  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
    wider_type = per_type;
  else
    wider_type = nit_type;

  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                      fold_convert (wider_type, period),
                                      fold_convert (wider_type, nit))))
    return false;

  *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
  return true;
}

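/* A hypothetical example of the elimination this enables: for a loop with a
   known iteration count nit, exited by a test on a 32-bit counter with step
   1 (period 2^32 - 1, comfortably above any feasible nit), the exit test
   can instead compare the chosen candidate against
   cand_value_at (loop, cand, use->stmt, nit) -- e.g. a pointer candidate
   against its final value.  All concrete names here are illustrative.  */
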
/* Determines the cost of basing the replacement of USE on CAND in a
   condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
        op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
        {
          op = get_iv (data, op)->base;
          fd_ivopts_data = data;
          walk_tree (&op, find_depends, &depends_on, NULL);
        }
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}

/* Determines the cost of basing the replacement of USE on CAND.  Returns
   false if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Determines the costs of basing each use of an iv on the iv candidates.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, " cand\tcost\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || use->cost_map[j].cost == INFTY)
                continue;

              fprintf (dump_file, " %d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines the cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we give it only little weight.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into the latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}

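/* Numerically, as an illustration (assuming AVG_LOOP_NITER expands to the
   constant 5): a candidate whose base costs 10 to force into a register and
   whose increment is a single addition of cost 1 gets
   cand->cost = 1 + 10 / 5 = 3; if it moreover preserves a user-visible
   original iv, the preference above lowers this to 2.  */
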
/* Determines the costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, " cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates the cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
}

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (so that we are not
        encouraged to create a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

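  /* To make the tiers concrete (numbers invented for illustration): with
     A = 6 available registers, reserve R = 3 and U = 2 registers already
     used by the loop, a set of I = 1 ivs satisfies U + I + R <= A and costs
     1 * SMALL_COST; I = 2 falls into the middle tier and costs
     2 * PRES_COST; I = 5 gives U + I = 7 > A, so spill costs proportional
     to (7 - 6) / 7 of the uses are added on top of 5 * PRES_COST.  */
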
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
    }

  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " cost for size:\n");
      fprintf (dump_file, " ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, " %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (a->cost > b->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

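/* For example, a pair with use cost 4 based on a candidate of cost 2 beats
   a pair with use cost 4 based on a candidate of cost 3 -- ties on the use
   cost are broken by how expensive the candidate itself is to maintain.  */
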
/* Computes the cost field of the IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned cost = 0;

  cost += ivs->cand_use_cost;
  cost += ivs->cand_cost;
  cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}

/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}

/* Adds the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Sets the cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extends set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Gets the cost of the assignment IVS.  */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}

/* Returns true if all dependences of CP are among the invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- the old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverses the list of changes DELTA, forming its inverse.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commits the changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

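/* The usual trial pattern built on top of this primitive (see e.g.
   iv_ca_extend below) is to apply a delta, measure the resulting cost and
   then revert it:

       iv_ca_delta_commit (data, ivs, delta, true);
       cost = iv_ca_cost (ivs);
       iv_ca_delta_commit (data, ivs, delta, false);

   Reverting works because the reversal above also swaps old_cp/new_cp.  */
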
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns the number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Frees the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new assignment of candidates to uses.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = 0;

  return nw;
}

/* Frees the memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = " invariants ";
  unsigned i;

  fprintf (file, " cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, " candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n");
}

/* Try changing the candidate in IVS to CAND for each use.  Return the cost
   of the new set, and store the differences in DELTA.  The number of
   induction variables in the new set is stored in N_IVS.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return INFTY;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return the cost of the new set, and store
   the differences in DELTA.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if that fails, try the specific
     ones.  Rationale -- in loops with many variables the best choice often
     is to use just one generic biv.  If we added here many ivs specific to
     the uses, the optimization algorithm later would be likely to get stuck
     in a local minimum, thus causing us to create too many ivs.  The
     approach from few ivs to more seems more likely to be successful --
     starting from a few ivs, replacing an expensive use by a specific iv
     should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (act_cost < best_cost)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the
   selected solution and remove the unused ivs while this improves the
   cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates the new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, also remove the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      if (!including_defined_name)
        {
          /* Prevent the ssa name defined by the statement from being
             removed.  */
          SET_PHI_RESULT (stmt, NULL);
        }
      remove_phi_node (stmt, NULL_TREE);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi, true);
    }
}

/* Rewrites USE (the definition of an iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to an ICE.  */
      op = TREE_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
          || TREE_CODE (op) == MINUS_EXPR)
        {
          if (TREE_OPERAND (op, 0) == cand->var_before)
            op = TREE_OPERAND (op, 1);
          else if (TREE_CODE (op) == PLUS_EXPR
                   && TREE_OPERAND (op, 1) == cand->var_before)
            op = TREE_OPERAND (op, 0);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    comp = get_computation (data->current_loop, use, cand);

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
             && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
        {
          bsi = pbsi;
          bsi_next (&pbsi);
        }
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      if (stmts)
        bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}

/* Replaces the ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces the ssa names inside it by their basic
   variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Extracts the alias analysis info for the memory reference REF.  There are
   several ways this information may be stored and what precisely its
   semantics is, depending on the type of the reference, but there is always
   one _DECL node hidden somewhere that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts that single useful piece of information.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
        break;
    }

  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name
         memory tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
        return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      var = SSA_NAME_VAR (var);
      tag = var_ann (var)->symbol_mem_tag;
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
        return NULL_TREE;

      tag = var_ann (var)->symbol_mem_tag;
      if (tag)
        return tag;

      return var;
    }
}

/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}

/* Rewrites USE (an address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  struct affine_tree_combination aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;

  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv)
   using candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
                                 true, NULL_TREE);

      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);

  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}

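/* An illustrative before/after (all ssa names here are hypothetical): if
   may_eliminate_iv stored a bound for the exit test "if (i_3 < n_2)", the
   branch above rewrites the condition to "if (p_4 == bound_5)" (or with
   NE_EXPR, depending on the exit edge), where p_4 is the value of the
   selected candidate at the exit statement and bound_5 is the forced
   gimple operand holding the bound.  */
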
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data,
             struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }
  mark_new_vars_to_rename (use->stmt);
}

/* Rewrites the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !zero_p (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}

/* Frees the data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, " single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Find the interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Find the candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculate the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes the induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
        {
          loop = loop->next;
          while (loop->inner)
            loop = loop->inner;
        }
      else
        loop = loop->outer;
    }

  tree_ssa_iv_optimize_finalize (&data);
}