1 /* Induction variable optimizations.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible; it might give better performance and enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
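/* As a purely illustrative sketch (not taken from the sources), the
   strength reduction performed here may rewrite a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   so that the address of a[i] is kept in a pointer induction variable
   bumped by the element size each iteration, instead of being recomputed
   from i:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   assuming that comparing and incrementing pointers is cheap on the
   target.  */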
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "stor-layout.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "pointer-set.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "is-a.h"
80 #include "gimple.h"
81 #include "gimplify.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
85 #include "cgraph.h"
86 #include "tree-cfg.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
95 #include "expr.h"
96 #include "tree-dfa.h"
97 #include "tree-ssa.h"
98 #include "cfgloop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
103 #include "cfgloop.h"
104 #include "params.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
107 #include "target.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
110 #include "expmed.h"
111 #include "tree-ssa-address.h"
112
113 /* FIXME: Expressions are expanded to RTL in this pass to determine the
114 cost of different addressing modes. This should be moved to a TBD
115 interface between the GIMPLE and RTL worlds. */
116 #include "expr.h"
117 #include "recog.h"
118
119 /* The infinite cost. */
120 #define INFTY 10000000
121
122 #define AVG_LOOP_NITER(LOOP) 5
123
124 /* Returns the expected number of loop iterations for LOOP.
125 The average trip count is computed from profile data if it
126 exists. */
127
128 static inline HOST_WIDE_INT
129 avg_loop_niter (struct loop *loop)
130 {
131 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
132 if (niter == -1)
133 return AVG_LOOP_NITER (loop);
134
135 return niter;
136 }
137
138 /* Representation of the induction variable. */
139 struct iv
140 {
141 tree base; /* Initial value of the iv. */
142 tree base_object; /* A memory object to which the induction variable points. */
143 tree step; /* Step of the iv (constant only). */
144 tree ssa_name; /* The ssa name with the value. */
145 bool biv_p; /* Is it a biv? */
146 bool have_use_for; /* Do we already have a use for it? */
147 unsigned use_id; /* The identifier in the use if it is the case. */
148 };
149
150 /* Per-ssa version information (induction variable descriptions, etc.). */
151 struct version_info
152 {
153 tree name; /* The ssa name. */
154 struct iv *iv; /* Induction variable description. */
155 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
156 an expression that is not an induction variable. */
157 bool preserve_biv; /* For the original biv, whether to preserve it. */
158 unsigned inv_id; /* Id of an invariant. */
159 };
160
161 /* Types of uses. */
162 enum use_type
163 {
164 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
165 USE_ADDRESS, /* Use in an address. */
166 USE_COMPARE /* Use is a compare. */
167 };
168
169 /* Cost of a computation. */
170 typedef struct
171 {
172 int cost; /* The runtime cost. */
173 unsigned complexity; /* The estimate of the complexity of the code for
174 the computation (in no concrete units --
175 complexity field should be larger for more
176 complex expressions and addressing modes). */
177 } comp_cost;
178
179 static const comp_cost no_cost = {0, 0};
180 static const comp_cost infinite_cost = {INFTY, INFTY};
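/* For example (with made-up numbers), a comp_cost of {4, 1} describes a
   computation estimated at 4 units of runtime whose addressing mode has
   complexity 1; infinite_cost marks combinations that must never be
   chosen.  */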
181
182 /* The candidate - cost pair. */
183 struct cost_pair
184 {
185 struct iv_cand *cand; /* The candidate. */
186 comp_cost cost; /* The cost. */
187 bitmap depends_on; /* The list of invariants that have to be
188 preserved. */
189 tree value; /* For final value elimination, the expression for
190 the final value of the iv. For iv elimination,
191 the new bound to compare with. */
192 enum tree_code comp; /* For iv elimination, the comparison. */
193 int inv_expr_id; /* Loop invariant expression id. */
194 };
195
196 /* Use. */
197 struct iv_use
198 {
199 unsigned id; /* The id of the use. */
200 enum use_type type; /* Type of the use. */
201 struct iv *iv; /* The induction variable it is based on. */
202 gimple stmt; /* Statement in which it occurs. */
203 tree *op_p; /* The place where it occurs. */
204 bitmap related_cands; /* The set of "related" iv candidates, plus the common
205 important ones. */
206
207 unsigned n_map_members; /* Number of candidates in the cost_map list. */
208 struct cost_pair *cost_map;
209 /* The costs w.r.t. the iv candidates. */
210
211 struct iv_cand *selected;
212 /* The selected candidate. */
213 };
214
215 /* The position where the iv is computed. */
216 enum iv_position
217 {
218 IP_NORMAL, /* At the end, just before the exit condition. */
219 IP_END, /* At the end of the latch block. */
220 IP_BEFORE_USE, /* Immediately before a specific use. */
221 IP_AFTER_USE, /* Immediately after a specific use. */
222 IP_ORIGINAL /* The original biv. */
223 };
224
225 /* The induction variable candidate. */
226 struct iv_cand
227 {
228 unsigned id; /* The number of the candidate. */
229 bool important; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
232 gimple incremented_at;/* For original biv, the statement where it is
233 incremented. */
234 tree var_before; /* The variable used for it before increment. */
235 tree var_after; /* The variable used for it after increment. */
236 struct iv *iv; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost; /* Cost of the candidate. */
241 unsigned cost_step; /* Cost of the candidate's increment operation. */
242 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on; /* The list of invariants that are used in step of the
245 biv. */
246 };
247
248 /* Loop invariant expression hashtable entry. */
249 struct iv_inv_expr_ent
250 {
251 tree expr;
252 int id;
253 hashval_t hash;
254 };
255
256 /* The data used by the induction variable optimizations. */
257
258 typedef struct iv_use *iv_use_p;
259
260 typedef struct iv_cand *iv_cand_p;
261
262 /* Hashtable helpers. */
263
264 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
265 {
266 typedef iv_inv_expr_ent value_type;
267 typedef iv_inv_expr_ent compare_type;
268 static inline hashval_t hash (const value_type *);
269 static inline bool equal (const value_type *, const compare_type *);
270 };
271
272 /* Hash function for loop invariant expressions. */
273
274 inline hashval_t
275 iv_inv_expr_hasher::hash (const value_type *expr)
276 {
277 return expr->hash;
278 }
279
280 /* Hash table equality function for expressions. */
281
282 inline bool
283 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
284 {
285 return expr1->hash == expr2->hash
286 && operand_equal_p (expr1->expr, expr2->expr, 0);
287 }
288
289 struct ivopts_data
290 {
291 /* The currently optimized loop. */
292 struct loop *current_loop;
293
294 /* Numbers of iterations for all exits of the current loop. */
295 struct pointer_map_t *niters;
296
297 /* Number of registers used in the current loop. */
298 unsigned regs_used;
299
300 /* The size of version_info array allocated. */
301 unsigned version_info_size;
302
303 /* The array of information for the ssa names. */
304 struct version_info *version_info;
305
306 /* The hashtable of loop invariant expressions created
307 by ivopt. */
308 hash_table <iv_inv_expr_hasher> inv_expr_tab;
309
310 /* Loop invariant expression id. */
311 int inv_expr_id;
312
313 /* The bitmap of indices in version_info whose value was changed. */
314 bitmap relevant;
315
316 /* The uses of induction variables. */
317 vec<iv_use_p> iv_uses;
318
319 /* The candidates. */
320 vec<iv_cand_p> iv_candidates;
321
322 /* A bitmap of important candidates. */
323 bitmap important_candidates;
324
325 /* The maximum invariant id. */
326 unsigned max_inv_id;
327
328 /* Whether to consider just related and important candidates when replacing a
329 use. */
330 bool consider_all_candidates;
331
332 /* Are we optimizing for speed? */
333 bool speed;
334
335 /* Whether the loop body includes any function calls. */
336 bool body_includes_call;
337
338 /* Whether the loop body can only be exited via a single exit. */
339 bool loop_single_exit_p;
340 };
341
342 /* An assignment of iv candidates to uses. */
343
344 struct iv_ca
345 {
346 /* The number of uses covered by the assignment. */
347 unsigned upto;
348
349 /* Number of uses that cannot be expressed by the candidates in the set. */
350 unsigned bad_uses;
351
352 /* Candidate assigned to a use, together with the related costs. */
353 struct cost_pair **cand_for_use;
354
355 /* Number of times each candidate is used. */
356 unsigned *n_cand_uses;
357
358 /* The candidates used. */
359 bitmap cands;
360
361 /* The number of candidates in the set. */
362 unsigned n_cands;
363
364 /* Total number of registers needed. */
365 unsigned n_regs;
366
367 /* Total cost of expressing uses. */
368 comp_cost cand_use_cost;
369
370 /* Total cost of candidates. */
371 unsigned cand_cost;
372
373 /* Number of times each invariant is used. */
374 unsigned *n_invariant_uses;
375
376 /* The array holding the number of uses of each loop
377 invariant expression created by ivopt. */
378 unsigned *used_inv_expr;
379
380 /* The number of created loop invariants. */
381 unsigned num_used_inv_expr;
382
383 /* Total cost of the assignment. */
384 comp_cost cost;
385 };
386
387 /* Difference of two iv candidate assignments. */
388
389 struct iv_ca_delta
390 {
391 /* Changed use. */
392 struct iv_use *use;
393
394 /* An old assignment (for rollback purposes). */
395 struct cost_pair *old_cp;
396
397 /* A new assignment. */
398 struct cost_pair *new_cp;
399
400 /* Next change in the list. */
401 struct iv_ca_delta *next_change;
402 };
403
404 /* Bound on the number of candidates below which all candidates are considered. */
405
406 #define CONSIDER_ALL_CANDIDATES_BOUND \
407 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
408
409 /* If there are more iv occurrences, we just give up (it is quite unlikely that
410 optimizing such a loop would help, and it would take ages). */
411
412 #define MAX_CONSIDERED_USES \
413 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
414
415 /* If there are at most this number of ivs in the set, always try removing
416 unnecessary ivs from the set. */
417
418 #define ALWAYS_PRUNE_CAND_SET_BOUND \
419 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
420
421 /* The list of trees for which the decl_rtl field must be reset is stored
422 here. */
423
424 static vec<tree> decl_rtl_to_reset;
425
426 static comp_cost force_expr_to_var_cost (tree, bool);
427
428 /* Number of uses recorded in DATA. */
429
430 static inline unsigned
431 n_iv_uses (struct ivopts_data *data)
432 {
433 return data->iv_uses.length ();
434 }
435
436 /* Ith use recorded in DATA. */
437
438 static inline struct iv_use *
439 iv_use (struct ivopts_data *data, unsigned i)
440 {
441 return data->iv_uses[i];
442 }
443
444 /* Number of candidates recorded in DATA. */
445
446 static inline unsigned
447 n_iv_cands (struct ivopts_data *data)
448 {
449 return data->iv_candidates.length ();
450 }
451
452 /* Ith candidate recorded in DATA. */
453
454 static inline struct iv_cand *
455 iv_cand (struct ivopts_data *data, unsigned i)
456 {
457 return data->iv_candidates[i];
458 }
459
460 /* The single loop exit if it dominates the latch, NULL otherwise. */
461
462 edge
463 single_dom_exit (struct loop *loop)
464 {
465 edge exit = single_exit (loop);
466
467 if (!exit)
468 return NULL;
469
470 if (!just_once_each_iteration_p (loop, exit->src))
471 return NULL;
472
473 return exit;
474 }
475
476 /* Dumps information about the induction variable IV to FILE. */
477
478 void
479 dump_iv (FILE *file, struct iv *iv)
480 {
481 if (iv->ssa_name)
482 {
483 fprintf (file, "ssa name ");
484 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
485 fprintf (file, "\n");
486 }
487
488 fprintf (file, " type ");
489 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
490 fprintf (file, "\n");
491
492 if (iv->step)
493 {
494 fprintf (file, " base ");
495 print_generic_expr (file, iv->base, TDF_SLIM);
496 fprintf (file, "\n");
497
498 fprintf (file, " step ");
499 print_generic_expr (file, iv->step, TDF_SLIM);
500 fprintf (file, "\n");
501 }
502 else
503 {
504 fprintf (file, " invariant ");
505 print_generic_expr (file, iv->base, TDF_SLIM);
506 fprintf (file, "\n");
507 }
508
509 if (iv->base_object)
510 {
511 fprintf (file, " base object ");
512 print_generic_expr (file, iv->base_object, TDF_SLIM);
513 fprintf (file, "\n");
514 }
515
516 if (iv->biv_p)
517 fprintf (file, " is a biv\n");
518 }
519
520 /* Dumps information about the USE to FILE. */
521
522 void
523 dump_use (FILE *file, struct iv_use *use)
524 {
525 fprintf (file, "use %d\n", use->id);
526
527 switch (use->type)
528 {
529 case USE_NONLINEAR_EXPR:
530 fprintf (file, " generic\n");
531 break;
532
533 case USE_ADDRESS:
534 fprintf (file, " address\n");
535 break;
536
537 case USE_COMPARE:
538 fprintf (file, " compare\n");
539 break;
540
541 default:
542 gcc_unreachable ();
543 }
544
545 fprintf (file, " in statement ");
546 print_gimple_stmt (file, use->stmt, 0, 0);
547 fprintf (file, "\n");
548
549 fprintf (file, " at position ");
550 if (use->op_p)
551 print_generic_expr (file, *use->op_p, TDF_SLIM);
552 fprintf (file, "\n");
553
554 dump_iv (file, use->iv);
555
556 if (use->related_cands)
557 {
558 fprintf (file, " related candidates ");
559 dump_bitmap (file, use->related_cands);
560 }
561 }
562
563 /* Dumps information about the uses to FILE. */
564
565 void
566 dump_uses (FILE *file, struct ivopts_data *data)
567 {
568 unsigned i;
569 struct iv_use *use;
570
571 for (i = 0; i < n_iv_uses (data); i++)
572 {
573 use = iv_use (data, i);
574
575 dump_use (file, use);
576 fprintf (file, "\n");
577 }
578 }
579
580 /* Dumps information about induction variable candidate CAND to FILE. */
581
582 void
583 dump_cand (FILE *file, struct iv_cand *cand)
584 {
585 struct iv *iv = cand->iv;
586
587 fprintf (file, "candidate %d%s\n",
588 cand->id, cand->important ? " (important)" : "");
589
590 if (cand->depends_on)
591 {
592 fprintf (file, " depends on ");
593 dump_bitmap (file, cand->depends_on);
594 }
595
596 if (!iv)
597 {
598 fprintf (file, " final value replacement\n");
599 return;
600 }
601
602 if (cand->var_before)
603 {
604 fprintf (file, " var_before ");
605 print_generic_expr (file, cand->var_before, TDF_SLIM);
606 fprintf (file, "\n");
607 }
608 if (cand->var_after)
609 {
610 fprintf (file, " var_after ");
611 print_generic_expr (file, cand->var_after, TDF_SLIM);
612 fprintf (file, "\n");
613 }
614
615 switch (cand->pos)
616 {
617 case IP_NORMAL:
618 fprintf (file, " incremented before exit test\n");
619 break;
620
621 case IP_BEFORE_USE:
622 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
623 break;
624
625 case IP_AFTER_USE:
626 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
627 break;
628
629 case IP_END:
630 fprintf (file, " incremented at end\n");
631 break;
632
633 case IP_ORIGINAL:
634 fprintf (file, " original biv\n");
635 break;
636 }
637
638 dump_iv (file, iv);
639 }
640
641 /* Returns the info for ssa version VER. */
642
643 static inline struct version_info *
644 ver_info (struct ivopts_data *data, unsigned ver)
645 {
646 return data->version_info + ver;
647 }
648
649 /* Returns the info for ssa name NAME. */
650
651 static inline struct version_info *
652 name_info (struct ivopts_data *data, tree name)
653 {
654 return ver_info (data, SSA_NAME_VERSION (name));
655 }
656
657 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
658 emitted in LOOP. */
659
660 static bool
661 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
662 {
663 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
664
665 gcc_assert (bb);
666
667 if (sbb == loop->latch)
668 return true;
669
670 if (sbb != bb)
671 return false;
672
673 return stmt == last_stmt (bb);
674 }
675
676 /* Returns true if STMT is after the place where the original induction
677 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
678 if the positions are identical. */
679
680 static bool
681 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
682 {
683 basic_block cand_bb = gimple_bb (cand->incremented_at);
684 basic_block stmt_bb = gimple_bb (stmt);
685
686 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
687 return false;
688
689 if (stmt_bb != cand_bb)
690 return true;
691
692 if (true_if_equal
693 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
694 return true;
695 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
696 }
697
698 /* Returns true if STMT is after the place where the induction variable
699 CAND is incremented in LOOP. */
700
701 static bool
702 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
703 {
704 switch (cand->pos)
705 {
706 case IP_END:
707 return false;
708
709 case IP_NORMAL:
710 return stmt_after_ip_normal_pos (loop, stmt);
711
712 case IP_ORIGINAL:
713 case IP_AFTER_USE:
714 return stmt_after_inc_pos (cand, stmt, false);
715
716 case IP_BEFORE_USE:
717 return stmt_after_inc_pos (cand, stmt, true);
718
719 default:
720 gcc_unreachable ();
721 }
722 }
723
724 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
725
726 static bool
727 abnormal_ssa_name_p (tree exp)
728 {
729 if (!exp)
730 return false;
731
732 if (TREE_CODE (exp) != SSA_NAME)
733 return false;
734
735 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
736 }
737
738 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
739 abnormal phi node. Callback for for_each_index. */
740
741 static bool
742 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
743 void *data ATTRIBUTE_UNUSED)
744 {
745 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
746 {
747 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
748 return false;
749 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
750 return false;
751 }
752
753 return !abnormal_ssa_name_p (*index);
754 }
755
756 /* Returns true if EXPR contains a ssa name that occurs in an
757 abnormal phi node. */
758
759 bool
760 contains_abnormal_ssa_name_p (tree expr)
761 {
762 enum tree_code code;
763 enum tree_code_class codeclass;
764
765 if (!expr)
766 return false;
767
768 code = TREE_CODE (expr);
769 codeclass = TREE_CODE_CLASS (code);
770
771 if (code == SSA_NAME)
772 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
773
774 if (code == INTEGER_CST
775 || is_gimple_min_invariant (expr))
776 return false;
777
778 if (code == ADDR_EXPR)
779 return !for_each_index (&TREE_OPERAND (expr, 0),
780 idx_contains_abnormal_ssa_name_p,
781 NULL);
782
783 if (code == COND_EXPR)
784 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
785 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
786 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
787
788 switch (codeclass)
789 {
790 case tcc_binary:
791 case tcc_comparison:
792 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
793 return true;
794
795 /* Fallthru. */
796 case tcc_unary:
797 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
798 return true;
799
800 break;
801
802 default:
803 gcc_unreachable ();
804 }
805
806 return false;
807 }
808
809 /* Returns the structure describing number of iterations determined from
810 EXIT of DATA->current_loop, or NULL if something goes wrong. */
811
812 static struct tree_niter_desc *
813 niter_for_exit (struct ivopts_data *data, edge exit)
814 {
815 struct tree_niter_desc *desc;
816 void **slot;
817
818 if (!data->niters)
819 {
820 data->niters = pointer_map_create ();
821 slot = NULL;
822 }
823 else
824 slot = pointer_map_contains (data->niters, exit);
825
826 if (!slot)
827 {
828 /* Try to determine the number of iterations. We cannot safely work with
829 ssa names that appear in phi nodes on abnormal edges, as that could
830 create overlapping life ranges for them (PR 27283). */
831 desc = XNEW (struct tree_niter_desc);
832 if (!number_of_iterations_exit (data->current_loop,
833 exit, desc, true)
834 || contains_abnormal_ssa_name_p (desc->niter))
835 {
836 XDELETE (desc);
837 desc = NULL;
838 }
839 slot = pointer_map_insert (data->niters, exit);
840 *slot = desc;
841 }
842 else
843 desc = (struct tree_niter_desc *) *slot;
844
845 return desc;
846 }
847
848 /* Returns the structure describing number of iterations determined from
849 single dominating exit of DATA->current_loop, or NULL if something
850 goes wrong. */
851
852 static struct tree_niter_desc *
853 niter_for_single_dom_exit (struct ivopts_data *data)
854 {
855 edge exit = single_dom_exit (data->current_loop);
856
857 if (!exit)
858 return NULL;
859
860 return niter_for_exit (data, exit);
861 }
862
863 /* Initializes data structures used by the iv optimization pass, stored
864 in DATA. */
865
866 static void
867 tree_ssa_iv_optimize_init (struct ivopts_data *data)
868 {
869 data->version_info_size = 2 * num_ssa_names;
870 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
871 data->relevant = BITMAP_ALLOC (NULL);
872 data->important_candidates = BITMAP_ALLOC (NULL);
873 data->max_inv_id = 0;
874 data->niters = NULL;
875 data->iv_uses.create (20);
876 data->iv_candidates.create (20);
877 data->inv_expr_tab.create (10);
878 data->inv_expr_id = 0;
879 decl_rtl_to_reset.create (20);
880 }
881
882 /* Returns a memory object to which EXPR points. If we can determine
883 that it does not point to any such object, NULL is returned. */
884
885 static tree
886 determine_base_object (tree expr)
887 {
888 enum tree_code code = TREE_CODE (expr);
889 tree base, obj;
890
891 /* If this is a pointer casted to any type, we need to determine
892 the base object for the pointer; so handle conversions before
893 throwing away non-pointer expressions. */
894 if (CONVERT_EXPR_P (expr))
895 return determine_base_object (TREE_OPERAND (expr, 0));
896
897 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
898 return NULL_TREE;
899
900 switch (code)
901 {
902 case INTEGER_CST:
903 return NULL_TREE;
904
905 case ADDR_EXPR:
906 obj = TREE_OPERAND (expr, 0);
907 base = get_base_address (obj);
908
909 if (!base)
910 return expr;
911
912 if (TREE_CODE (base) == MEM_REF)
913 return determine_base_object (TREE_OPERAND (base, 0));
914
915 return fold_convert (ptr_type_node,
916 build_fold_addr_expr (base));
917
918 case POINTER_PLUS_EXPR:
919 return determine_base_object (TREE_OPERAND (expr, 0));
920
921 case PLUS_EXPR:
922 case MINUS_EXPR:
923 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
924 gcc_unreachable ();
925
926 default:
927 return fold_convert (ptr_type_node, expr);
928 }
929 }
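/* For instance (an illustrative case, not an exhaustive description):
   for the ADDR_EXPR &a[i] where a is a declaration, the recursion above
   reaches the base address a and returns &a, while an expression of
   non-pointer type yields NULL_TREE immediately.  */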
930
931 /* Allocates an induction variable with the given initial value BASE and
932 step STEP. */
933
934 static struct iv *
935 alloc_iv (tree base, tree step)
936 {
937 tree base_object = base;
938 struct iv *iv = XCNEW (struct iv);
939 gcc_assert (step != NULL_TREE);
940
941 /* Lower all address expressions except ones with DECL_P as operand.
942 By doing this:
943 1) More accurate cost can be computed for address expressions;
944 2) Duplicate candidates won't be created for bases in different
945 forms, like &a[0] and &a. */
946 STRIP_NOPS (base_object);
947 if (TREE_CODE (base_object) == ADDR_EXPR
948 && !DECL_P (TREE_OPERAND (base_object, 0)))
949 {
950 aff_tree comb;
951 double_int size;
952 base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
953 &comb, &size);
954 gcc_assert (base_object != NULL_TREE);
955 base_object = build_fold_addr_expr (base_object);
956 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
957 }
958
959 iv->base = base;
960 iv->base_object = determine_base_object (base_object);
961 iv->step = step;
962 iv->biv_p = false;
963 iv->have_use_for = false;
964 iv->use_id = 0;
965 iv->ssa_name = NULL_TREE;
966
967 return iv;
968 }
969
970 /* Sets STEP and BASE for induction variable IV. */
971
972 static void
973 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
974 {
975 struct version_info *info = name_info (data, iv);
976
977 gcc_assert (!info->iv);
978
979 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
980 info->iv = alloc_iv (base, step);
981 info->iv->ssa_name = iv;
982 }
983
984 /* Finds induction variable declaration for VAR. */
985
986 static struct iv *
987 get_iv (struct ivopts_data *data, tree var)
988 {
989 basic_block bb;
990 tree type = TREE_TYPE (var);
991
992 if (!POINTER_TYPE_P (type)
993 && !INTEGRAL_TYPE_P (type))
994 return NULL;
995
996 if (!name_info (data, var)->iv)
997 {
998 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
999
1000 if (!bb
1001 || !flow_bb_inside_loop_p (data->current_loop, bb))
1002 set_iv (data, var, var, build_int_cst (type, 0));
1003 }
1004
1005 return name_info (data, var)->iv;
1006 }
1007
1008 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1009 not define a simple affine biv with nonzero step. */
1010
1011 static tree
1012 determine_biv_step (gimple phi)
1013 {
1014 struct loop *loop = gimple_bb (phi)->loop_father;
1015 tree name = PHI_RESULT (phi);
1016 affine_iv iv;
1017
1018 if (virtual_operand_p (name))
1019 return NULL_TREE;
1020
1021 if (!simple_iv (loop, loop, name, &iv, true))
1022 return NULL_TREE;
1023
1024 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1025 }
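/* An illustrative GIMPLE example: for the header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   where the latch value is computed as i_2 = i_1 + 4, simple_iv
   recognizes an affine iv with step 4, so 4 is returned; a phi whose
   result does not advance in the loop yields NULL_TREE instead.  */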
1026
1027 /* Finds basic ivs. */
1028
1029 static bool
1030 find_bivs (struct ivopts_data *data)
1031 {
1032 gimple phi;
1033 tree step, type, base;
1034 bool found = false;
1035 struct loop *loop = data->current_loop;
1036 gimple_stmt_iterator psi;
1037
1038 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1039 {
1040 phi = gsi_stmt (psi);
1041
1042 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1043 continue;
1044
1045 step = determine_biv_step (phi);
1046 if (!step)
1047 continue;
1048
1049 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1050 base = expand_simple_operations (base);
1051 if (contains_abnormal_ssa_name_p (base)
1052 || contains_abnormal_ssa_name_p (step))
1053 continue;
1054
1055 type = TREE_TYPE (PHI_RESULT (phi));
1056 base = fold_convert (type, base);
1057 if (step)
1058 {
1059 if (POINTER_TYPE_P (type))
1060 step = convert_to_ptrofftype (step);
1061 else
1062 step = fold_convert (type, step);
1063 }
1064
1065 set_iv (data, PHI_RESULT (phi), base, step);
1066 found = true;
1067 }
1068
1069 return found;
1070 }
1071
1072 /* Marks basic ivs. */
1073
1074 static void
1075 mark_bivs (struct ivopts_data *data)
1076 {
1077 gimple phi, def;
1078 tree var;
1079 struct iv *iv, *incr_iv;
1080 struct loop *loop = data->current_loop;
1081 basic_block incr_bb;
1082 gimple_stmt_iterator psi;
1083
1084 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1085 {
1086 phi = gsi_stmt (psi);
1087
1088 iv = get_iv (data, PHI_RESULT (phi));
1089 if (!iv)
1090 continue;
1091
1092 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1093 def = SSA_NAME_DEF_STMT (var);
1094 /* Don't mark an iv peeled from another one as a biv. */
1095 if (def
1096 && gimple_code (def) == GIMPLE_PHI
1097 && gimple_bb (def) == loop->header)
1098 continue;
1099
1100 incr_iv = get_iv (data, var);
1101 if (!incr_iv)
1102 continue;
1103
1104 /* If the increment is in the subloop, ignore it. */
1105 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1106 if (incr_bb->loop_father != data->current_loop
1107 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1108 continue;
1109
1110 iv->biv_p = true;
1111 incr_iv->biv_p = true;
1112 }
1113 }
1114
1115 /* Checks whether STMT defines a linear induction variable and stores its
1116 parameters to IV. */
1117
1118 static bool
1119 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1120 {
1121 tree lhs;
1122 struct loop *loop = data->current_loop;
1123
1124 iv->base = NULL_TREE;
1125 iv->step = NULL_TREE;
1126
1127 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1128 return false;
1129
1130 lhs = gimple_assign_lhs (stmt);
1131 if (TREE_CODE (lhs) != SSA_NAME)
1132 return false;
1133
1134 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1135 return false;
1136 iv->base = expand_simple_operations (iv->base);
1137
1138 if (contains_abnormal_ssa_name_p (iv->base)
1139 || contains_abnormal_ssa_name_p (iv->step))
1140 return false;
1141
1142 /* If STMT could throw, then do not consider STMT as defining a GIV.
1143 While this will suppress optimizations, we cannot safely delete this
1144 GIV and associated statements, even if it appears it is not used. */
1145 if (stmt_could_throw_p (stmt))
1146 return false;
1147
1148 return true;
1149 }
1150
1151 /* Finds general ivs in statement STMT. */
1152
1153 static void
1154 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1155 {
1156 affine_iv iv;
1157
1158 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1159 return;
1160
1161 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1162 }
1163
1164 /* Finds general ivs in basic block BB. */
1165
1166 static void
1167 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1168 {
1169 gimple_stmt_iterator bsi;
1170
1171 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1172 find_givs_in_stmt (data, gsi_stmt (bsi));
1173 }
1174
1175 /* Finds general ivs. */
1176
1177 static void
1178 find_givs (struct ivopts_data *data)
1179 {
1180 struct loop *loop = data->current_loop;
1181 basic_block *body = get_loop_body_in_dom_order (loop);
1182 unsigned i;
1183
1184 for (i = 0; i < loop->num_nodes; i++)
1185 find_givs_in_bb (data, body[i]);
1186 free (body);
1187 }
1188
1189 /* For each ssa name defined in LOOP determines whether it is an induction
1190 variable and if so, its initial value and step. */
1191
1192 static bool
1193 find_induction_variables (struct ivopts_data *data)
1194 {
1195 unsigned i;
1196 bitmap_iterator bi;
1197
1198 if (!find_bivs (data))
1199 return false;
1200
1201 find_givs (data);
1202 mark_bivs (data);
1203
1204 if (dump_file && (dump_flags & TDF_DETAILS))
1205 {
1206 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1207
1208 if (niter)
1209 {
1210 fprintf (dump_file, " number of iterations ");
1211 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1212 if (!integer_zerop (niter->may_be_zero))
1213 {
1214 fprintf (dump_file, "; zero if ");
1215 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1216 }
1217 fprintf (dump_file, "\n\n");
1218 }
1219
1220 fprintf (dump_file, "Induction variables:\n\n");
1221
1222 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1223 {
1224 if (ver_info (data, i)->iv)
1225 dump_iv (dump_file, ver_info (data, i)->iv);
1226 }
1227 }
1228
1229 return true;
1230 }
1231
1232 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1233
1234 static struct iv_use *
1235 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1236 gimple stmt, enum use_type use_type)
1237 {
1238 struct iv_use *use = XCNEW (struct iv_use);
1239
1240 use->id = n_iv_uses (data);
1241 use->type = use_type;
1242 use->iv = iv;
1243 use->stmt = stmt;
1244 use->op_p = use_p;
1245 use->related_cands = BITMAP_ALLOC (NULL);
1246
1247 /* Clear the ssa name, so that it is not shown in the dumps, in case the
1248 caller has not reset it already. */
1249 iv->ssa_name = NULL_TREE;
1250
1251 if (dump_file && (dump_flags & TDF_DETAILS))
1252 dump_use (dump_file, use);
1253
1254 data->iv_uses.safe_push (use);
1255
1256 return use;
1257 }
1258
1259 /* Checks whether OP is a loop-level invariant and if so, records it.
1260 NONLINEAR_USE is true if the invariant is used in a way we do not
1261 handle specially. */
1262
1263 static void
1264 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1265 {
1266 basic_block bb;
1267 struct version_info *info;
1268
1269 if (TREE_CODE (op) != SSA_NAME
1270 || virtual_operand_p (op))
1271 return;
1272
1273 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1274 if (bb
1275 && flow_bb_inside_loop_p (data->current_loop, bb))
1276 return;
1277
1278 info = name_info (data, op);
1279 info->name = op;
1280 info->has_nonlin_use |= nonlinear_use;
1281 if (!info->inv_id)
1282 info->inv_id = ++data->max_inv_id;
1283 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1284 }
1285
1286 /* Checks whether the use OP is interesting and if so, records it. */
1287
1288 static struct iv_use *
1289 find_interesting_uses_op (struct ivopts_data *data, tree op)
1290 {
1291 struct iv *iv;
1292 struct iv *civ;
1293 gimple stmt;
1294 struct iv_use *use;
1295
1296 if (TREE_CODE (op) != SSA_NAME)
1297 return NULL;
1298
1299 iv = get_iv (data, op);
1300 if (!iv)
1301 return NULL;
1302
1303 if (iv->have_use_for)
1304 {
1305 use = iv_use (data, iv->use_id);
1306
1307 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1308 return use;
1309 }
1310
1311 if (integer_zerop (iv->step))
1312 {
1313 record_invariant (data, op, true);
1314 return NULL;
1315 }
1316 iv->have_use_for = true;
1317
1318 civ = XNEW (struct iv);
1319 *civ = *iv;
1320
1321 stmt = SSA_NAME_DEF_STMT (op);
1322 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1323 || is_gimple_assign (stmt));
1324
1325 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1326 iv->use_id = use->id;
1327
1328 return use;
1329 }
1330
1331 /* Given a condition in statement STMT, checks whether it is a compare
1332 of an induction variable and an invariant. If this is the case,
1333 CONTROL_VAR is set to location of the iv, BOUND to the location of
1334 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1335 induction variable descriptions, and true is returned. If this is not
1336 the case, CONTROL_VAR and BOUND are set to the arguments of the
1337 condition and false is returned. */
1338
1339 static bool
1340 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1341 tree **control_var, tree **bound,
1342 struct iv **iv_var, struct iv **iv_bound)
1343 {
1344 /* The objects returned when COND has constant operands. */
1345 static struct iv const_iv;
1346 static tree zero;
1347 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1348 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1349 bool ret = false;
1350
1351 if (gimple_code (stmt) == GIMPLE_COND)
1352 {
1353 op0 = gimple_cond_lhs_ptr (stmt);
1354 op1 = gimple_cond_rhs_ptr (stmt);
1355 }
1356 else
1357 {
1358 op0 = gimple_assign_rhs1_ptr (stmt);
1359 op1 = gimple_assign_rhs2_ptr (stmt);
1360 }
1361
1362 zero = integer_zero_node;
1363 const_iv.step = integer_zero_node;
1364
1365 if (TREE_CODE (*op0) == SSA_NAME)
1366 iv0 = get_iv (data, *op0);
1367 if (TREE_CODE (*op1) == SSA_NAME)
1368 iv1 = get_iv (data, *op1);
1369
1370 /* Exactly one of the compared values must be an iv, and the other one must
1371 be an invariant. */
1372 if (!iv0 || !iv1)
1373 goto end;
1374
1375 if (integer_zerop (iv0->step))
1376 {
1377 /* Control variable may be on the other side. */
1378 tmp_op = op0; op0 = op1; op1 = tmp_op;
1379 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1380 }
1381 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1382
1383 end:
1384 if (control_var)
1385 *control_var = op0;
1386 if (iv_var)
1387 *iv_var = iv0;
1388 if (bound)
1389 *bound = op1;
1390 if (iv_bound)
1391 *iv_bound = iv1;
1392
1393 return ret;
1394 }
1395
1396 /* Checks whether the condition in STMT is interesting and if so,
1397 records it. */
1398
1399 static void
1400 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1401 {
1402 tree *var_p, *bound_p;
1403 struct iv *var_iv, *civ;
1404
1405 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1406 {
1407 find_interesting_uses_op (data, *var_p);
1408 find_interesting_uses_op (data, *bound_p);
1409 return;
1410 }
1411
1412 civ = XNEW (struct iv);
1413 *civ = *var_iv;
1414 record_use (data, NULL, civ, stmt, USE_COMPARE);
1415 }
1416
1417 /* Returns the outermost loop EXPR is obviously invariant in
1418 relative to the loop LOOP, i.e. if all its operands are defined
1419 outside of the returned loop. Returns NULL if EXPR is not
1420 even obviously invariant in LOOP. */
1421
1422 struct loop *
1423 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1424 {
1425 basic_block def_bb;
1426 unsigned i, len;
1427
1428 if (is_gimple_min_invariant (expr))
1429 return current_loops->tree_root;
1430
1431 if (TREE_CODE (expr) == SSA_NAME)
1432 {
1433 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1434 if (def_bb)
1435 {
1436 if (flow_bb_inside_loop_p (loop, def_bb))
1437 return NULL;
1438 return superloop_at_depth (loop,
1439 loop_depth (def_bb->loop_father) + 1);
1440 }
1441
1442 return current_loops->tree_root;
1443 }
1444
1445 if (!EXPR_P (expr))
1446 return NULL;
1447
1448 unsigned maxdepth = 0;
1449 len = TREE_OPERAND_LENGTH (expr);
1450 for (i = 0; i < len; i++)
1451 {
1452 struct loop *ivloop;
1453 if (!TREE_OPERAND (expr, i))
1454 continue;
1455
1456 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1457 if (!ivloop)
1458 return NULL;
1459 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1460 }
1461
1462 return superloop_at_depth (loop, maxdepth);
1463 }
1464
1465 /* Returns true if expression EXPR is obviously invariant in LOOP,
1466 i.e. if all its operands are defined outside of the LOOP. LOOP
1467 should not be the function body. */
1468
1469 bool
1470 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1471 {
1472 basic_block def_bb;
1473 unsigned i, len;
1474
1475 gcc_assert (loop_depth (loop) > 0);
1476
1477 if (is_gimple_min_invariant (expr))
1478 return true;
1479
1480 if (TREE_CODE (expr) == SSA_NAME)
1481 {
1482 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1483 if (def_bb
1484 && flow_bb_inside_loop_p (loop, def_bb))
1485 return false;
1486
1487 return true;
1488 }
1489
1490 if (!EXPR_P (expr))
1491 return false;
1492
1493 len = TREE_OPERAND_LENGTH (expr);
1494 for (i = 0; i < len; i++)
1495 if (TREE_OPERAND (expr, i)
1496 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1497 return false;
1498
1499 return true;
1500 }
1501
1502 /* Accumulates the steps of indices into DATA and replaces their values with the
1503 initial ones. Returns false when the value of the index cannot be determined.
1504 Callback for for_each_index. */
1505
1506 struct ifs_ivopts_data
1507 {
1508 struct ivopts_data *ivopts_data;
1509 gimple stmt;
1510 tree step;
1511 };
1512
1513 static bool
1514 idx_find_step (tree base, tree *idx, void *data)
1515 {
1516 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1517 struct iv *iv;
1518 tree step, iv_base, iv_step, lbound, off;
1519 struct loop *loop = dta->ivopts_data->current_loop;
1520
1521 /* If base is a component ref, require that the offset of the reference
1522 be invariant. */
1523 if (TREE_CODE (base) == COMPONENT_REF)
1524 {
1525 off = component_ref_field_offset (base);
1526 return expr_invariant_in_loop_p (loop, off);
1527 }
1528
1529 /* If base is array, first check whether we will be able to move the
1530 reference out of the loop (in order to take its address in strength
1531 reduction). In order for this to work we need both lower bound
1532 and step to be loop invariants. */
1533 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1534 {
1535 /* Moreover, for a range, the size needs to be invariant as well. */
1536 if (TREE_CODE (base) == ARRAY_RANGE_REF
1537 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1538 return false;
1539
1540 step = array_ref_element_size (base);
1541 lbound = array_ref_low_bound (base);
1542
1543 if (!expr_invariant_in_loop_p (loop, step)
1544 || !expr_invariant_in_loop_p (loop, lbound))
1545 return false;
1546 }
1547
1548 if (TREE_CODE (*idx) != SSA_NAME)
1549 return true;
1550
1551 iv = get_iv (dta->ivopts_data, *idx);
1552 if (!iv)
1553 return false;
1554
1555 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1556 *&x[0], which is not folded and does not trigger the
1557 ARRAY_REF path below. */
1558 *idx = iv->base;
1559
1560 if (integer_zerop (iv->step))
1561 return true;
1562
1563 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1564 {
1565 step = array_ref_element_size (base);
1566
1567 /* We only handle addresses whose step is an integer constant. */
1568 if (TREE_CODE (step) != INTEGER_CST)
1569 return false;
1570 }
1571 else
1572 /* The step for pointer arithmetic is already 1 byte. */
1573 step = size_one_node;
1574
1575 iv_base = iv->base;
1576 iv_step = iv->step;
1577 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1578 sizetype, &iv_base, &iv_step, dta->stmt,
1579 false))
1580 {
1581 /* The index might wrap. */
1582 return false;
1583 }
1584
1585 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1586 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1587
1588 return true;
1589 }
1590
1591 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1592 object is passed to it in DATA. */
1593
1594 static bool
1595 idx_record_use (tree base, tree *idx,
1596 void *vdata)
1597 {
1598 struct ivopts_data *data = (struct ivopts_data *) vdata;
1599 find_interesting_uses_op (data, *idx);
1600 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1601 {
1602 find_interesting_uses_op (data, array_ref_element_size (base));
1603 find_interesting_uses_op (data, array_ref_low_bound (base));
1604 }
1605 return true;
1606 }
1607
1608 /* If we can prove that TOP = cst * BOT for some constant cst,
1609 store cst to MUL and return true. Otherwise return false.
1610 The returned value is always sign-extended, regardless of the
1611 signedness of TOP and BOT. */
1612
1613 static bool
1614 constant_multiple_of (tree top, tree bot, double_int *mul)
1615 {
1616 tree mby;
1617 enum tree_code code;
1618 double_int res, p0, p1;
1619 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1620
1621 STRIP_NOPS (top);
1622 STRIP_NOPS (bot);
1623
1624 if (operand_equal_p (top, bot, 0))
1625 {
1626 *mul = double_int_one;
1627 return true;
1628 }
1629
1630 code = TREE_CODE (top);
1631 switch (code)
1632 {
1633 case MULT_EXPR:
1634 mby = TREE_OPERAND (top, 1);
1635 if (TREE_CODE (mby) != INTEGER_CST)
1636 return false;
1637
1638 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1639 return false;
1640
1641 *mul = (res * tree_to_double_int (mby)).sext (precision);
1642 return true;
1643
1644 case PLUS_EXPR:
1645 case MINUS_EXPR:
1646 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1647 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1648 return false;
1649
1650 if (code == MINUS_EXPR)
1651 p1 = -p1;
1652 *mul = (p0 + p1).sext (precision);
1653 return true;
1654
1655 case INTEGER_CST:
1656 if (TREE_CODE (bot) != INTEGER_CST)
1657 return false;
1658
1659 p0 = tree_to_double_int (top).sext (precision);
1660 p1 = tree_to_double_int (bot).sext (precision);
1661 if (p1.is_zero ())
1662 return false;
1663 *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1664 return res.is_zero ();
1665
1666 default:
1667 return false;
1668 }
1669 }
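/* A worked example with made-up operands: for TOP = x * 4 + x * 8 and
   BOT = x, the two MULT_EXPR subexpressions yield the constants 4 and 8,
   the PLUS_EXPR case adds them, and *MUL is set to 12.  For TOP = 7 and
   BOT = 2, the floor division leaves a nonzero remainder, so false is
   returned.  */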
1670
1671 /* Return true if memory reference REF with step STEP may be unaligned. */
1672
1673 static bool
1674 may_be_unaligned_p (tree ref, tree step)
1675 {
1676 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1677 thus they are not misaligned. */
1678 if (TREE_CODE (ref) == TARGET_MEM_REF)
1679 return false;
1680
1681 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1682 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1683 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1684
1685 unsigned HOST_WIDE_INT bitpos;
1686 unsigned int ref_align;
1687 get_object_alignment_1 (ref, &ref_align, &bitpos);
1688 if (ref_align < align
1689 || (bitpos % align) != 0
1690 || (bitpos % BITS_PER_UNIT) != 0)
1691 return true;
1692
1693 unsigned int trailing_zeros = tree_ctz (step);
1694 if (trailing_zeros < HOST_BITS_PER_INT
1695 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1696 return true;
1697
1698 return false;
1699 }
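/* As an illustration, consider a 32-bit access whose natural alignment
   is 32 bits walked with STEP == 2: tree_ctz (STEP) is 1, and
   (1U << 1) * BITS_PER_UNIT == 16 is smaller than the required 32-bit
   alignment, so some iteration may touch a misaligned address and true
   is returned.  */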
1700
1701 /* Return true if EXPR may be non-addressable. */
1702
1703 bool
1704 may_be_nonaddressable_p (tree expr)
1705 {
1706 switch (TREE_CODE (expr))
1707 {
1708 case TARGET_MEM_REF:
1709 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1710 target, thus they are always addressable. */
1711 return false;
1712
1713 case COMPONENT_REF:
1714 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1715 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1716
1717 case VIEW_CONVERT_EXPR:
1718 /* This kind of view-conversions may wrap non-addressable objects
1719 and make them look addressable. After some processing the
1720 non-addressability may be uncovered again, causing ADDR_EXPRs
1721 of inappropriate objects to be built. */
1722 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1723 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1724 return true;
1725
1726 /* ... fall through ... */
1727
1728 case ARRAY_REF:
1729 case ARRAY_RANGE_REF:
1730 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1731
1732 CASE_CONVERT:
1733 return true;
1734
1735 default:
1736 break;
1737 }
1738
1739 return false;
1740 }
1741
1742 /* Finds addresses in *OP_P inside STMT. */
1743
1744 static void
1745 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1746 {
1747 tree base = *op_p, step = size_zero_node;
1748 struct iv *civ;
1749 struct ifs_ivopts_data ifs_ivopts_data;
1750
1751 /* Do not play with volatile memory references. A bit too conservative,
1752 perhaps, but safe. */
1753 if (gimple_has_volatile_ops (stmt))
1754 goto fail;
1755
1756 /* Ignore bitfields for now. Not really something terribly complicated
1757 to handle. TODO. */
1758 if (TREE_CODE (base) == BIT_FIELD_REF)
1759 goto fail;
1760
1761 base = unshare_expr (base);
1762
1763 if (TREE_CODE (base) == TARGET_MEM_REF)
1764 {
1765 tree type = build_pointer_type (TREE_TYPE (base));
1766 tree astep;
1767
1768 if (TMR_BASE (base)
1769 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1770 {
1771 civ = get_iv (data, TMR_BASE (base));
1772 if (!civ)
1773 goto fail;
1774
1775 TMR_BASE (base) = civ->base;
1776 step = civ->step;
1777 }
1778 if (TMR_INDEX2 (base)
1779 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1780 {
1781 civ = get_iv (data, TMR_INDEX2 (base));
1782 if (!civ)
1783 goto fail;
1784
1785 TMR_INDEX2 (base) = civ->base;
1786 step = civ->step;
1787 }
1788 if (TMR_INDEX (base)
1789 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1790 {
1791 civ = get_iv (data, TMR_INDEX (base));
1792 if (!civ)
1793 goto fail;
1794
1795 TMR_INDEX (base) = civ->base;
1796 astep = civ->step;
1797
1798 if (astep)
1799 {
1800 if (TMR_STEP (base))
1801 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1802
1803 step = fold_build2 (PLUS_EXPR, type, step, astep);
1804 }
1805 }
1806
1807 if (integer_zerop (step))
1808 goto fail;
1809 base = tree_mem_ref_addr (type, base);
1810 }
1811 else
1812 {
1813 ifs_ivopts_data.ivopts_data = data;
1814 ifs_ivopts_data.stmt = stmt;
1815 ifs_ivopts_data.step = size_zero_node;
1816 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1817 || integer_zerop (ifs_ivopts_data.step))
1818 goto fail;
1819 step = ifs_ivopts_data.step;
1820
1821 /* Check that the base expression is addressable. This needs
1822 to be done after substituting bases of IVs into it. */
1823 if (may_be_nonaddressable_p (base))
1824 goto fail;
1825
1826 /* Moreover, on strict alignment platforms, check that it is
1827 sufficiently aligned. */
1828 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1829 goto fail;
1830
1831 base = build_fold_addr_expr (base);
1832
1833 /* Substituting bases of IVs into the base expression might
1834 have caused folding opportunities. */
1835 if (TREE_CODE (base) == ADDR_EXPR)
1836 {
1837 tree *ref = &TREE_OPERAND (base, 0);
1838 while (handled_component_p (*ref))
1839 ref = &TREE_OPERAND (*ref, 0);
1840 if (TREE_CODE (*ref) == MEM_REF)
1841 {
1842 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1843 TREE_OPERAND (*ref, 0),
1844 TREE_OPERAND (*ref, 1));
1845 if (tem)
1846 *ref = tem;
1847 }
1848 }
1849 }
1850
1851 civ = alloc_iv (base, step);
1852 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1853 return;
1854
1855 fail:
1856 for_each_index (op_p, idx_record_use, data);
1857 }
1858
1859 /* Finds and records invariants used in STMT. */
1860
1861 static void
1862 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1863 {
1864 ssa_op_iter iter;
1865 use_operand_p use_p;
1866 tree op;
1867
1868 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1869 {
1870 op = USE_FROM_PTR (use_p);
1871 record_invariant (data, op, false);
1872 }
1873 }
1874
1875 /* Finds interesting uses of induction variables in the statement STMT. */
1876
1877 static void
1878 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1879 {
1880 struct iv *iv;
1881 tree op, *lhs, *rhs;
1882 ssa_op_iter iter;
1883 use_operand_p use_p;
1884 enum tree_code code;
1885
1886 find_invariants_stmt (data, stmt);
1887
1888 if (gimple_code (stmt) == GIMPLE_COND)
1889 {
1890 find_interesting_uses_cond (data, stmt);
1891 return;
1892 }
1893
1894 if (is_gimple_assign (stmt))
1895 {
1896 lhs = gimple_assign_lhs_ptr (stmt);
1897 rhs = gimple_assign_rhs1_ptr (stmt);
1898
1899 if (TREE_CODE (*lhs) == SSA_NAME)
1900 {
1901 /* If the statement defines an induction variable, the uses are not
1902 interesting by themselves. */
1903
1904 iv = get_iv (data, *lhs);
1905
1906 if (iv && !integer_zerop (iv->step))
1907 return;
1908 }
1909
1910 code = gimple_assign_rhs_code (stmt);
1911 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1912 && (REFERENCE_CLASS_P (*rhs)
1913 || is_gimple_val (*rhs)))
1914 {
1915 if (REFERENCE_CLASS_P (*rhs))
1916 find_interesting_uses_address (data, stmt, rhs);
1917 else
1918 find_interesting_uses_op (data, *rhs);
1919
1920 if (REFERENCE_CLASS_P (*lhs))
1921 find_interesting_uses_address (data, stmt, lhs);
1922 return;
1923 }
1924 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1925 {
1926 find_interesting_uses_cond (data, stmt);
1927 return;
1928 }
1929
1930 /* TODO -- we should also handle address uses of type
1931
1932 memory = call (whatever);
1933
1934 and
1935
1936 call (memory). */
1937 }
1938
1939 if (gimple_code (stmt) == GIMPLE_PHI
1940 && gimple_bb (stmt) == data->current_loop->header)
1941 {
1942 iv = get_iv (data, PHI_RESULT (stmt));
1943
1944 if (iv && !integer_zerop (iv->step))
1945 return;
1946 }
1947
1948 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1949 {
1950 op = USE_FROM_PTR (use_p);
1951
1952 if (TREE_CODE (op) != SSA_NAME)
1953 continue;
1954
1955 iv = get_iv (data, op);
1956 if (!iv)
1957 continue;
1958
1959 find_interesting_uses_op (data, op);
1960 }
1961 }
1962
1963 /* Finds interesting uses of induction variables outside of loops
1964 on loop exit edge EXIT. */
1965
1966 static void
1967 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1968 {
1969 gimple phi;
1970 gimple_stmt_iterator psi;
1971 tree def;
1972
1973 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1974 {
1975 phi = gsi_stmt (psi);
1976 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1977 if (!virtual_operand_p (def))
1978 find_interesting_uses_op (data, def);
1979 }
1980 }
1981
1982 /* Finds uses of the induction variables that are interesting. */
1983
1984 static void
1985 find_interesting_uses (struct ivopts_data *data)
1986 {
1987 basic_block bb;
1988 gimple_stmt_iterator bsi;
1989 basic_block *body = get_loop_body (data->current_loop);
1990 unsigned i;
1991 struct version_info *info;
1992 edge e;
1993
1994 if (dump_file && (dump_flags & TDF_DETAILS))
1995 fprintf (dump_file, "Uses:\n\n");
1996
1997 for (i = 0; i < data->current_loop->num_nodes; i++)
1998 {
1999 edge_iterator ei;
2000 bb = body[i];
2001
2002 FOR_EACH_EDGE (e, ei, bb->succs)
2003 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2004 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2005 find_interesting_uses_outside (data, e);
2006
2007 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2008 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2009 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2010 if (!is_gimple_debug (gsi_stmt (bsi)))
2011 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2012 }
2013
2014 if (dump_file && (dump_flags & TDF_DETAILS))
2015 {
2016 bitmap_iterator bi;
2017
2018 fprintf (dump_file, "\n");
2019
2020 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2021 {
2022 info = ver_info (data, i);
2023 if (info->inv_id)
2024 {
2025 fprintf (dump_file, " ");
2026 print_generic_expr (dump_file, info->name, TDF_SLIM);
2027 fprintf (dump_file, " is invariant (%d)%s\n",
2028 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2029 }
2030 }
2031
2032 fprintf (dump_file, "\n");
2033 }
2034
2035 free (body);
2036 }
2037
2038 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2039 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2040 we are at the top-level of the processed address. */
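/* For illustration (a hypothetical example, not from the original
   sources): given the address &a[i + 3] where "a" is an array of
   4-byte ints, strip_offset_1 peels off the constant part of the
   index, returning &a[i] and setting *OFFSET to 3 * 4 == 12, since
   ARRAY_REF indices are scaled by the element size below. */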
2041
2042 static tree
2043 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2044 HOST_WIDE_INT *offset)
2045 {
2046 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2047 enum tree_code code;
2048 tree type, orig_type = TREE_TYPE (expr);
2049 HOST_WIDE_INT off0, off1, st;
2050 tree orig_expr = expr;
2051
2052 STRIP_NOPS (expr);
2053
2054 type = TREE_TYPE (expr);
2055 code = TREE_CODE (expr);
2056 *offset = 0;
2057
2058 switch (code)
2059 {
2060 case INTEGER_CST:
2061 if (!cst_and_fits_in_hwi (expr)
2062 || integer_zerop (expr))
2063 return orig_expr;
2064
2065 *offset = int_cst_value (expr);
2066 return build_int_cst (orig_type, 0);
2067
2068 case POINTER_PLUS_EXPR:
2069 case PLUS_EXPR:
2070 case MINUS_EXPR:
2071 op0 = TREE_OPERAND (expr, 0);
2072 op1 = TREE_OPERAND (expr, 1);
2073
2074 op0 = strip_offset_1 (op0, false, false, &off0);
2075 op1 = strip_offset_1 (op1, false, false, &off1);
2076
2077 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2078 if (op0 == TREE_OPERAND (expr, 0)
2079 && op1 == TREE_OPERAND (expr, 1))
2080 return orig_expr;
2081
2082 if (integer_zerop (op1))
2083 expr = op0;
2084 else if (integer_zerop (op0))
2085 {
2086 if (code == MINUS_EXPR)
2087 expr = fold_build1 (NEGATE_EXPR, type, op1);
2088 else
2089 expr = op1;
2090 }
2091 else
2092 expr = fold_build2 (code, type, op0, op1);
2093
2094 return fold_convert (orig_type, expr);
2095
2096 case MULT_EXPR:
2097 op1 = TREE_OPERAND (expr, 1);
2098 if (!cst_and_fits_in_hwi (op1))
2099 return orig_expr;
2100
2101 op0 = TREE_OPERAND (expr, 0);
2102 op0 = strip_offset_1 (op0, false, false, &off0);
2103 if (op0 == TREE_OPERAND (expr, 0))
2104 return orig_expr;
2105
2106 *offset = off0 * int_cst_value (op1);
2107 if (integer_zerop (op0))
2108 expr = op0;
2109 else
2110 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2111
2112 return fold_convert (orig_type, expr);
2113
2114 case ARRAY_REF:
2115 case ARRAY_RANGE_REF:
2116 if (!inside_addr)
2117 return orig_expr;
2118
2119 step = array_ref_element_size (expr);
2120 if (!cst_and_fits_in_hwi (step))
2121 break;
2122
2123 st = int_cst_value (step);
2124 op1 = TREE_OPERAND (expr, 1);
2125 op1 = strip_offset_1 (op1, false, false, &off1);
2126 *offset = off1 * st;
2127
2128 if (top_compref
2129 && integer_zerop (op1))
2130 {
2131 /* Strip the component reference completely. */
2132 op0 = TREE_OPERAND (expr, 0);
2133 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2134 *offset += off0;
2135 return op0;
2136 }
2137 break;
2138
2139 case COMPONENT_REF:
2140 {
2141 tree field;
2142
2143 if (!inside_addr)
2144 return orig_expr;
2145
2146 tmp = component_ref_field_offset (expr);
2147 field = TREE_OPERAND (expr, 1);
2148 if (top_compref
2149 && cst_and_fits_in_hwi (tmp)
2150 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2151 {
2152 HOST_WIDE_INT boffset, abs_off;
2153
2154 /* Strip the component reference completely. */
2155 op0 = TREE_OPERAND (expr, 0);
2156 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2157 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2158 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2159 if (boffset < 0)
2160 abs_off = -abs_off;
2161
2162 *offset = off0 + int_cst_value (tmp) + abs_off;
2163 return op0;
2164 }
2165 }
2166 break;
2167
2168 case ADDR_EXPR:
2169 op0 = TREE_OPERAND (expr, 0);
2170 op0 = strip_offset_1 (op0, true, true, &off0);
2171 *offset += off0;
2172
2173 if (op0 == TREE_OPERAND (expr, 0))
2174 return orig_expr;
2175
2176 expr = build_fold_addr_expr (op0);
2177 return fold_convert (orig_type, expr);
2178
2179 case MEM_REF:
2180 /* ??? Offset operand? */
2181 inside_addr = false;
2182 break;
2183
2184 default:
2185 return orig_expr;
2186 }
2187
2188 /* Default handling of expressions for which we want to recurse into
2189 the first operand. */
2190 op0 = TREE_OPERAND (expr, 0);
2191 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2192 *offset += off0;
2193
2194 if (op0 == TREE_OPERAND (expr, 0)
2195 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2196 return orig_expr;
2197
2198 expr = copy_node (expr);
2199 TREE_OPERAND (expr, 0) = op0;
2200 if (op1)
2201 TREE_OPERAND (expr, 1) = op1;
2202
2203 /* Inside an address, we might strip the top-level component references,
2204 thus changing the type of the expression. Handling of ADDR_EXPR
2205 will fix that. */
2206 expr = fold_convert (orig_type, expr);
2207
2208 return expr;
2209 }
2210
2211 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2212
2213 static tree
2214 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2215 {
2216 HOST_WIDE_INT off;
2217 tree core = strip_offset_1 (expr, false, false, &off);
2218 *offset = off;
2219 return core;
2220 }
2221
2222 /* Returns variant of TYPE that can be used as base for different uses.
2223 We return unsigned type with the same precision, which avoids problems
2224 with overflows. */
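/* E.g. (an illustrative assumption about the target): for "int" this
   returns "unsigned int", and for a pointer type it returns the
   unsigned integer type of the same precision, so that candidate
   arithmetic wraps instead of invoking undefined overflow. */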
2225
2226 static tree
2227 generic_type_for (tree type)
2228 {
2229 if (POINTER_TYPE_P (type))
2230 return unsigned_type_for (type);
2231
2232 if (TYPE_UNSIGNED (type))
2233 return type;
2234
2235 return unsigned_type_for (type);
2236 }
2237
2238 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2239 the bitmap in which we should store them. */
2240
2241 static struct ivopts_data *fd_ivopts_data;
2242 static tree
2243 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2244 {
2245 bitmap *depends_on = (bitmap *) data;
2246 struct version_info *info;
2247
2248 if (TREE_CODE (*expr_p) != SSA_NAME)
2249 return NULL_TREE;
2250 info = name_info (fd_ivopts_data, *expr_p);
2251
2252 if (!info->inv_id || info->has_nonlin_use)
2253 return NULL_TREE;
2254
2255 if (!*depends_on)
2256 *depends_on = BITMAP_ALLOC (NULL);
2257 bitmap_set_bit (*depends_on, info->inv_id);
2258
2259 return NULL_TREE;
2260 }
2261
2262 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2263 position to POS. If USE is not NULL, the candidate is set as related to
2264 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2265 replacement of the final value of the iv by a direct computation. */
2266
2267 static struct iv_cand *
2268 add_candidate_1 (struct ivopts_data *data,
2269 tree base, tree step, bool important, enum iv_position pos,
2270 struct iv_use *use, gimple incremented_at)
2271 {
2272 unsigned i;
2273 struct iv_cand *cand = NULL;
2274 tree type, orig_type;
2275
2276 /* For non-original variables, make sure their values are computed in a type
2277 that does not invoke undefined behavior on overflows (since in general,
2278 we cannot prove that these induction variables are non-wrapping). */
2279 if (pos != IP_ORIGINAL)
2280 {
2281 orig_type = TREE_TYPE (base);
2282 type = generic_type_for (orig_type);
2283 if (type != orig_type)
2284 {
2285 base = fold_convert (type, base);
2286 step = fold_convert (type, step);
2287 }
2288 }
2289
2290 for (i = 0; i < n_iv_cands (data); i++)
2291 {
2292 cand = iv_cand (data, i);
2293
2294 if (cand->pos != pos)
2295 continue;
2296
2297 if (cand->incremented_at != incremented_at
2298 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2299 && cand->ainc_use != use))
2300 continue;
2301
2302 if (!cand->iv)
2303 {
2304 if (!base && !step)
2305 break;
2306
2307 continue;
2308 }
2309
2310 if (!base && !step)
2311 continue;
2312
2313 if (operand_equal_p (base, cand->iv->base, 0)
2314 && operand_equal_p (step, cand->iv->step, 0)
2315 && (TYPE_PRECISION (TREE_TYPE (base))
2316 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2317 break;
2318 }
2319
2320 if (i == n_iv_cands (data))
2321 {
2322 cand = XCNEW (struct iv_cand);
2323 cand->id = i;
2324
2325 if (!base && !step)
2326 cand->iv = NULL;
2327 else
2328 cand->iv = alloc_iv (base, step);
2329
2330 cand->pos = pos;
2331 if (pos != IP_ORIGINAL && cand->iv)
2332 {
2333 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2334 cand->var_after = cand->var_before;
2335 }
2336 cand->important = important;
2337 cand->incremented_at = incremented_at;
2338 data->iv_candidates.safe_push (cand);
2339
2340 if (step
2341 && TREE_CODE (step) != INTEGER_CST)
2342 {
2343 fd_ivopts_data = data;
2344 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2345 }
2346
2347 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2348 cand->ainc_use = use;
2349 else
2350 cand->ainc_use = NULL;
2351
2352 if (dump_file && (dump_flags & TDF_DETAILS))
2353 dump_cand (dump_file, cand);
2354 }
2355
2356 if (important && !cand->important)
2357 {
2358 cand->important = true;
2359 if (dump_file && (dump_flags & TDF_DETAILS))
2360 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2361 }
2362
2363 if (use)
2364 {
2365 bitmap_set_bit (use->related_cands, i);
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2367 fprintf (dump_file, "Candidate %d is related to use %d\n",
2368 cand->id, use->id);
2369 }
2370
2371 return cand;
2372 }
2373
2374 /* Returns true if incrementing the induction variable at the end of the LOOP
2375 is allowed.
2376
2377 The purpose is to avoid splitting the latch edge with a biv increment, thus
2378 creating a jump, possibly confusing other optimization passes and leaving
2379 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2380 is not available (so we do not have a better alternative), or if the latch
2381 edge is already nonempty. */
2382
2383 static bool
2384 allow_ip_end_pos_p (struct loop *loop)
2385 {
2386 if (!ip_normal_pos (loop))
2387 return true;
2388
2389 if (!empty_block_p (ip_end_pos (loop)))
2390 return true;
2391
2392 return false;
2393 }
2394
2395 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2396 Important field is set to IMPORTANT. */
2397
2398 static void
2399 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2400 bool important, struct iv_use *use)
2401 {
2402 basic_block use_bb = gimple_bb (use->stmt);
2403 enum machine_mode mem_mode;
2404 unsigned HOST_WIDE_INT cstepi;
2405
2406 /* If we insert the increment in any position other than the standard
2407 ones, we must ensure that it is incremented once per iteration.
2408 It must not be in an inner nested loop, or one side of an if
2409 statement. */
2410 if (use_bb->loop_father != data->current_loop
2411 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2412 || stmt_could_throw_p (use->stmt)
2413 || !cst_and_fits_in_hwi (step))
2414 return;
2415
2416 cstepi = int_cst_value (step);
2417
2418 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2419 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2420 || USE_STORE_PRE_INCREMENT (mem_mode))
2421 && GET_MODE_SIZE (mem_mode) == cstepi)
2422 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2423 || USE_STORE_PRE_DECREMENT (mem_mode))
2424 && GET_MODE_SIZE (mem_mode) == -cstepi))
2425 {
2426 enum tree_code code = MINUS_EXPR;
2427 tree new_base;
2428 tree new_step = step;
2429
2430 if (POINTER_TYPE_P (TREE_TYPE (base)))
2431 {
2432 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2433 code = POINTER_PLUS_EXPR;
2434 }
2435 else
2436 new_step = fold_convert (TREE_TYPE (base), new_step);
2437 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2438 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2439 use->stmt);
2440 }
2441 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2442 || USE_STORE_POST_INCREMENT (mem_mode))
2443 && GET_MODE_SIZE (mem_mode) == cstepi)
2444 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2445 || USE_STORE_POST_DECREMENT (mem_mode))
2446 && GET_MODE_SIZE (mem_mode) == -cstepi))
2447 {
2448 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2449 use->stmt);
2450 }
2451 }
2452
2453 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2454 position to POS. If USE is not NULL, the candidate is set as related to
2455 it. The candidate computation is scheduled on all available positions. */
2456
2457 static void
2458 add_candidate (struct ivopts_data *data,
2459 tree base, tree step, bool important, struct iv_use *use)
2460 {
2461 if (ip_normal_pos (data->current_loop))
2462 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2463 if (ip_end_pos (data->current_loop)
2464 && allow_ip_end_pos_p (data->current_loop))
2465 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2466
2467 if (use != NULL && use->type == USE_ADDRESS)
2468 add_autoinc_candidates (data, base, step, important, use);
2469 }
2470
2471 /* Adds standard iv candidates. */
2472
2473 static void
2474 add_standard_iv_candidates (struct ivopts_data *data)
2475 {
2476 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2477
2478 /* The same for the long type if it is still fast enough. */
2479 if (TYPE_PRECISION
2480 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2481 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2482 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2483 build_int_cst (long_integer_type_node, 1), true, NULL);
2484
2485 /* The same for the long long type if it is still fast enough. */
2486 if (TYPE_PRECISION
2487 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2488 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2489 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2490 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2491 }
2492
2493
2494 /* Adds candidates based on the old induction variable IV. */
2495
2496 static void
2497 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2498 {
2499 gimple phi;
2500 tree def;
2501 struct iv_cand *cand;
2502
2503 add_candidate (data, iv->base, iv->step, true, NULL);
2504
2505 /* The same, but with initial value zero. */
2506 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2507 add_candidate (data, size_int (0), iv->step, true, NULL);
2508 else
2509 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2510 iv->step, true, NULL);
2511
2512 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2513 if (gimple_code (phi) == GIMPLE_PHI)
2514 {
2515 /* Additionally record the possibility of leaving the original iv
2516 untouched. */
2517 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2518 /* Don't add candidate if it's from another PHI node because
2519 it's an affine iv appearing in the form of PEELED_CHREC. */
2520 phi = SSA_NAME_DEF_STMT (def);
2521 if (gimple_code (phi) != GIMPLE_PHI)
2522 {
2523 cand = add_candidate_1 (data,
2524 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2525 SSA_NAME_DEF_STMT (def));
2526 cand->var_before = iv->ssa_name;
2527 cand->var_after = def;
2528 }
2529 else
2530 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2531 }
2532 }
2533
2534 /* Adds candidates based on the old induction variables. */
2535
2536 static void
2537 add_old_ivs_candidates (struct ivopts_data *data)
2538 {
2539 unsigned i;
2540 struct iv *iv;
2541 bitmap_iterator bi;
2542
2543 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2544 {
2545 iv = ver_info (data, i)->iv;
2546 if (iv && iv->biv_p && !integer_zerop (iv->step))
2547 add_old_iv_candidates (data, iv);
2548 }
2549 }
2550
2551 /* Adds candidates based on the value of the induction variable IV and USE. */
2552
2553 static void
2554 add_iv_value_candidates (struct ivopts_data *data,
2555 struct iv *iv, struct iv_use *use)
2556 {
2557 unsigned HOST_WIDE_INT offset;
2558 tree base;
2559 tree basetype;
2560
2561 add_candidate (data, iv->base, iv->step, false, use);
2562
2563 /* The same, but with initial value zero. Make such a variable important,
2564 since it is generic enough so that possibly many uses may be based
2565 on it. */
2566 basetype = TREE_TYPE (iv->base);
2567 if (POINTER_TYPE_P (basetype))
2568 basetype = sizetype;
2569 add_candidate (data, build_int_cst (basetype, 0),
2570 iv->step, true, use);
2571
2572 /* Third, try removing the constant offset. Make sure to even
2573 add a candidate for &a[0] vs. (T *)&a. */
2574 base = strip_offset (iv->base, &offset);
2575 if (offset
2576 || base != iv->base)
2577 add_candidate (data, base, iv->step, false, use);
2578 }
2579
2580 /* Adds candidates based on the uses. */
2581
2582 static void
2583 add_derived_ivs_candidates (struct ivopts_data *data)
2584 {
2585 unsigned i;
2586
2587 for (i = 0; i < n_iv_uses (data); i++)
2588 {
2589 struct iv_use *use = iv_use (data, i);
2590
2591 if (!use)
2592 continue;
2593
2594 switch (use->type)
2595 {
2596 case USE_NONLINEAR_EXPR:
2597 case USE_COMPARE:
2598 case USE_ADDRESS:
2599 /* Just add the ivs based on the value of the iv used here. */
2600 add_iv_value_candidates (data, use->iv, use);
2601 break;
2602
2603 default:
2604 gcc_unreachable ();
2605 }
2606 }
2607 }
2608
2609 /* Record important candidates and add them to related_cands bitmaps
2610 if needed. */
2611
2612 static void
2613 record_important_candidates (struct ivopts_data *data)
2614 {
2615 unsigned i;
2616 struct iv_use *use;
2617
2618 for (i = 0; i < n_iv_cands (data); i++)
2619 {
2620 struct iv_cand *cand = iv_cand (data, i);
2621
2622 if (cand->important)
2623 bitmap_set_bit (data->important_candidates, i);
2624 }
2625
2626 data->consider_all_candidates = (n_iv_cands (data)
2627 <= CONSIDER_ALL_CANDIDATES_BOUND);
2628
2629 if (data->consider_all_candidates)
2630 {
2631 /* We will not need "related_cands" bitmaps in this case,
2632 so release them to decrease peak memory consumption. */
2633 for (i = 0; i < n_iv_uses (data); i++)
2634 {
2635 use = iv_use (data, i);
2636 BITMAP_FREE (use->related_cands);
2637 }
2638 }
2639 else
2640 {
2641 /* Add important candidates to the related_cands bitmaps. */
2642 for (i = 0; i < n_iv_uses (data); i++)
2643 bitmap_ior_into (iv_use (data, i)->related_cands,
2644 data->important_candidates);
2645 }
2646 }
2647
2648 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2649 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2650 we allocate a simple list to every use. */
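/* A sketch with hypothetical numbers: a use related to 5 candidates
   gets n_map_members == 8, the next power of two, so that the
   open-addressed lookups below can hash with

     s = cand->id & (use->n_map_members - 1);

   instead of a division; see set_use_iv_cost and get_use_iv_cost. */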
2651
2652 static void
2653 alloc_use_cost_map (struct ivopts_data *data)
2654 {
2655 unsigned i, size, s;
2656
2657 for (i = 0; i < n_iv_uses (data); i++)
2658 {
2659 struct iv_use *use = iv_use (data, i);
2660
2661 if (data->consider_all_candidates)
2662 size = n_iv_cands (data);
2663 else
2664 {
2665 s = bitmap_count_bits (use->related_cands);
2666
2667 /* Round up to a power of two, so that taking the modulo is fast. */
2668 size = s ? (1 << ceil_log2 (s)) : 1;
2669 }
2670
2671 use->n_map_members = size;
2672 use->cost_map = XCNEWVEC (struct cost_pair, size);
2673 }
2674 }
2675
2676 /* Returns description of computation cost of expression whose runtime
2677 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2678
2679 static comp_cost
2680 new_cost (unsigned runtime, unsigned complexity)
2681 {
2682 comp_cost cost;
2683
2684 cost.cost = runtime;
2685 cost.complexity = complexity;
2686
2687 return cost;
2688 }
2689
2690 /* Adds costs COST1 and COST2. */
2691
2692 static comp_cost
2693 add_costs (comp_cost cost1, comp_cost cost2)
2694 {
2695 cost1.cost += cost2.cost;
2696 cost1.complexity += cost2.complexity;
2697
2698 return cost1;
2699 }
2700 /* Subtracts costs COST1 and COST2. */
2701
2702 static comp_cost
2703 sub_costs (comp_cost cost1, comp_cost cost2)
2704 {
2705 cost1.cost -= cost2.cost;
2706 cost1.complexity -= cost2.complexity;
2707
2708 return cost1;
2709 }
2710
2711 /* Returns a negative number if COST1 < COST2, a positive number if
2712 COST1 > COST2, and 0 if COST1 = COST2. */
2713
2714 static int
2715 compare_costs (comp_cost cost1, comp_cost cost2)
2716 {
2717 if (cost1.cost == cost2.cost)
2718 return cost1.complexity - cost2.complexity;
2719
2720 return cost1.cost - cost2.cost;
2721 }
2722
2723 /* Returns true if COST is infinite. */
2724
2725 static bool
2726 infinite_cost_p (comp_cost cost)
2727 {
2728 return cost.cost == INFTY;
2729 }
2730
2731 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2732 on invariants DEPENDS_ON and that the value used in expressing it
2733 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2734
2735 static void
2736 set_use_iv_cost (struct ivopts_data *data,
2737 struct iv_use *use, struct iv_cand *cand,
2738 comp_cost cost, bitmap depends_on, tree value,
2739 enum tree_code comp, int inv_expr_id)
2740 {
2741 unsigned i, s;
2742
2743 if (infinite_cost_p (cost))
2744 {
2745 BITMAP_FREE (depends_on);
2746 return;
2747 }
2748
2749 if (data->consider_all_candidates)
2750 {
2751 use->cost_map[cand->id].cand = cand;
2752 use->cost_map[cand->id].cost = cost;
2753 use->cost_map[cand->id].depends_on = depends_on;
2754 use->cost_map[cand->id].value = value;
2755 use->cost_map[cand->id].comp = comp;
2756 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2757 return;
2758 }
2759
2760 /* n_map_members is a power of two, so this computes modulo. */
2761 s = cand->id & (use->n_map_members - 1);
2762 for (i = s; i < use->n_map_members; i++)
2763 if (!use->cost_map[i].cand)
2764 goto found;
2765 for (i = 0; i < s; i++)
2766 if (!use->cost_map[i].cand)
2767 goto found;
2768
2769 gcc_unreachable ();
2770
2771 found:
2772 use->cost_map[i].cand = cand;
2773 use->cost_map[i].cost = cost;
2774 use->cost_map[i].depends_on = depends_on;
2775 use->cost_map[i].value = value;
2776 use->cost_map[i].comp = comp;
2777 use->cost_map[i].inv_expr_id = inv_expr_id;
2778 }
2779
2780 /* Gets cost of (USE, CANDIDATE) pair. */
2781
2782 static struct cost_pair *
2783 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2784 struct iv_cand *cand)
2785 {
2786 unsigned i, s;
2787 struct cost_pair *ret;
2788
2789 if (!cand)
2790 return NULL;
2791
2792 if (data->consider_all_candidates)
2793 {
2794 ret = use->cost_map + cand->id;
2795 if (!ret->cand)
2796 return NULL;
2797
2798 return ret;
2799 }
2800
2801 /* n_map_members is a power of two, so this computes modulo. */
2802 s = cand->id & (use->n_map_members - 1);
2803 for (i = s; i < use->n_map_members; i++)
2804 if (use->cost_map[i].cand == cand)
2805 return use->cost_map + i;
2806 else if (use->cost_map[i].cand == NULL)
2807 return NULL;
2808 for (i = 0; i < s; i++)
2809 if (use->cost_map[i].cand == cand)
2810 return use->cost_map + i;
2811 else if (use->cost_map[i].cand == NULL)
2812 return NULL;
2813
2814 return NULL;
2815 }
2816
2817 /* Returns estimate on cost of computing SEQ. */
2818
2819 static unsigned
2820 seq_cost (rtx seq, bool speed)
2821 {
2822 unsigned cost = 0;
2823 rtx set;
2824
2825 for (; seq; seq = NEXT_INSN (seq))
2826 {
2827 set = single_set (seq);
2828 if (set)
2829 cost += set_src_cost (SET_SRC (set), speed);
2830 else
2831 cost++;
2832 }
2833
2834 return cost;
2835 }
2836
2837 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2838 static rtx
2839 produce_memory_decl_rtl (tree obj, int *regno)
2840 {
2841 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2842 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2843 rtx x;
2844
2845 gcc_assert (obj);
2846 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2847 {
2848 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2849 x = gen_rtx_SYMBOL_REF (address_mode, name);
2850 SET_SYMBOL_REF_DECL (x, obj);
2851 x = gen_rtx_MEM (DECL_MODE (obj), x);
2852 set_mem_addr_space (x, as);
2853 targetm.encode_section_info (obj, x, true);
2854 }
2855 else
2856 {
2857 x = gen_raw_REG (address_mode, (*regno)++);
2858 x = gen_rtx_MEM (DECL_MODE (obj), x);
2859 set_mem_addr_space (x, as);
2860 }
2861
2862 return x;
2863 }
2864
2865 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2866 walk_tree. DATA contains the actual fake register number. */
2867
2868 static tree
2869 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2870 {
2871 tree obj = NULL_TREE;
2872 rtx x = NULL_RTX;
2873 int *regno = (int *) data;
2874
2875 switch (TREE_CODE (*expr_p))
2876 {
2877 case ADDR_EXPR:
2878 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2879 handled_component_p (*expr_p);
2880 expr_p = &TREE_OPERAND (*expr_p, 0))
2881 continue;
2882 obj = *expr_p;
2883 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2884 x = produce_memory_decl_rtl (obj, regno);
2885 break;
2886
2887 case SSA_NAME:
2888 *ws = 0;
2889 obj = SSA_NAME_VAR (*expr_p);
2890 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2891 if (!obj)
2892 return NULL_TREE;
2893 if (!DECL_RTL_SET_P (obj))
2894 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2895 break;
2896
2897 case VAR_DECL:
2898 case PARM_DECL:
2899 case RESULT_DECL:
2900 *ws = 0;
2901 obj = *expr_p;
2902
2903 if (DECL_RTL_SET_P (obj))
2904 break;
2905
2906 if (DECL_MODE (obj) == BLKmode)
2907 x = produce_memory_decl_rtl (obj, regno);
2908 else
2909 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2910
2911 break;
2912
2913 default:
2914 break;
2915 }
2916
2917 if (x)
2918 {
2919 decl_rtl_to_reset.safe_push (obj);
2920 SET_DECL_RTL (obj, x);
2921 }
2922
2923 return NULL_TREE;
2924 }
2925
2926 /* Determines cost of the computation of EXPR. */
2927
2928 static unsigned
2929 computation_cost (tree expr, bool speed)
2930 {
2931 rtx seq, rslt;
2932 tree type = TREE_TYPE (expr);
2933 unsigned cost;
2934 /* Avoid using hard regs in ways which may be unsupported. */
2935 int regno = LAST_VIRTUAL_REGISTER + 1;
2936 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2937 enum node_frequency real_frequency = node->frequency;
2938
2939 node->frequency = NODE_FREQUENCY_NORMAL;
2940 crtl->maybe_hot_insn_p = speed;
2941 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2942 start_sequence ();
2943 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2944 seq = get_insns ();
2945 end_sequence ();
2946 default_rtl_profile ();
2947 node->frequency = real_frequency;
2948
2949 cost = seq_cost (seq, speed);
2950 if (MEM_P (rslt))
2951 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2952 TYPE_ADDR_SPACE (type), speed);
2953 else if (!REG_P (rslt))
2954 cost += set_src_cost (rslt, speed);
2955
2956 return cost;
2957 }
2958
2959 /* Returns variable containing the value of candidate CAND at statement AT. */
2960
2961 static tree
2962 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2963 {
2964 if (stmt_after_increment (loop, cand, stmt))
2965 return cand->var_after;
2966 else
2967 return cand->var_before;
2968 }
2969
2970 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2971 same precision that is at least as wide as the precision of TYPE, stores
2972 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2973 type of A and B. */
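/* For example (an illustrative sketch): if A is (unsigned int) x and
   B is (unsigned int) y, with x and y of the wider type "long", we
   return "long" and strip both conversions, so that x - y is folded
   in the wider type before the result is cast back. */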
2974
2975 static tree
2976 determine_common_wider_type (tree *a, tree *b)
2977 {
2978 tree wider_type = NULL;
2979 tree suba, subb;
2980 tree atype = TREE_TYPE (*a);
2981
2982 if (CONVERT_EXPR_P (*a))
2983 {
2984 suba = TREE_OPERAND (*a, 0);
2985 wider_type = TREE_TYPE (suba);
2986 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2987 return atype;
2988 }
2989 else
2990 return atype;
2991
2992 if (CONVERT_EXPR_P (*b))
2993 {
2994 subb = TREE_OPERAND (*b, 0);
2995 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2996 return atype;
2997 }
2998 else
2999 return atype;
3000
3001 *a = suba;
3002 *b = subb;
3003 return wider_type;
3004 }
3005
3006 /* Determines the expression by which USE is expressed from induction variable
3007 CAND at statement AT in LOOP. The expression is stored in a decomposed
3008 form into AFF. Returns false if USE cannot be expressed using CAND. */
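/* Concretely (a hypothetical example): with ustep == ratio * cstep,
   the use value at AT is reconstructed as

     use = ubase - ratio * cbase + ratio * var;

   e.g. ubase == 16, cbase == 0, ustep == 8 and cstep == 4 give
   ratio == 2 and use == 16 + 2 * var. */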
3009
3010 static bool
3011 get_computation_aff (struct loop *loop,
3012 struct iv_use *use, struct iv_cand *cand, gimple at,
3013 struct aff_tree *aff)
3014 {
3015 tree ubase = use->iv->base;
3016 tree ustep = use->iv->step;
3017 tree cbase = cand->iv->base;
3018 tree cstep = cand->iv->step, cstep_common;
3019 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3020 tree common_type, var;
3021 tree uutype;
3022 aff_tree cbase_aff, var_aff;
3023 double_int rat;
3024
3025 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3026 {
3027 /* We do not have a precision to express the values of the use. */
3028 return false;
3029 }
3030
3031 var = var_at_stmt (loop, cand, at);
3032 uutype = unsigned_type_for (utype);
3033
3034 /* If the conversion is not noop, perform it. */
3035 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3036 {
3037 cstep = fold_convert (uutype, cstep);
3038 cbase = fold_convert (uutype, cbase);
3039 var = fold_convert (uutype, var);
3040 }
3041
3042 if (!constant_multiple_of (ustep, cstep, &rat))
3043 return false;
3044
3045 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3046 type, we achieve better folding by computing their difference in this
3047 wider type, and casting the result to UUTYPE. We do not need to worry about
3048 overflows, as all the arithmetic will in the end be performed in UUTYPE
3049 anyway. */
3050 common_type = determine_common_wider_type (&ubase, &cbase);
3051
3052 /* use = ubase - ratio * cbase + ratio * var. */
3053 tree_to_aff_combination (ubase, common_type, aff);
3054 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3055 tree_to_aff_combination (var, uutype, &var_aff);
3056
3057 /* We need to shift the value if we are after the increment. */
3058 if (stmt_after_increment (loop, cand, at))
3059 {
3060 aff_tree cstep_aff;
3061
3062 if (common_type != uutype)
3063 cstep_common = fold_convert (common_type, cstep);
3064 else
3065 cstep_common = cstep;
3066
3067 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3068 aff_combination_add (&cbase_aff, &cstep_aff);
3069 }
3070
3071 aff_combination_scale (&cbase_aff, -rat);
3072 aff_combination_add (aff, &cbase_aff);
3073 if (common_type != uutype)
3074 aff_combination_convert (aff, uutype);
3075
3076 aff_combination_scale (&var_aff, rat);
3077 aff_combination_add (aff, &var_aff);
3078
3079 return true;
3080 }
3081
3082 /* Return the type of USE. */
3083
3084 static tree
3085 get_use_type (struct iv_use *use)
3086 {
3087 tree base_type = TREE_TYPE (use->iv->base);
3088 tree type;
3089
3090 if (use->type == USE_ADDRESS)
3091 {
3092 /* The base_type may be a void pointer. Create a pointer type based on
3093 the mem_ref instead. */
3094 type = build_pointer_type (TREE_TYPE (*use->op_p));
3095 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3096 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3097 }
3098 else
3099 type = base_type;
3100
3101 return type;
3102 }
3103
3104 /* Determines the expression by which USE is expressed from induction variable
3105 CAND at statement AT in LOOP. The computation is unshared. */
3106
3107 static tree
3108 get_computation_at (struct loop *loop,
3109 struct iv_use *use, struct iv_cand *cand, gimple at)
3110 {
3111 aff_tree aff;
3112 tree type = get_use_type (use);
3113
3114 if (!get_computation_aff (loop, use, cand, at, &aff))
3115 return NULL_TREE;
3116 unshare_aff_combination (&aff);
3117 return fold_convert (type, aff_combination_to_tree (&aff));
3118 }
3119
3120 /* Determines the expression by which USE is expressed from induction variable
3121 CAND in LOOP. The computation is unshared. */
3122
3123 static tree
3124 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3125 {
3126 return get_computation_at (loop, use, cand, use->stmt);
3127 }
3128
3129 /* Adjust the cost COST for being in loop setup rather than loop body.
3130 If we're optimizing for space, the loop setup overhead is constant;
3131 if we're optimizing for speed, amortize it over the per-iteration cost. */
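/* E.g. (hypothetical numbers): a setup cost of 10 in a loop that is
   expected to iterate 5 times on average contributes 10 / 5 == 2 to
   the per-iteration cost when optimizing for speed. */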
3132 static unsigned
3133 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3134 {
3135 if (cost == INFTY)
3136 return cost;
3137 else if (optimize_loop_for_speed_p (data->current_loop))
3138 return cost / avg_loop_niter (data->current_loop);
3139 else
3140 return cost;
3141 }
3142
3143 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3144 validity for a memory reference accessing memory of mode MODE in
3145 address space AS. */
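/* On i386-like targets (an illustrative assumption), the multipliers
   recognized here are typically 1, 2, 4 and 8, matching the scale in
   the (base + index * scale + displacement) addressing mode. */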
3146
3147
3148 bool
3149 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3150 addr_space_t as)
3151 {
3152 #define MAX_RATIO 128
3153 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3154 static vec<sbitmap> valid_mult_list;
3155 sbitmap valid_mult;
3156
3157 if (data_index >= valid_mult_list.length ())
3158 valid_mult_list.safe_grow_cleared (data_index + 1);
3159
3160 valid_mult = valid_mult_list[data_index];
3161 if (!valid_mult)
3162 {
3163 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3164 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3165 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3166 rtx addr, scaled;
3167 HOST_WIDE_INT i;
3168
3169 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3170 bitmap_clear (valid_mult);
3171 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3172 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3173 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3174 {
3175 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3176 if (memory_address_addr_space_p (mode, addr, as)
3177 || memory_address_addr_space_p (mode, scaled, as))
3178 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3179 }
3180
3181 if (dump_file && (dump_flags & TDF_DETAILS))
3182 {
3183 fprintf (dump_file, " allowed multipliers:");
3184 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3185 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3186 fprintf (dump_file, " %d", (int) i);
3187 fprintf (dump_file, "\n");
3188 fprintf (dump_file, "\n");
3189 }
3190
3191 valid_mult_list[data_index] = valid_mult;
3192 }
3193
3194 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3195 return false;
3196
3197 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3198 }
3199
3200 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3201 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3202 variable is omitted. Compute the cost for a memory reference that accesses
3203 a memory location of mode MEM_MODE in address space AS.
3204
3205 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3206 size of MEM_MODE / RATIO) is available. To make this determination, we
3207 look at the size of the increment to be made, which is given in CSTEP.
3208 CSTEP may be zero if the step is unknown.
3209 STMT_AFTER_INC is true iff the statement we're looking at is after the
3210 increment of the original biv.
3211
3212 TODO -- there must be some better way. This all is quite crude. */
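/* For instance, an address of the shape (hypothetical values)

     &sym + x + 16 + 4 * i

   has SYMBOL_PRESENT and VAR_PRESENT true, OFFSET == 16 and
   RATIO == 4, and its cost is looked up in costs[1][1][1][1] of the
   cached table below. */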
3213
3214 enum ainc_type
3215 {
3216 AINC_PRE_INC, /* Pre increment. */
3217 AINC_PRE_DEC, /* Pre decrement. */
3218 AINC_POST_INC, /* Post increment. */
3219 AINC_POST_DEC, /* Post decrement. */
3220 AINC_NONE /* Also the number of auto increment types. */
3221 };
3222
3223 typedef struct address_cost_data_s
3224 {
3225 HOST_WIDE_INT min_offset, max_offset;
3226 unsigned costs[2][2][2][2];
3227 unsigned ainc_costs[AINC_NONE];
3228 } *address_cost_data;
3229
3230
3231 static comp_cost
3232 get_address_cost (bool symbol_present, bool var_present,
3233 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3234 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3235 addr_space_t as, bool speed,
3236 bool stmt_after_inc, bool *may_autoinc)
3237 {
3238 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3239 static vec<address_cost_data> address_cost_data_list;
3240 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3241 address_cost_data data;
3242 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3243 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3244 unsigned cost, acost, complexity;
3245 enum ainc_type autoinc_type;
3246 bool offset_p, ratio_p, autoinc;
3247 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3248 unsigned HOST_WIDE_INT mask;
3249 unsigned bits;
3250
3251 if (data_index >= address_cost_data_list.length ())
3252 address_cost_data_list.safe_grow_cleared (data_index + 1);
3253
3254 data = address_cost_data_list[data_index];
3255 if (!data)
3256 {
3257 HOST_WIDE_INT i;
3258 HOST_WIDE_INT rat, off = 0;
3259 int old_cse_not_expected, width;
3260 unsigned sym_p, var_p, off_p, rat_p, add_c;
3261 rtx seq, addr, base;
3262 rtx reg0, reg1;
3263
3264 data = (address_cost_data) xcalloc (1, sizeof (*data));
3265
3266 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3267
3268 width = GET_MODE_BITSIZE (address_mode) - 1;
3269 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3270 width = HOST_BITS_PER_WIDE_INT - 1;
3271 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3272
3273 for (i = width; i >= 0; i--)
3274 {
3275 off = -((unsigned HOST_WIDE_INT) 1 << i);
3276 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3277 if (memory_address_addr_space_p (mem_mode, addr, as))
3278 break;
3279 }
3280 data->min_offset = (i == -1? 0 : off);
3281
3282 for (i = width; i >= 0; i--)
3283 {
3284 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3285 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3286 if (memory_address_addr_space_p (mem_mode, addr, as))
3287 break;
3288 }
3289 if (i == -1)
3290 off = 0;
3291 data->max_offset = off;
3292
3293 if (dump_file && (dump_flags & TDF_DETAILS))
3294 {
3295 fprintf (dump_file, "get_address_cost:\n");
3296 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3297 GET_MODE_NAME (mem_mode),
3298 data->min_offset);
3299 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3300 GET_MODE_NAME (mem_mode),
3301 data->max_offset);
3302 }
3303
3304 rat = 1;
3305 for (i = 2; i <= MAX_RATIO; i++)
3306 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3307 {
3308 rat = i;
3309 break;
3310 }
3311
3312 /* Compute the cost of various addressing modes. */
3313 acost = 0;
3314 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3315 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3316
3317 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3318 || USE_STORE_PRE_DECREMENT (mem_mode))
3319 {
3320 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3321 has_predec[mem_mode]
3322 = memory_address_addr_space_p (mem_mode, addr, as);
3323
3324 if (has_predec[mem_mode])
3325 data->ainc_costs[AINC_PRE_DEC]
3326 = address_cost (addr, mem_mode, as, speed);
3327 }
3328 if (USE_LOAD_POST_DECREMENT (mem_mode)
3329 || USE_STORE_POST_DECREMENT (mem_mode))
3330 {
3331 addr = gen_rtx_POST_DEC (address_mode, reg0);
3332 has_postdec[mem_mode]
3333 = memory_address_addr_space_p (mem_mode, addr, as);
3334
3335 if (has_postdec[mem_mode])
3336 data->ainc_costs[AINC_POST_DEC]
3337 = address_cost (addr, mem_mode, as, speed);
3338 }
3339 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3340 || USE_STORE_PRE_INCREMENT (mem_mode))
3341 {
3342 addr = gen_rtx_PRE_INC (address_mode, reg0);
3343 has_preinc[mem_mode]
3344 = memory_address_addr_space_p (mem_mode, addr, as);
3345
3346 if (has_preinc[mem_mode])
3347 data->ainc_costs[AINC_PRE_INC]
3348 = address_cost (addr, mem_mode, as, speed);
3349 }
3350 if (USE_LOAD_POST_INCREMENT (mem_mode)
3351 || USE_STORE_POST_INCREMENT (mem_mode))
3352 {
3353 addr = gen_rtx_POST_INC (address_mode, reg0);
3354 has_postinc[mem_mode]
3355 = memory_address_addr_space_p (mem_mode, addr, as);
3356
3357 if (has_postinc[mem_mode])
3358 data->ainc_costs[AINC_POST_INC]
3359 = address_cost (addr, mem_mode, as, speed);
3360 }
3361 for (i = 0; i < 16; i++)
3362 {
3363 sym_p = i & 1;
3364 var_p = (i >> 1) & 1;
3365 off_p = (i >> 2) & 1;
3366 rat_p = (i >> 3) & 1;
3367
3368 addr = reg0;
3369 if (rat_p)
3370 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3371 gen_int_mode (rat, address_mode));
3372
3373 if (var_p)
3374 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3375
3376 if (sym_p)
3377 {
3378 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3379 /* ??? We can run into trouble with some backends by presenting
3380 it with symbols which haven't been properly passed through
3381 targetm.encode_section_info. By setting the local bit, we
3382 enhance the probability of things working. */
3383 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3384
3385 if (off_p)
3386 base = gen_rtx_fmt_e (CONST, address_mode,
3387 gen_rtx_fmt_ee
3388 (PLUS, address_mode, base,
3389 gen_int_mode (off, address_mode)));
3390 }
3391 else if (off_p)
3392 base = gen_int_mode (off, address_mode);
3393 else
3394 base = NULL_RTX;
3395
3396 if (base)
3397 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3398
3399 start_sequence ();
3400 /* To avoid splitting addressing modes, pretend that no cse will
3401 follow. */
3402 old_cse_not_expected = cse_not_expected;
3403 cse_not_expected = true;
3404 addr = memory_address_addr_space (mem_mode, addr, as);
3405 cse_not_expected = old_cse_not_expected;
3406 seq = get_insns ();
3407 end_sequence ();
3408
3409 acost = seq_cost (seq, speed);
3410 acost += address_cost (addr, mem_mode, as, speed);
3411
3412 if (!acost)
3413 acost = 1;
3414 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3415 }
3416
3417 /* On some targets, it is quite expensive to load symbol to a register,
3418 which makes addresses that contain symbols look much more expensive.
3419 However, the symbol will have to be loaded in any case before the
3420 loop (and quite likely we have it in register already), so it does not
3421 make much sense to penalize them too heavily. So make some final
3422 tweaks for the SYMBOL_PRESENT modes:
3423
3424 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3425 var is cheaper, use this mode with small penalty.
3426 If VAR_PRESENT is true, try whether the mode with
3427 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3428 if this is the case, use it. */
3429 add_c = add_cost (speed, address_mode);
3430 for (i = 0; i < 8; i++)
3431 {
3432 var_p = i & 1;
3433 off_p = (i >> 1) & 1;
3434 rat_p = (i >> 2) & 1;
3435
3436 acost = data->costs[0][1][off_p][rat_p] + 1;
3437 if (var_p)
3438 acost += add_c;
3439
3440 if (acost < data->costs[1][var_p][off_p][rat_p])
3441 data->costs[1][var_p][off_p][rat_p] = acost;
3442 }
3443
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3445 {
3446 fprintf (dump_file, "Address costs:\n");
3447
3448 for (i = 0; i < 16; i++)
3449 {
3450 sym_p = i & 1;
3451 var_p = (i >> 1) & 1;
3452 off_p = (i >> 2) & 1;
3453 rat_p = (i >> 3) & 1;
3454
3455 fprintf (dump_file, " ");
3456 if (sym_p)
3457 fprintf (dump_file, "sym + ");
3458 if (var_p)
3459 fprintf (dump_file, "var + ");
3460 if (off_p)
3461 fprintf (dump_file, "cst + ");
3462 if (rat_p)
3463 fprintf (dump_file, "rat * ");
3464
3465 acost = data->costs[sym_p][var_p][off_p][rat_p];
3466 fprintf (dump_file, "index costs %d\n", acost);
3467 }
3468 if (has_predec[mem_mode] || has_postdec[mem_mode]
3469 || has_preinc[mem_mode] || has_postinc[mem_mode])
3470 fprintf (dump_file, " May include autoinc/dec\n");
3471 fprintf (dump_file, "\n");
3472 }
3473
3474 address_cost_data_list[data_index] = data;
3475 }
3476
3477 bits = GET_MODE_BITSIZE (address_mode);
3478 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3479 offset &= mask;
3480 if ((offset >> (bits - 1) & 1))
3481 offset |= ~mask;
3482 s_offset = offset;
3483
3484 autoinc = false;
3485 autoinc_type = AINC_NONE;
3486 msize = GET_MODE_SIZE (mem_mode);
3487 autoinc_offset = offset;
3488 if (stmt_after_inc)
3489 autoinc_offset += ratio * cstep;
3490 if (symbol_present || var_present || ratio != 1)
3491 autoinc = false;
3492 else
3493 {
3494 if (has_postinc[mem_mode] && autoinc_offset == 0
3495 && msize == cstep)
3496 autoinc_type = AINC_POST_INC;
3497 else if (has_postdec[mem_mode] && autoinc_offset == 0
3498 && msize == -cstep)
3499 autoinc_type = AINC_POST_DEC;
3500 else if (has_preinc[mem_mode] && autoinc_offset == msize
3501 && msize == cstep)
3502 autoinc_type = AINC_PRE_INC;
3503 else if (has_predec[mem_mode] && autoinc_offset == -msize
3504 && msize == -cstep)
3505 autoinc_type = AINC_PRE_DEC;
3506
3507 if (autoinc_type != AINC_NONE)
3508 autoinc = true;
3509 }
3510
3511 cost = 0;
3512 offset_p = (s_offset != 0
3513 && data->min_offset <= s_offset
3514 && s_offset <= data->max_offset);
3515 ratio_p = (ratio != 1
3516 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3517
3518 if (ratio != 1 && !ratio_p)
3519 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3520
3521 if (s_offset && !offset_p && !symbol_present)
3522 cost += add_cost (speed, address_mode);
3523
3524 if (may_autoinc)
3525 *may_autoinc = autoinc;
3526 if (autoinc)
3527 acost = data->ainc_costs[autoinc_type];
3528 else
3529 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3530 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3531 return new_cost (cost + acost, complexity);
3532 }
3533
3534 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3535 EXPR operand holding the shift. COST0 and COST1 are the costs for
3536 calculating the operands of EXPR. Returns true if successful, and returns
3537 the cost in COST. */
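/* E.g. (an illustrative sketch): for EXPR == a + b * 8, the target
   may issue a single shift-and-add "(b << 3) + a"; m below is then
   exact_log2 (8) == 3 and the cost comes from shiftadd_cost. */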
3538
3539 static bool
3540 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3541 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3542 {
3543 comp_cost res;
3544 tree op1 = TREE_OPERAND (expr, 1);
3545 tree cst = TREE_OPERAND (mult, 1);
3546 tree multop = TREE_OPERAND (mult, 0);
3547 int m = exact_log2 (int_cst_value (cst));
3548 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3549 int sa_cost;
3550 bool equal_p = false;
3551
3552 if (!(m >= 0 && m < maxm))
3553 return false;
3554
3555 if (operand_equal_p (op1, mult, 0))
3556 equal_p = true;
3557
3558 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3559 ? shiftadd_cost (speed, mode, m)
3560 : (equal_p
3561 ? shiftsub1_cost (speed, mode, m)
3562 : shiftsub0_cost (speed, mode, m)));
3563 res = new_cost (sa_cost, 0);
3564 res = add_costs (res, equal_p ? cost0 : cost1);
3565
3566 STRIP_NOPS (multop);
3567 if (!is_gimple_val (multop))
3568 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3569
3570 *cost = res;
3571 return true;
3572 }
3573
3574 /* Estimates cost of forcing expression EXPR into a variable. */
3575
3576 static comp_cost
3577 force_expr_to_var_cost (tree expr, bool speed)
3578 {
3579 static bool costs_initialized = false;
3580 static unsigned integer_cost [2];
3581 static unsigned symbol_cost [2];
3582 static unsigned address_cost [2];
3583 tree op0, op1;
3584 comp_cost cost0, cost1, cost;
3585 enum machine_mode mode;
3586
3587 if (!costs_initialized)
3588 {
3589 tree type = build_pointer_type (integer_type_node);
3590 tree var, addr;
3591 rtx x;
3592 int i;
3593
3594 var = create_tmp_var_raw (integer_type_node, "test_var");
3595 TREE_STATIC (var) = 1;
3596 x = produce_memory_decl_rtl (var, NULL);
3597 SET_DECL_RTL (var, x);
3598
3599 addr = build1 (ADDR_EXPR, type, var);
3600
3601
3602 for (i = 0; i < 2; i++)
3603 {
3604 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3605 2000), i);
3606
3607 symbol_cost[i] = computation_cost (addr, i) + 1;
3608
3609 address_cost[i]
3610 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3611 if (dump_file && (dump_flags & TDF_DETAILS))
3612 {
3613 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3614 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3615 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3616 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3617 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3618 fprintf (dump_file, "\n");
3619 }
3620 }
3621
3622 costs_initialized = true;
3623 }
3624
3625 STRIP_NOPS (expr);
3626
3627 if (SSA_VAR_P (expr))
3628 return no_cost;
3629
3630 if (is_gimple_min_invariant (expr))
3631 {
3632 if (TREE_CODE (expr) == INTEGER_CST)
3633 return new_cost (integer_cost [speed], 0);
3634
3635 if (TREE_CODE (expr) == ADDR_EXPR)
3636 {
3637 tree obj = TREE_OPERAND (expr, 0);
3638
3639 if (TREE_CODE (obj) == VAR_DECL
3640 || TREE_CODE (obj) == PARM_DECL
3641 || TREE_CODE (obj) == RESULT_DECL)
3642 return new_cost (symbol_cost [speed], 0);
3643 }
3644
3645 return new_cost (address_cost [speed], 0);
3646 }
3647
3648 switch (TREE_CODE (expr))
3649 {
3650 case POINTER_PLUS_EXPR:
3651 case PLUS_EXPR:
3652 case MINUS_EXPR:
3653 case MULT_EXPR:
3654 op0 = TREE_OPERAND (expr, 0);
3655 op1 = TREE_OPERAND (expr, 1);
3656 STRIP_NOPS (op0);
3657 STRIP_NOPS (op1);
3658 break;
3659
3660 CASE_CONVERT:
3661 case NEGATE_EXPR:
3662 op0 = TREE_OPERAND (expr, 0);
3663 STRIP_NOPS (op0);
3664 op1 = NULL_TREE;
3665 break;
3666
3667 default:
3668 /* Just an arbitrary value, FIXME. */
3669 return new_cost (target_spill_cost[speed], 0);
3670 }
3671
3672 if (op0 == NULL_TREE
3673 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3674 cost0 = no_cost;
3675 else
3676 cost0 = force_expr_to_var_cost (op0, speed);
3677
3678 if (op1 == NULL_TREE
3679 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3680 cost1 = no_cost;
3681 else
3682 cost1 = force_expr_to_var_cost (op1, speed);
3683
3684 mode = TYPE_MODE (TREE_TYPE (expr));
3685 switch (TREE_CODE (expr))
3686 {
3687 case POINTER_PLUS_EXPR:
3688 case PLUS_EXPR:
3689 case MINUS_EXPR:
3690 case NEGATE_EXPR:
3691 cost = new_cost (add_cost (speed, mode), 0);
3692 if (TREE_CODE (expr) != NEGATE_EXPR)
3693 {
3694 tree mult = NULL_TREE;
3695 comp_cost sa_cost;
3696 if (TREE_CODE (op1) == MULT_EXPR)
3697 mult = op1;
3698 else if (TREE_CODE (op0) == MULT_EXPR)
3699 mult = op0;
3700
3701 if (mult != NULL_TREE
3702 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3703 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3704 speed, &sa_cost))
3705 return sa_cost;
3706 }
3707 break;
3708
3709 CASE_CONVERT:
3710 {
3711 tree inner_mode, outer_mode;
3712 outer_mode = TREE_TYPE (expr);
3713 inner_mode = TREE_TYPE (op0);
3714 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3715 TYPE_MODE (inner_mode), speed), 0);
3716 }
3717 break;
3718
3719 case MULT_EXPR:
3720 if (cst_and_fits_in_hwi (op0))
3721 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3722 mode, speed), 0);
3723 else if (cst_and_fits_in_hwi (op1))
3724 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3725 mode, speed), 0);
3726 else
3727 return new_cost (target_spill_cost [speed], 0);
3728 break;
3729
3730 default:
3731 gcc_unreachable ();
3732 }
3733
3734 cost = add_costs (cost, cost0);
3735 cost = add_costs (cost, cost1);
3736
3737 /* Bound the cost by target_spill_cost. The parts of complicated
3738 computations are often either loop invariant or at least can
3739 be shared between several iv uses, so letting this grow without
3740 limits would not give reasonable results. */
3741 if (cost.cost > (int) target_spill_cost [speed])
3742 cost.cost = target_spill_cost [speed];
3743
3744 return cost;
3745 }
3746
3747 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3748 invariants the computation depends on. */
3749
3750 static comp_cost
3751 force_var_cost (struct ivopts_data *data,
3752 tree expr, bitmap *depends_on)
3753 {
3754 if (depends_on)
3755 {
3756 fd_ivopts_data = data;
3757 walk_tree (&expr, find_depends, depends_on, NULL);
3758 }
3759
3760 return force_expr_to_var_cost (expr, data->speed);
3761 }
3762
3763 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3764 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3765 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3766 invariants the computation depends on. */
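/* For example (a hypothetical case): if ADDR is global_array[3] with
   4-byte elements, CORE below is global_array, 12 is added to *OFFSET,
   and because the declaration is static the symbol part is present at
   no extra runtime cost. */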
3767
3768 static comp_cost
3769 split_address_cost (struct ivopts_data *data,
3770 tree addr, bool *symbol_present, bool *var_present,
3771 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3772 {
3773 tree core;
3774 HOST_WIDE_INT bitsize;
3775 HOST_WIDE_INT bitpos;
3776 tree toffset;
3777 enum machine_mode mode;
3778 int unsignedp, volatilep;
3779
3780 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3781 &unsignedp, &volatilep, false);
3782
3783 if (toffset != 0
3784 || bitpos % BITS_PER_UNIT != 0
3785 || TREE_CODE (core) != VAR_DECL)
3786 {
3787 *symbol_present = false;
3788 *var_present = true;
3789 fd_ivopts_data = data;
3790 walk_tree (&addr, find_depends, depends_on, NULL);
3791 return new_cost (target_spill_cost[data->speed], 0);
3792 }
3793
3794 *offset += bitpos / BITS_PER_UNIT;
3795 if (TREE_STATIC (core)
3796 || DECL_EXTERNAL (core))
3797 {
3798 *symbol_present = true;
3799 *var_present = false;
3800 return no_cost;
3801 }
3802
3803 *symbol_present = false;
3804 *var_present = true;
3805 return no_cost;
3806 }
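
/* Hypothetical example: for ADDR a[3] (the operand of &a[3]), where a
   is a file-scope array of 4-byte integers, get_inner_reference returns
   core == a (a TREE_STATIC VAR_DECL) with bitpos == 96, so 12 is added
   to OFFSET and only the symbol part is present, at no cost.  If a were
   a non-static local, the var part would be reported instead; anything
   more complicated is pessimistically charged target_spill_cost.  */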
3807
3808 /* Estimates cost of expressing difference of addresses E1 - E2 as
3809 var + symbol + offset. The value of offset is added to OFFSET;
3810 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3811 part is missing. DEPENDS_ON is a set of the invariants the computation
3812 depends on. */
3813
3814 static comp_cost
3815 ptr_difference_cost (struct ivopts_data *data,
3816 tree e1, tree e2, bool *symbol_present, bool *var_present,
3817 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3818 {
3819 HOST_WIDE_INT diff = 0;
3820 aff_tree aff_e1, aff_e2;
3821 tree type;
3822
3823 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3824
3825 if (ptr_difference_const (e1, e2, &diff))
3826 {
3827 *offset += diff;
3828 *symbol_present = false;
3829 *var_present = false;
3830 return no_cost;
3831 }
3832
3833 if (integer_zerop (e2))
3834 return split_address_cost (data, TREE_OPERAND (e1, 0),
3835 symbol_present, var_present, offset, depends_on);
3836
3837 *symbol_present = false;
3838 *var_present = true;
3839
3840 type = signed_type_for (TREE_TYPE (e1));
3841 tree_to_aff_combination (e1, type, &aff_e1);
3842 tree_to_aff_combination (e2, type, &aff_e2);
3843 aff_combination_scale (&aff_e2, double_int_minus_one);
3844 aff_combination_add (&aff_e1, &aff_e2);
3845
3846 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3847 }
3848
3849 /* Estimates cost of expressing difference E1 - E2 as
3850 var + symbol + offset. The value of offset is added to OFFSET;
3851 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3852 part is missing. DEPENDS_ON is a set of the invariants the computation
3853 depends on. */
3854
3855 static comp_cost
3856 difference_cost (struct ivopts_data *data,
3857 tree e1, tree e2, bool *symbol_present, bool *var_present,
3858 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3859 {
3860 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3861 unsigned HOST_WIDE_INT off1, off2;
3862 aff_tree aff_e1, aff_e2;
3863 tree type;
3864
3865 e1 = strip_offset (e1, &off1);
3866 e2 = strip_offset (e2, &off2);
3867 *offset += off1 - off2;
3868
3869 STRIP_NOPS (e1);
3870 STRIP_NOPS (e2);
3871
3872 if (TREE_CODE (e1) == ADDR_EXPR)
3873 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3874 offset, depends_on);
3875 *symbol_present = false;
3876
3877 if (operand_equal_p (e1, e2, 0))
3878 {
3879 *var_present = false;
3880 return no_cost;
3881 }
3882
3883 *var_present = true;
3884
3885 if (integer_zerop (e2))
3886 return force_var_cost (data, e1, depends_on);
3887
3888 if (integer_zerop (e1))
3889 {
3890 comp_cost cost = force_var_cost (data, e2, depends_on);
3891 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3892 return cost;
3893 }
3894
3895 type = signed_type_for (TREE_TYPE (e1));
3896 tree_to_aff_combination (e1, type, &aff_e1);
3897 tree_to_aff_combination (e2, type, &aff_e2);
3898 aff_combination_scale (&aff_e2, double_int_minus_one);
3899 aff_combination_add (&aff_e1, &aff_e2);
3900
3901 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3902 }
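
/* A sketch of the cases above (illustrative, made-up operands): for
   e1 == p + 8 and e2 == p, strip_offset leaves two equal operands with
   *offset increased by 8, so there is no variable part and the cost is
   zero.  For e1 == x, e2 == 0 only x must be forced into a variable,
   while e1 == 0, e2 == x additionally pays mult_by_coeff_cost (-1)
   for the negation.  */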
3903
3904 /* Returns true if AFF1 and AFF2 are identical. */
3905
3906 static bool
3907 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3908 {
3909 unsigned i;
3910
3911 if (aff1->n != aff2->n)
3912 return false;
3913
3914 for (i = 0; i < aff1->n; i++)
3915 {
3916 if (aff1->elts[i].coef != aff2->elts[i].coef)
3917 return false;
3918
3919 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3920 return false;
3921 }
3922 return true;
3923 }
3924
3925 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3926
3927 static int
3928 get_expr_id (struct ivopts_data *data, tree expr)
3929 {
3930 struct iv_inv_expr_ent ent;
3931 struct iv_inv_expr_ent **slot;
3932
3933 ent.expr = expr;
3934 ent.hash = iterative_hash_expr (expr, 0);
3935 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3936 if (*slot)
3937 return (*slot)->id;
3938
3939 *slot = XNEW (struct iv_inv_expr_ent);
3940 (*slot)->expr = expr;
3941 (*slot)->hash = ent.hash;
3942 (*slot)->id = data->inv_expr_id++;
3943 return (*slot)->id;
3944 }
3945
3946 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3947 requires a new compiler generated temporary. Returns -1 otherwise.
3948 ADDRESS_P is a flag indicating if the expression is for address
3949 computation. */
3950
3951 static int
3952 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3953 tree cbase, HOST_WIDE_INT ratio,
3954 bool address_p)
3955 {
3956 aff_tree ubase_aff, cbase_aff;
3957 tree expr, ub, cb;
3958
3959 STRIP_NOPS (ubase);
3960 STRIP_NOPS (cbase);
3961 ub = ubase;
3962 cb = cbase;
3963
3964 if ((TREE_CODE (ubase) == INTEGER_CST)
3965 && (TREE_CODE (cbase) == INTEGER_CST))
3966 return -1;
3967
3968 /* Strips the constant part. */
3969 if (TREE_CODE (ubase) == PLUS_EXPR
3970 || TREE_CODE (ubase) == MINUS_EXPR
3971 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3972 {
3973 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3974 ubase = TREE_OPERAND (ubase, 0);
3975 }
3976
3977 /* Strips the constant part. */
3978 if (TREE_CODE (cbase) == PLUS_EXPR
3979 || TREE_CODE (cbase) == MINUS_EXPR
3980 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3981 {
3982 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3983 cbase = TREE_OPERAND (cbase, 0);
3984 }
3985
3986 if (address_p)
3987 {
3988 if (((TREE_CODE (ubase) == SSA_NAME)
3989 || (TREE_CODE (ubase) == ADDR_EXPR
3990 && is_gimple_min_invariant (ubase)))
3991 && (TREE_CODE (cbase) == INTEGER_CST))
3992 return -1;
3993
3994 if (((TREE_CODE (cbase) == SSA_NAME)
3995 || (TREE_CODE (cbase) == ADDR_EXPR
3996 && is_gimple_min_invariant (cbase)))
3997 && (TREE_CODE (ubase) == INTEGER_CST))
3998 return -1;
3999 }
4000
4001 if (ratio == 1)
4002 {
4003 if (operand_equal_p (ubase, cbase, 0))
4004 return -1;
4005
4006 if (TREE_CODE (ubase) == ADDR_EXPR
4007 && TREE_CODE (cbase) == ADDR_EXPR)
4008 {
4009 tree usym, csym;
4010
4011 usym = TREE_OPERAND (ubase, 0);
4012 csym = TREE_OPERAND (cbase, 0);
4013 if (TREE_CODE (usym) == ARRAY_REF)
4014 {
4015 tree ind = TREE_OPERAND (usym, 1);
4016 if (TREE_CODE (ind) == INTEGER_CST
4017 && tree_fits_shwi_p (ind)
4018 && tree_to_shwi (ind) == 0)
4019 usym = TREE_OPERAND (usym, 0);
4020 }
4021 if (TREE_CODE (csym) == ARRAY_REF)
4022 {
4023 tree ind = TREE_OPERAND (csym, 1);
4024 if (TREE_CODE (ind) == INTEGER_CST
4025 && tree_fits_shwi_p (ind)
4026 && tree_to_shwi (ind) == 0)
4027 csym = TREE_OPERAND (csym, 0);
4028 }
4029 if (operand_equal_p (usym, csym, 0))
4030 return -1;
4031 }
4032 /* Now do a more complex comparison.  */
4033 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4034 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4035 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4036 return -1;
4037 }
4038
4039 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4040 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4041
4042 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4043 aff_combination_add (&ubase_aff, &cbase_aff);
4044 expr = aff_combination_to_tree (&ubase_aff);
4045 return get_expr_id (data, expr);
4046 }
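
/* Illustration of the -1 (no new temporary) answers above, with
   made-up operands: for ubase == p + 4, cbase == p and ratio == 1,
   stripping the constant part leaves two operand_equal_p trees, so
   ubase - cbase is a compile-time constant and nothing needs to be
   tracked.  By contrast, ubase == p, cbase == q with ratio == 2 yields
   the combination p - 2*q, which gets an id via get_expr_id.  */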
4047
4048
4049
4050 /* Determines the cost of the computation by which USE is expressed
4051 from induction variable CAND. If ADDRESS_P is true, we just need
4052 to create an address from it, otherwise we want to get it into a
4053 register. A set of invariants we depend on is stored in
4054 DEPENDS_ON. AT is the statement at which the value is computed.
4055 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4056 addressing is likely. */
4057
4058 static comp_cost
4059 get_computation_cost_at (struct ivopts_data *data,
4060 struct iv_use *use, struct iv_cand *cand,
4061 bool address_p, bitmap *depends_on, gimple at,
4062 bool *can_autoinc,
4063 int *inv_expr_id)
4064 {
4065 tree ubase = use->iv->base, ustep = use->iv->step;
4066 tree cbase, cstep;
4067 tree utype = TREE_TYPE (ubase), ctype;
4068 unsigned HOST_WIDE_INT cstepi, offset = 0;
4069 HOST_WIDE_INT ratio, aratio;
4070 bool var_present, symbol_present, stmt_is_after_inc;
4071 comp_cost cost;
4072 double_int rat;
4073 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4074 enum machine_mode mem_mode = (address_p
4075 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4076 : VOIDmode);
4077
4078 *depends_on = NULL;
4079
4080 /* Only consider real candidates. */
4081 if (!cand->iv)
4082 return infinite_cost;
4083
4084 cbase = cand->iv->base;
4085 cstep = cand->iv->step;
4086 ctype = TREE_TYPE (cbase);
4087
4088 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4089 {
4090 /* We do not have enough precision to express the values of the use. */
4091 return infinite_cost;
4092 }
4093
4094 if (address_p
4095 || (use->iv->base_object
4096 && cand->iv->base_object
4097 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4098 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4099 {
4100 /* Do not try to express address of an object with computation based
4101 on address of a different object. This may cause problems in rtl
4102 level alias analysis (which does not expect this to happen,
4103 as it is illegal in C), and would be unlikely to be useful
4104 anyway. */
4105 if (use->iv->base_object
4106 && cand->iv->base_object
4107 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4108 return infinite_cost;
4109 }
4110
4111 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4112 {
4113 /* TODO -- add direct handling of this case. */
4114 goto fallback;
4115 }
4116
4117 /* CSTEPI is subtracted from the offset in case the statement is after
4118 the increment. If the step is not constant, we use zero instead.
4119 This is a bit imprecise (there is the extra addition), but
4120 redundancy elimination is likely to transform the code so that
4121 it uses the value of the variable before the increment anyway,
4122 so this is not too unrealistic. */
4123 if (cst_and_fits_in_hwi (cstep))
4124 cstepi = int_cst_value (cstep);
4125 else
4126 cstepi = 0;
4127
4128 if (!constant_multiple_of (ustep, cstep, &rat))
4129 return infinite_cost;
4130
4131 if (rat.fits_shwi ())
4132 ratio = rat.to_shwi ();
4133 else
4134 return infinite_cost;
4135
4136 STRIP_NOPS (cbase);
4137 ctype = TREE_TYPE (cbase);
4138
4139 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4140
4141 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4142 or ratio == 1, it is better to handle this like
4143 
4144 ubase - ratio * cbase + ratio * var
4145 
4146 (this also holds in the case ratio == -1, TODO). */
4147
4148 if (cst_and_fits_in_hwi (cbase))
4149 {
4150 offset = - ratio * int_cst_value (cbase);
4151 cost = difference_cost (data,
4152 ubase, build_int_cst (utype, 0),
4153 &symbol_present, &var_present, &offset,
4154 depends_on);
4155 cost.cost /= avg_loop_niter (data->current_loop);
4156 }
4157 else if (ratio == 1)
4158 {
4159 tree real_cbase = cbase;
4160
4161 /* Check to see if any adjustment is needed. */
4162 if (cstepi == 0 && stmt_is_after_inc)
4163 {
4164 aff_tree real_cbase_aff;
4165 aff_tree cstep_aff;
4166
4167 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4168 &real_cbase_aff);
4169 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4170
4171 aff_combination_add (&real_cbase_aff, &cstep_aff);
4172 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4173 }
4174
4175 cost = difference_cost (data,
4176 ubase, real_cbase,
4177 &symbol_present, &var_present, &offset,
4178 depends_on);
4179 cost.cost /= avg_loop_niter (data->current_loop);
4180 }
4181 else if (address_p
4182 && !POINTER_TYPE_P (ctype)
4183 && multiplier_allowed_in_address_p
4184 (ratio, mem_mode,
4185 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4186 {
4187 cbase
4188 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4189 cost = difference_cost (data,
4190 ubase, cbase,
4191 &symbol_present, &var_present, &offset,
4192 depends_on);
4193 cost.cost /= avg_loop_niter (data->current_loop);
4194 }
4195 else
4196 {
4197 cost = force_var_cost (data, cbase, depends_on);
4198 cost = add_costs (cost,
4199 difference_cost (data,
4200 ubase, build_int_cst (utype, 0),
4201 &symbol_present, &var_present,
4202 &offset, depends_on));
4203 cost.cost /= avg_loop_niter (data->current_loop);
4204 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4205 }
4206
4207 if (inv_expr_id)
4208 {
4209 *inv_expr_id =
4210 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4211 /* Clear depends on. */
4212 if (*inv_expr_id != -1 && depends_on && *depends_on)
4213 bitmap_clear (*depends_on);
4214 }
4215
4216 /* If we are after the increment, the value of the candidate is higher by
4217 one iteration. */
4218 if (stmt_is_after_inc)
4219 offset -= ratio * cstepi;
4220
4221 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4222 (symbol/var1/const parts may be omitted). If we are looking for an
4223 address, find the cost of addressing this. */
4224 if (address_p)
4225 return add_costs (cost,
4226 get_address_cost (symbol_present, var_present,
4227 offset, ratio, cstepi,
4228 mem_mode,
4229 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4230 speed, stmt_is_after_inc,
4231 can_autoinc));
4232
4233 /* Otherwise estimate the costs for computing the expression. */
4234 if (!symbol_present && !var_present && !offset)
4235 {
4236 if (ratio != 1)
4237 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4238 return cost;
4239 }
4240
4241 /* Symbol + offset should be compile-time computable so consider that they
4242 are added once to the variable, if present. */
4243 if (var_present && (symbol_present || offset))
4244 cost.cost += adjust_setup_cost (data,
4245 add_cost (speed, TYPE_MODE (ctype)));
4246
4247 /* Having offset does not affect runtime cost in case it is added to
4248 symbol, but it increases complexity. */
4249 if (offset)
4250 cost.complexity++;
4251
4252 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4253
4254 aratio = ratio > 0 ? ratio : -ratio;
4255 if (aratio != 1)
4256 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4257 return cost;
4258
4259 fallback:
4260 if (can_autoinc)
4261 *can_autoinc = false;
4262
4263 {
4264 /* Just get the expression, expand it and measure the cost. */
4265 tree comp = get_computation_at (data->current_loop, use, cand, at);
4266
4267 if (!comp)
4268 return infinite_cost;
4269
4270 if (address_p)
4271 comp = build_simple_mem_ref (comp);
4272
4273 return new_cost (computation_cost (comp, speed), 0);
4274 }
4275 }
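
/* A worked instance of the above (names and costs are hypothetical):
   for a use with base B and step 4 expressed by a candidate with
   base 0 and step 1, constant_multiple_of yields ratio == 4, and the
   constant-cbase branch charges difference_cost (B, 0) scaled down by
   avg_loop_niter.  The computation then has the shape B + 4 * var;
   for an address use the scaling by 4 can be free if the addressing
   mode supports it, otherwise mult_by_coeff_cost (4) is added.  */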
4276
4277 /* Determines the cost of the computation by which USE is expressed
4278 from induction variable CAND. If ADDRESS_P is true, we just need
4279 to create an address from it, otherwise we want to get it into a
4280 register. A set of invariants we depend on is stored in
4281 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4282 autoinc addressing is likely. */
4283
4284 static comp_cost
4285 get_computation_cost (struct ivopts_data *data,
4286 struct iv_use *use, struct iv_cand *cand,
4287 bool address_p, bitmap *depends_on,
4288 bool *can_autoinc, int *inv_expr_id)
4289 {
4290 return get_computation_cost_at (data,
4291 use, cand, address_p, depends_on, use->stmt,
4292 can_autoinc, inv_expr_id);
4293 }
4294
4295 /* Determines cost of basing replacement of USE on CAND in a generic
4296 expression. */
4297
4298 static bool
4299 determine_use_iv_cost_generic (struct ivopts_data *data,
4300 struct iv_use *use, struct iv_cand *cand)
4301 {
4302 bitmap depends_on;
4303 comp_cost cost;
4304 int inv_expr_id = -1;
4305
4306 /* The simple case first -- if we need to express value of the preserved
4307 original biv, the cost is 0. This also prevents us from counting the
4308 cost of increment twice -- once at this use and once in the cost of
4309 the candidate. */
4310 if (cand->pos == IP_ORIGINAL
4311 && cand->incremented_at == use->stmt)
4312 {
4313 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4314 ERROR_MARK, -1);
4315 return true;
4316 }
4317
4318 cost = get_computation_cost (data, use, cand, false, &depends_on,
4319 NULL, &inv_expr_id);
4320
4321 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4322 inv_expr_id);
4323
4324 return !infinite_cost_p (cost);
4325 }
4326
4327 /* Determines cost of basing replacement of USE on CAND in an address. */
4328
4329 static bool
4330 determine_use_iv_cost_address (struct ivopts_data *data,
4331 struct iv_use *use, struct iv_cand *cand)
4332 {
4333 bitmap depends_on;
4334 bool can_autoinc;
4335 int inv_expr_id = -1;
4336 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4337 &can_autoinc, &inv_expr_id);
4338
4339 if (cand->ainc_use == use)
4340 {
4341 if (can_autoinc)
4342 cost.cost -= cand->cost_step;
4343 /* If we generated the candidate solely for exploiting autoincrement
4344 opportunities, and it turns out it can't be used, set the cost to
4345 infinity to make sure we ignore it. */
4346 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4347 cost = infinite_cost;
4348 }
4349 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4350 inv_expr_id);
4351
4352 return !infinite_cost_p (cost);
4353 }
4354
4355 /* Computes value of candidate CAND at position AT in iteration NITER, and
4356 stores it to VAL. */
4357
4358 static void
4359 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4360 aff_tree *val)
4361 {
4362 aff_tree step, delta, nit;
4363 struct iv *iv = cand->iv;
4364 tree type = TREE_TYPE (iv->base);
4365 tree steptype = unsigned_type_for (type);
4369
4370 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4371 aff_combination_convert (&step, steptype);
4372 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4373 aff_combination_convert (&nit, steptype);
4374 aff_combination_mult (&nit, &step, &delta);
4375 if (stmt_after_increment (loop, cand, at))
4376 aff_combination_add (&delta, &step);
4377
4378 tree_to_aff_combination (iv->base, type, val);
4379 if (!POINTER_TYPE_P (type))
4380 aff_combination_convert (val, steptype);
4381 aff_combination_add (val, &delta);
4382 }
4383
4384 /* Returns period of induction variable iv. */
4385
4386 static tree
4387 iv_period (struct iv *iv)
4388 {
4389 tree step = iv->step, period, type;
4390 tree pow2div;
4391
4392 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4393
4394 type = unsigned_type_for (TREE_TYPE (step));
4395 /* Period of the iv is lcm (step, type_range)/step - 1,
4396 i.e., N*type_range/step - 1. Since type_range is a power
4397 of two, N == step >> num_of_ending_zeros_binary (step),
4398 so the final result is
4399
4400 (type_range >> num_of_ending_zeros_binary (step)) - 1
4401
4402 */
4403 pow2div = num_ending_zeros (step);
4404
4405 period = build_low_bits_mask (type,
4406 (TYPE_PRECISION (type)
4407 - tree_to_uhwi (pow2div)));
4408
4409 return period;
4410 }
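
/* For instance, a 32-bit unsigned iv with step 12 has
   num_ending_zeros (12) == 2, so the period computed above is
   (1 << 30) - 1 == 0x3fffffff -- the number of increments for which
   the iv is guaranteed not to return to a previous value.  */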
4411
4412 /* Returns the comparison operator used when eliminating the iv USE. */
4413
4414 static enum tree_code
4415 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4416 {
4417 struct loop *loop = data->current_loop;
4418 basic_block ex_bb;
4419 edge exit;
4420
4421 ex_bb = gimple_bb (use->stmt);
4422 exit = EDGE_SUCC (ex_bb, 0);
4423 if (flow_bb_inside_loop_p (loop, exit->dest))
4424 exit = EDGE_SUCC (ex_bb, 1);
4425
4426 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4427 }
4428
4429 static tree
4430 strip_wrap_conserving_type_conversions (tree exp)
4431 {
4432 while (tree_ssa_useless_type_conversion (exp)
4433 && (nowrap_type_p (TREE_TYPE (exp))
4434 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4435 exp = TREE_OPERAND (exp, 0);
4436 return exp;
4437 }
4438
4439 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4440 check for an exact match. */
4441
4442 static bool
4443 expr_equal_p (tree e, tree what)
4444 {
4445 gimple stmt;
4446 enum tree_code code;
4447
4448 e = strip_wrap_conserving_type_conversions (e);
4449 what = strip_wrap_conserving_type_conversions (what);
4450
4451 code = TREE_CODE (what);
4452 if (TREE_TYPE (e) != TREE_TYPE (what))
4453 return false;
4454
4455 if (operand_equal_p (e, what, 0))
4456 return true;
4457
4458 if (TREE_CODE (e) != SSA_NAME)
4459 return false;
4460
4461 stmt = SSA_NAME_DEF_STMT (e);
4462 if (gimple_code (stmt) != GIMPLE_ASSIGN
4463 || gimple_assign_rhs_code (stmt) != code)
4464 return false;
4465
4466 switch (get_gimple_rhs_class (code))
4467 {
4468 case GIMPLE_BINARY_RHS:
4469 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4470 return false;
4471 /* Fallthru. */
4472
4473 case GIMPLE_UNARY_RHS:
4474 case GIMPLE_SINGLE_RHS:
4475 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4476 default:
4477 return false;
4478 }
4479 }
4480
4481 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4482 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4483 calculation is performed in a non-wrapping type.
4484
4485 TODO: More generally, we could test for the situation that
4486 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4487 This would require knowing the sign of OFFSET.
4488
4489 Also, we only look for the first addition in the computation of BASE.
4490 More complex analysis would be better, but introducing it just for
4491 this optimization seems like overkill. */
4492
4493 static bool
4494 difference_cannot_overflow_p (tree base, tree offset)
4495 {
4496 enum tree_code code;
4497 tree e1, e2;
4498
4499 if (!nowrap_type_p (TREE_TYPE (base)))
4500 return false;
4501
4502 base = expand_simple_operations (base);
4503
4504 if (TREE_CODE (base) == SSA_NAME)
4505 {
4506 gimple stmt = SSA_NAME_DEF_STMT (base);
4507
4508 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4509 return false;
4510
4511 code = gimple_assign_rhs_code (stmt);
4512 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4513 return false;
4514
4515 e1 = gimple_assign_rhs1 (stmt);
4516 e2 = gimple_assign_rhs2 (stmt);
4517 }
4518 else
4519 {
4520 code = TREE_CODE (base);
4521 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4522 return false;
4523 e1 = TREE_OPERAND (base, 0);
4524 e2 = TREE_OPERAND (base, 1);
4525 }
4526
4527 /* TODO: deeper inspection may be necessary to prove the equality. */
4528 switch (code)
4529 {
4530 case PLUS_EXPR:
4531 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4532 case POINTER_PLUS_EXPR:
4533 return expr_equal_p (e2, offset);
4534
4535 default:
4536 return false;
4537 }
4538 }
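
/* Example of the pattern recognized above (invented names): if BASE
   is p_0, defined as q + n in a signed (hence non-wrapping) type, then
   difference_cannot_overflow_p (p_0, n) returns true -- p_0 - n == q
   was representable to begin with, so the subtraction cannot wrap.  */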
4539
4540 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4541 comparison with CAND. NITER describes the number of iterations of
4542 the loops. If successful, the comparison in COMP_P is altered accordingly.
4543
4544 We aim to handle the following situation:
4545
4546 sometype *base, *p;
4547 int a, b, i;
4548
4549 i = a;
4550 p = p_0 = base + a;
4551
4552 do
4553 {
4554 bla (*p);
4555 p++;
4556 i++;
4557 }
4558 while (i < b);
4559
4560 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4561 We aim to optimize this to
4562
4563 p = p_0 = base + a;
4564 do
4565 {
4566 bla (*p);
4567 p++;
4568 }
4569 while (p < p_0 - a + b);
4570
4571 This preserves correctness, since the pointer arithmetic does not
4572 overflow. More precisely:
4573
4574 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4575 overflow in computing it or the values of p.
4576 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4577 overflow. To prove this, we use the fact that p_0 = base + a. */
4578
4579 static bool
4580 iv_elimination_compare_lt (struct ivopts_data *data,
4581 struct iv_cand *cand, enum tree_code *comp_p,
4582 struct tree_niter_desc *niter)
4583 {
4584 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4585 struct aff_tree nit, tmpa, tmpb;
4586 enum tree_code comp;
4587 HOST_WIDE_INT step;
4588
4589 /* We need to know that the candidate induction variable does not overflow.
4590 While more complex analysis may be used to prove this, for now just
4591 check that the variable appears in the original program and that it
4592 is computed in a type that guarantees no overflows. */
4593 cand_type = TREE_TYPE (cand->iv->base);
4594 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4595 return false;
4596
4597 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4598 the calculation of the BOUND could overflow, making the comparison
4599 invalid. */
4600 if (!data->loop_single_exit_p)
4601 return false;
4602
4603 /* We need to be able to decide whether candidate is increasing or decreasing
4604 in order to choose the right comparison operator. */
4605 if (!cst_and_fits_in_hwi (cand->iv->step))
4606 return false;
4607 step = int_cst_value (cand->iv->step);
4608
4609 /* Check that the number of iterations matches the expected pattern:
4610 a + 1 > b ? 0 : b - a - 1. */
4611 mbz = niter->may_be_zero;
4612 if (TREE_CODE (mbz) == GT_EXPR)
4613 {
4614 /* Handle a + 1 > b. */
4615 tree op0 = TREE_OPERAND (mbz, 0);
4616 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4617 {
4618 a = TREE_OPERAND (op0, 0);
4619 b = TREE_OPERAND (mbz, 1);
4620 }
4621 else
4622 return false;
4623 }
4624 else if (TREE_CODE (mbz) == LT_EXPR)
4625 {
4626 tree op1 = TREE_OPERAND (mbz, 1);
4627
4628 /* Handle b < a + 1. */
4629 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4630 {
4631 a = TREE_OPERAND (op1, 0);
4632 b = TREE_OPERAND (mbz, 0);
4633 }
4634 else
4635 return false;
4636 }
4637 else
4638 return false;
4639
4640 /* Expected number of iterations is B - A - 1. Check that it matches
4641 the actual number, i.e., that B - A - NITER = 1. */
4642 tree_to_aff_combination (niter->niter, nit_type, &nit);
4643 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4644 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4645 aff_combination_scale (&nit, double_int_minus_one);
4646 aff_combination_scale (&tmpa, double_int_minus_one);
4647 aff_combination_add (&tmpb, &tmpa);
4648 aff_combination_add (&tmpb, &nit);
4649 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4650 return false;
4651
4652 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4653 overflow. */
4654 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4655 cand->iv->step,
4656 fold_convert (TREE_TYPE (cand->iv->step), a));
4657 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4658 return false;
4659
4660 /* Determine the new comparison operator. */
4661 comp = step < 0 ? GT_EXPR : LT_EXPR;
4662 if (*comp_p == NE_EXPR)
4663 *comp_p = comp;
4664 else if (*comp_p == EQ_EXPR)
4665 *comp_p = invert_tree_comparison (comp, false);
4666 else
4667 gcc_unreachable ();
4668
4669 return true;
4670 }
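
/* Relating this to the example before the function: there
   niter->may_be_zero is a + 1 > b and niter->niter is b - a - 1, so
   the affine check above computes b - a - (b - a - 1) == 1 as
   required, and for a positive step the NE_EXPR exit test is turned
   into LT_EXPR.  */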
4671
4672 /* Check whether it is possible to express the condition in USE by comparison
4673 of candidate CAND. If so, store the value compared with to BOUND, and the
4674 comparison operator to COMP. */
4675
4676 static bool
4677 may_eliminate_iv (struct ivopts_data *data,
4678 struct iv_use *use, struct iv_cand *cand, tree *bound,
4679 enum tree_code *comp)
4680 {
4681 basic_block ex_bb;
4682 edge exit;
4683 tree period;
4684 struct loop *loop = data->current_loop;
4685 aff_tree bnd;
4686 struct tree_niter_desc *desc = NULL;
4687
4688 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4689 return false;
4690
4691 /* For now, this works only for exits that dominate the loop latch.
4692 TODO: extend to other conditions inside the loop body. */
4693 ex_bb = gimple_bb (use->stmt);
4694 if (use->stmt != last_stmt (ex_bb)
4695 || gimple_code (use->stmt) != GIMPLE_COND
4696 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4697 return false;
4698
4699 exit = EDGE_SUCC (ex_bb, 0);
4700 if (flow_bb_inside_loop_p (loop, exit->dest))
4701 exit = EDGE_SUCC (ex_bb, 1);
4702 if (flow_bb_inside_loop_p (loop, exit->dest))
4703 return false;
4704
4705 desc = niter_for_exit (data, exit);
4706 if (!desc)
4707 return false;
4708
4709 /* Determine whether we can use the variable to test the exit condition.
4710 This is the case iff the period of the induction variable is greater
4711 than the number of iterations for which the exit condition is true. */
4712 period = iv_period (cand->iv);
4713
4714 /* If the number of iterations is constant, compare against it directly. */
4715 if (TREE_CODE (desc->niter) == INTEGER_CST)
4716 {
4717 /* See cand_value_at. */
4718 if (stmt_after_increment (loop, cand, use->stmt))
4719 {
4720 if (!tree_int_cst_lt (desc->niter, period))
4721 return false;
4722 }
4723 else
4724 {
4725 if (tree_int_cst_lt (period, desc->niter))
4726 return false;
4727 }
4728 }
4729
4730 /* If not, and if this is the only possible exit of the loop, see whether
4731 we can get a conservative estimate on the number of iterations of the
4732 entire loop and compare against that instead. */
4733 else
4734 {
4735 double_int period_value, max_niter;
4736
4737 max_niter = desc->max;
4738 if (stmt_after_increment (loop, cand, use->stmt))
4739 max_niter += double_int_one;
4740 period_value = tree_to_double_int (period);
4741 if (max_niter.ugt (period_value))
4742 {
4743 /* See if we can take advantage of inferred loop bound information. */
4744 if (data->loop_single_exit_p)
4745 {
4746 if (!max_loop_iterations (loop, &max_niter))
4747 return false;
4748 /* The loop bound is already adjusted by adding 1. */
4749 if (max_niter.ugt (period_value))
4750 return false;
4751 }
4752 else
4753 return false;
4754 }
4755 }
4756
4757 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4758
4759 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4760 aff_combination_to_tree (&bnd));
4761 *comp = iv_elimination_compare (data, use);
4762
4763 /* It is unlikely that computing the number of iterations using division
4764 would be more profitable than keeping the original induction variable. */
4765 if (expression_expensive_p (*bound))
4766 return false;
4767
4768 /* Sometimes, it is possible to handle the situation that the number of
4769 iterations may be zero unless additional assumptions hold, by using <
4770 instead of != in the exit condition.
4771
4772 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4773 base the exit condition on it. However, that is often too
4774 expensive. */
4775 if (!integer_zerop (desc->may_be_zero))
4776 return iv_elimination_compare_lt (data, cand, comp, desc);
4777
4778 return true;
4779 }
4780
4781 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4782 be copied, if it is used in the loop body and DATA->body_includes_call. */
4783
4784 static int
4785 parm_decl_cost (struct ivopts_data *data, tree bound)
4786 {
4787 tree sbound = bound;
4788 STRIP_NOPS (sbound);
4789
4790 if (TREE_CODE (sbound) == SSA_NAME
4791 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4792 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4793 && data->body_includes_call)
4794 return COSTS_N_INSNS (1);
4795
4796 return 0;
4797 }
4798
4799 /* Determines cost of basing replacement of USE on CAND in a condition. */
4800
4801 static bool
4802 determine_use_iv_cost_condition (struct ivopts_data *data,
4803 struct iv_use *use, struct iv_cand *cand)
4804 {
4805 tree bound = NULL_TREE;
4806 struct iv *cmp_iv;
4807 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4808 comp_cost elim_cost, express_cost, cost, bound_cost;
4809 bool ok;
4810 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4811 tree *control_var, *bound_cst;
4812 enum tree_code comp = ERROR_MARK;
4813
4814 /* Only consider real candidates. */
4815 if (!cand->iv)
4816 {
4817 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4818 ERROR_MARK, -1);
4819 return false;
4820 }
4821
4822 /* Try iv elimination. */
4823 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4824 {
4825 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4826 if (elim_cost.cost == 0)
4827 elim_cost.cost = parm_decl_cost (data, bound);
4828 else if (TREE_CODE (bound) == INTEGER_CST)
4829 elim_cost.cost = 0;
4830 /* If we replace a loop condition 'i < n' with 'p < base + n',
4831 depends_on_elim will have 'base' and 'n' set, which implies
4832 that both 'base' and 'n' will be live during the loop. More likely,
4833 'base + n' will be loop invariant, resulting in only one live value
4834 during the loop. So in that case we clear depends_on_elim and set
4835 elim_inv_expr_id instead. */
4836 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4837 {
4838 elim_inv_expr_id = get_expr_id (data, bound);
4839 bitmap_clear (depends_on_elim);
4840 }
4841 /* The bound is a loop invariant, so it will only be computed
4842 once. */
4843 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4844 }
4845 else
4846 elim_cost = infinite_cost;
4847
4848 /* Try expressing the original giv. If it is compared with an invariant,
4849 note that we cannot get rid of it. */
4850 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4851 NULL, &cmp_iv);
4852 gcc_assert (ok);
4853
4854 /* When the condition is a comparison of the candidate IV against
4855 zero, prefer this IV.
4856
4857 TODO: The constant that we're subtracting from the cost should
4858 be target-dependent. This information should be added to the
4859 target costs for each backend. */
4860 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4861 && integer_zerop (*bound_cst)
4862 && (operand_equal_p (*control_var, cand->var_after, 0)
4863 || operand_equal_p (*control_var, cand->var_before, 0)))
4864 elim_cost.cost -= 1;
4865
4866 express_cost = get_computation_cost (data, use, cand, false,
4867 &depends_on_express, NULL,
4868 &express_inv_expr_id);
4869 fd_ivopts_data = data;
4870 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4871
4872 /* Count the cost of the original bound as well. */
4873 bound_cost = force_var_cost (data, *bound_cst, NULL);
4874 if (bound_cost.cost == 0)
4875 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4876 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4877 bound_cost.cost = 0;
4878 express_cost.cost += bound_cost.cost;
4879
4880 /* Choose the better approach, preferring the eliminated IV. */
4881 if (compare_costs (elim_cost, express_cost) <= 0)
4882 {
4883 cost = elim_cost;
4884 depends_on = depends_on_elim;
4885 depends_on_elim = NULL;
4886 inv_expr_id = elim_inv_expr_id;
4887 }
4888 else
4889 {
4890 cost = express_cost;
4891 depends_on = depends_on_express;
4892 depends_on_express = NULL;
4893 bound = NULL_TREE;
4894 comp = ERROR_MARK;
4895 inv_expr_id = express_inv_expr_id;
4896 }
4897
4898 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4899
4900 if (depends_on_elim)
4901 BITMAP_FREE (depends_on_elim);
4902 if (depends_on_express)
4903 BITMAP_FREE (depends_on_express);
4904
4905 return !infinite_cost_p (cost);
4906 }
4907
4908 /* Determines cost of basing replacement of USE on CAND. Returns false
4909 if USE cannot be based on CAND. */
4910
4911 static bool
4912 determine_use_iv_cost (struct ivopts_data *data,
4913 struct iv_use *use, struct iv_cand *cand)
4914 {
4915 switch (use->type)
4916 {
4917 case USE_NONLINEAR_EXPR:
4918 return determine_use_iv_cost_generic (data, use, cand);
4919
4920 case USE_ADDRESS:
4921 return determine_use_iv_cost_address (data, use, cand);
4922
4923 case USE_COMPARE:
4924 return determine_use_iv_cost_condition (data, use, cand);
4925
4926 default:
4927 gcc_unreachable ();
4928 }
4929 }
4930
4931 /* Return true if get_computation_cost indicates that autoincrement is
4932 a possibility for the pair of USE and CAND, false otherwise. */
4933
4934 static bool
4935 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4936 struct iv_cand *cand)
4937 {
4938 bitmap depends_on;
4939 bool can_autoinc;
4940 comp_cost cost;
4941
4942 if (use->type != USE_ADDRESS)
4943 return false;
4944
4945 cost = get_computation_cost (data, use, cand, true, &depends_on,
4946 &can_autoinc, NULL);
4947
4948 BITMAP_FREE (depends_on);
4949
4950 return !infinite_cost_p (cost) && can_autoinc;
4951 }
4952
4953 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4954 use that allows autoincrement, and set their AINC_USE if possible. */
4955
4956 static void
4957 set_autoinc_for_original_candidates (struct ivopts_data *data)
4958 {
4959 unsigned i, j;
4960
4961 for (i = 0; i < n_iv_cands (data); i++)
4962 {
4963 struct iv_cand *cand = iv_cand (data, i);
4964 struct iv_use *closest_before = NULL;
4965 struct iv_use *closest_after = NULL;
4966 if (cand->pos != IP_ORIGINAL)
4967 continue;
4968
4969 for (j = 0; j < n_iv_uses (data); j++)
4970 {
4971 struct iv_use *use = iv_use (data, j);
4972 unsigned uid = gimple_uid (use->stmt);
4973
4974 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4975 continue;
4976
4977 if (uid < gimple_uid (cand->incremented_at)
4978 && (closest_before == NULL
4979 || uid > gimple_uid (closest_before->stmt)))
4980 closest_before = use;
4981
4982 if (uid > gimple_uid (cand->incremented_at)
4983 && (closest_after == NULL
4984 || uid < gimple_uid (closest_after->stmt)))
4985 closest_after = use;
4986 }
4987
4988 if (closest_before != NULL
4989 && autoinc_possible_for_pair (data, closest_before, cand))
4990 cand->ainc_use = closest_before;
4991 else if (closest_after != NULL
4992 && autoinc_possible_for_pair (data, closest_after, cand))
4993 cand->ainc_use = closest_after;
4994 }
4995 }
4996
4997 /* Finds the candidates for the induction variables. */
4998
4999 static void
5000 find_iv_candidates (struct ivopts_data *data)
5001 {
5002 /* Add commonly used ivs. */
5003 add_standard_iv_candidates (data);
5004
5005 /* Add old induction variables. */
5006 add_old_ivs_candidates (data);
5007
5008 /* Add induction variables derived from uses. */
5009 add_derived_ivs_candidates (data);
5010
5011 set_autoinc_for_original_candidates (data);
5012
5013 /* Record the important candidates. */
5014 record_important_candidates (data);
5015 }
5016
5017 /* Determines costs of basing the use of the iv on an iv candidate. */
5018
5019 static void
5020 determine_use_iv_costs (struct ivopts_data *data)
5021 {
5022 unsigned i, j;
5023 struct iv_use *use;
5024 struct iv_cand *cand;
5025 bitmap to_clear = BITMAP_ALLOC (NULL);
5026
5027 alloc_use_cost_map (data);
5028
5029 for (i = 0; i < n_iv_uses (data); i++)
5030 {
5031 use = iv_use (data, i);
5032
5033 if (data->consider_all_candidates)
5034 {
5035 for (j = 0; j < n_iv_cands (data); j++)
5036 {
5037 cand = iv_cand (data, j);
5038 determine_use_iv_cost (data, use, cand);
5039 }
5040 }
5041 else
5042 {
5043 bitmap_iterator bi;
5044
5045 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5046 {
5047 cand = iv_cand (data, j);
5048 if (!determine_use_iv_cost (data, use, cand))
5049 bitmap_set_bit (to_clear, j);
5050 }
5051
5052 /* Remove the candidates for which the cost is infinite from
5053 the list of related candidates. */
5054 bitmap_and_compl_into (use->related_cands, to_clear);
5055 bitmap_clear (to_clear);
5056 }
5057 }
5058
5059 BITMAP_FREE (to_clear);
5060
5061 if (dump_file && (dump_flags & TDF_DETAILS))
5062 {
5063 fprintf (dump_file, "Use-candidate costs:\n");
5064
5065 for (i = 0; i < n_iv_uses (data); i++)
5066 {
5067 use = iv_use (data, i);
5068
5069 fprintf (dump_file, "Use %d:\n", i);
5070 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5071 for (j = 0; j < use->n_map_members; j++)
5072 {
5073 if (!use->cost_map[j].cand
5074 || infinite_cost_p (use->cost_map[j].cost))
5075 continue;
5076
5077 fprintf (dump_file, " %d\t%d\t%d\t",
5078 use->cost_map[j].cand->id,
5079 use->cost_map[j].cost.cost,
5080 use->cost_map[j].cost.complexity);
5081 if (use->cost_map[j].depends_on)
5082 bitmap_print (dump_file,
5083 use->cost_map[j].depends_on, "","");
5084 if (use->cost_map[j].inv_expr_id != -1)
5085 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5086 fprintf (dump_file, "\n");
5087 }
5088
5089 fprintf (dump_file, "\n");
5090 }
5091 fprintf (dump_file, "\n");
5092 }
5093 }
5094
5095 /* Determines cost of the candidate CAND. */
5096
5097 static void
5098 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5099 {
5100 comp_cost cost_base;
5101 unsigned cost, cost_step;
5102 tree base;
5103
5104 if (!cand->iv)
5105 {
5106 cand->cost = 0;
5107 return;
5108 }
5109
5110 /* There are two costs associated with the candidate -- its increment
5111 and its initialization. The second is almost negligible for any loop
5112 that rolls enough, so we take it into account only very little. */
5113
5114 base = cand->iv->base;
5115 cost_base = force_var_cost (data, base, NULL);
5116 /* It will be exceptional for the iv register to happen to be initialized
5117 with the proper value at no cost. In general, there will at least be a
5118 regcopy or a constant set. */
5119 if (cost_base.cost == 0)
5120 cost_base.cost = COSTS_N_INSNS (1);
5121 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5122
5123 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5124
5125 /* Prefer the original ivs unless we may gain something by replacing them.
5126 The reason is to make debugging simpler; this is not relevant for
5127 artificial ivs created by other optimization passes. */
5128 if (cand->pos != IP_ORIGINAL
5129 || !SSA_NAME_VAR (cand->var_before)
5130 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5131 cost++;
5132
5133 /* Prefer not to insert statements into latch unless there are some
5134 already (so that we do not create unnecessary jumps). */
5135 if (cand->pos == IP_END
5136 && empty_block_p (ip_end_pos (data->current_loop)))
5137 cost++;
5138
5139 cand->cost = cost;
5140 cand->cost_step = cost_step;
5141 }
5142
5143 /* Determines costs of computation of the candidates. */
5144
5145 static void
5146 determine_iv_costs (struct ivopts_data *data)
5147 {
5148 unsigned i;
5149
5150 if (dump_file && (dump_flags & TDF_DETAILS))
5151 {
5152 fprintf (dump_file, "Candidate costs:\n");
5153 fprintf (dump_file, " cand\tcost\n");
5154 }
5155
5156 for (i = 0; i < n_iv_cands (data); i++)
5157 {
5158 struct iv_cand *cand = iv_cand (data, i);
5159
5160 determine_iv_cost (data, cand);
5161
5162 if (dump_file && (dump_flags & TDF_DETAILS))
5163 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5164 }
5165
5166 if (dump_file && (dump_flags & TDF_DETAILS))
5167 fprintf (dump_file, "\n");
5168 }
5169
5170 /* Calculates cost for having SIZE induction variables. */
5171
5172 static unsigned
5173 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5174 {
5175 /* We add size to the cost, so that we prefer eliminating ivs
5176 if possible. */
5177 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5178 data->body_includes_call);
5179 }
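
/* Purely illustrative numbers: with size == 3 on a target where
   estimate_reg_pressure_cost reports no spill cost for three
   registers, the result is just 3 -- the "size +" term alone is what
   biases the search towards sets with fewer induction variables even
   when register pressure is not an issue.  */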
5180
5181 /* For each size of the induction variable set determine the penalty. */
5182
5183 static void
5184 determine_set_costs (struct ivopts_data *data)
5185 {
5186 unsigned j, n;
5187 gimple phi;
5188 gimple_stmt_iterator psi;
5189 tree op;
5190 struct loop *loop = data->current_loop;
5191 bitmap_iterator bi;
5192
5193 if (dump_file && (dump_flags & TDF_DETAILS))
5194 {
5195 fprintf (dump_file, "Global costs:\n");
5196 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5197 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5198 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5199 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5200 }
5201
5202 n = 0;
5203 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5204 {
5205 phi = gsi_stmt (psi);
5206 op = PHI_RESULT (phi);
5207
5208 if (virtual_operand_p (op))
5209 continue;
5210
5211 if (get_iv (data, op))
5212 continue;
5213
5214 n++;
5215 }
5216
5217 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5218 {
5219 struct version_info *info = ver_info (data, j);
5220
5221 if (info->inv_id && info->has_nonlin_use)
5222 n++;
5223 }
5224
5225 data->regs_used = n;
5226 if (dump_file && (dump_flags & TDF_DETAILS))
5227 fprintf (dump_file, " regs_used %d\n", n);
5228
5229 if (dump_file && (dump_flags & TDF_DETAILS))
5230 {
5231 fprintf (dump_file, " cost for size:\n");
5232 fprintf (dump_file, " ivs\tcost\n");
5233 for (j = 0; j <= 2 * target_avail_regs; j++)
5234 fprintf (dump_file, " %d\t%d\n", j,
5235 ivopts_global_cost_for_size (data, j));
5236 fprintf (dump_file, "\n");
5237 }
5238 }
5239
5240 /* Returns true if A is a cheaper cost pair than B. */
5241
5242 static bool
5243 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5244 {
5245 int cmp;
5246
5247 if (!a)
5248 return false;
5249
5250 if (!b)
5251 return true;
5252
5253 cmp = compare_costs (a->cost, b->cost);
5254 if (cmp < 0)
5255 return true;
5256
5257 if (cmp > 0)
5258 return false;
5259
5260 /* In case the costs are the same, prefer the cheaper candidate. */
5261 if (a->cand->cost < b->cand->cost)
5262 return true;
5263
5264 return false;
5265 }
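
/* E.g. if A and B both cost (4, 0), the pair whose candidate has the
   smaller cand->cost wins; a missing pair always loses, so the first
   real cost pair examined seeds the comparison.  */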
5266
5267
5268 /* Returns the candidate by which USE is expressed in IVS. */
5269
5270 static struct cost_pair *
5271 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5272 {
5273 return ivs->cand_for_use[use->id];
5274 }
5275
5276 /* Computes the cost field of IVS structure. */
5277
5278 static void
5279 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5280 {
5281 comp_cost cost = ivs->cand_use_cost;
5282
5283 cost.cost += ivs->cand_cost;
5284
5285 cost.cost += ivopts_global_cost_for_size (data,
5286 ivs->n_regs + ivs->num_used_inv_expr);
5287
5288 ivs->cost = cost;
5289 }
5290
5291 /* Remove invariants in set INVS from set IVS. */
5292
5293 static void
5294 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5295 {
5296 bitmap_iterator bi;
5297 unsigned iid;
5298
5299 if (!invs)
5300 return;
5301
5302 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5303 {
5304 ivs->n_invariant_uses[iid]--;
5305 if (ivs->n_invariant_uses[iid] == 0)
5306 ivs->n_regs--;
5307 }
5308 }
5309
5310 /* Set USE not to be expressed by any candidate in IVS. */
5311
5312 static void
5313 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5314 struct iv_use *use)
5315 {
5316 unsigned uid = use->id, cid;
5317 struct cost_pair *cp;
5318
5319 cp = ivs->cand_for_use[uid];
5320 if (!cp)
5321 return;
5322 cid = cp->cand->id;
5323
5324 ivs->bad_uses++;
5325 ivs->cand_for_use[uid] = NULL;
5326 ivs->n_cand_uses[cid]--;
5327
5328 if (ivs->n_cand_uses[cid] == 0)
5329 {
5330 bitmap_clear_bit (ivs->cands, cid);
5331 /* Do not count the pseudocandidates. */
5332 if (cp->cand->iv)
5333 ivs->n_regs--;
5334 ivs->n_cands--;
5335 ivs->cand_cost -= cp->cand->cost;
5336
5337 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5338 }
5339
5340 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5341
5342 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5343
5344 if (cp->inv_expr_id != -1)
5345 {
5346 ivs->used_inv_expr[cp->inv_expr_id]--;
5347 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5348 ivs->num_used_inv_expr--;
5349 }
5350 iv_ca_recount_cost (data, ivs);
5351 }
5352
5353 /* Add invariants in set INVS to set IVS. */
5354
5355 static void
5356 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5357 {
5358 bitmap_iterator bi;
5359 unsigned iid;
5360
5361 if (!invs)
5362 return;
5363
5364 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5365 {
5366 ivs->n_invariant_uses[iid]++;
5367 if (ivs->n_invariant_uses[iid] == 1)
5368 ivs->n_regs++;
5369 }
5370 }
5371
5372 /* Set cost pair for USE in set IVS to CP. */
5373
5374 static void
5375 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5376 struct iv_use *use, struct cost_pair *cp)
5377 {
5378 unsigned uid = use->id, cid;
5379
5380 if (ivs->cand_for_use[uid] == cp)
5381 return;
5382
5383 if (ivs->cand_for_use[uid])
5384 iv_ca_set_no_cp (data, ivs, use);
5385
5386 if (cp)
5387 {
5388 cid = cp->cand->id;
5389
5390 ivs->bad_uses--;
5391 ivs->cand_for_use[uid] = cp;
5392 ivs->n_cand_uses[cid]++;
5393 if (ivs->n_cand_uses[cid] == 1)
5394 {
5395 bitmap_set_bit (ivs->cands, cid);
5396 /* Do not count the pseudocandidates. */
5397 if (cp->cand->iv)
5398 ivs->n_regs++;
5399 ivs->n_cands++;
5400 ivs->cand_cost += cp->cand->cost;
5401
5402 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5403 }
5404
5405 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5406 iv_ca_set_add_invariants (ivs, cp->depends_on);
5407
5408 if (cp->inv_expr_id != -1)
5409 {
5410 ivs->used_inv_expr[cp->inv_expr_id]++;
5411 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5412 ivs->num_used_inv_expr++;
5413 }
5414 iv_ca_recount_cost (data, ivs);
5415 }
5416 }
5417
5418 /* Extend set IVS by expressing USE by some of the candidates in it
5419 if possible. All important candidates will be considered
5420 if IMPORTANT_CANDIDATES is true. */
5421
5422 static void
5423 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5424 struct iv_use *use, bool important_candidates)
5425 {
5426 struct cost_pair *best_cp = NULL, *cp;
5427 bitmap_iterator bi;
5428 bitmap cands;
5429 unsigned i;
5430
5431 gcc_assert (ivs->upto >= use->id);
5432
5433 if (ivs->upto == use->id)
5434 {
5435 ivs->upto++;
5436 ivs->bad_uses++;
5437 }
5438
5439 cands = (important_candidates ? data->important_candidates : ivs->cands);
5440 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5441 {
5442 struct iv_cand *cand = iv_cand (data, i);
5443
5444 cp = get_use_iv_cost (data, use, cand);
5445
5446 if (cheaper_cost_pair (cp, best_cp))
5447 best_cp = cp;
5448 }
5449
5450 iv_ca_set_cp (data, ivs, use, best_cp);
5451 }
5452
5453 /* Get cost for assignment IVS. */
5454
5455 static comp_cost
5456 iv_ca_cost (struct iv_ca *ivs)
5457 {
5458 /* This was a conditional expression but it triggered a bug in
5459 Sun C 5.5. */
5460 if (ivs->bad_uses)
5461 return infinite_cost;
5462 else
5463 return ivs->cost;
5464 }
5465
5466 /* Returns true if all dependences of CP are among invariants in IVS. */
5467
5468 static bool
5469 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5470 {
5471 unsigned i;
5472 bitmap_iterator bi;
5473
5474 if (!cp->depends_on)
5475 return true;
5476
5477 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5478 {
5479 if (ivs->n_invariant_uses[i] == 0)
5480 return false;
5481 }
5482
5483 return true;
5484 }
5485
5486 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
5487 chains it before NEXT_CHANGE. */
5488
5489 static struct iv_ca_delta *
5490 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5491 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5492 {
5493 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5494
5495 change->use = use;
5496 change->old_cp = old_cp;
5497 change->new_cp = new_cp;
5498 change->next_change = next_change;
5499
5500 return change;
5501 }
5502
5503 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5504 are rewritten. */
5505
5506 static struct iv_ca_delta *
5507 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5508 {
5509 struct iv_ca_delta *last;
5510
5511 if (!l2)
5512 return l1;
5513
5514 if (!l1)
5515 return l2;
5516
5517 for (last = l1; last->next_change; last = last->next_change)
5518 continue;
5519 last->next_change = l2;
5520
5521 return l1;
5522 }
5523
5524 /* Reverse the list of changes DELTA, forming the inverse to it. */
5525
5526 static struct iv_ca_delta *
5527 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5528 {
5529 struct iv_ca_delta *act, *next, *prev = NULL;
5530 struct cost_pair *tmp;
5531
5532 for (act = delta; act; act = next)
5533 {
5534 next = act->next_change;
5535 act->next_change = prev;
5536 prev = act;
5537
5538 tmp = act->old_cp;
5539 act->old_cp = act->new_cp;
5540 act->new_cp = tmp;
5541 }
5542
5543 return prev;
5544 }
5545
5546 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5547 reverted instead. */
5548
5549 static void
5550 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5551 struct iv_ca_delta *delta, bool forward)
5552 {
5553 struct cost_pair *from, *to;
5554 struct iv_ca_delta *act;
5555
5556 if (!forward)
5557 delta = iv_ca_delta_reverse (delta);
5558
5559 for (act = delta; act; act = act->next_change)
5560 {
5561 from = act->old_cp;
5562 to = act->new_cp;
5563 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5564 iv_ca_set_cp (data, ivs, act->use, to);
5565 }
5566
5567 if (!forward)
5568 iv_ca_delta_reverse (delta);
5569 }
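
/* For illustration only: the delta primitives above are typically used
   in a try/evaluate/undo pattern, as in iv_ca_extend below:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   Committing backward reverses the list (swapping old_cp and new_cp),
   re-applies it, and reverses it again, which restores the original
   assignment.  */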

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidate assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ", "\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing the candidate in IVS to CAND for each use.  Return the cost
   of the new set, and store the differences in DELTA.  The number of
   induction variables in the new set is stored to N_IVS.  If MIN_NCAND is
   true, the function tries to find a solution with a minimal number of iv
   candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_cand *start,
              struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, use, cp);
              acost = iv_ca_cost (ivs);

              if (compare_costs (acost, best_cost) < 0)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, use, cp);
              acost = iv_ca_cost (ivs);

              if (compare_costs (acost, best_cost) < 0)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      /* Restore the old cp for this use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return the cost of the new set, and store
   the differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added many ivs specific to the uses here, the optimization algorithm
     later would be likely to get stuck in a local minimum, thus causing us
     to create too many ivs.  The approach from few ivs to more seems more
     likely to be successful -- starting from few ivs, replacing an expensive
     use by a specific iv should always be a win.  */
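
  /* For example (an illustrative scenario, not taken from any particular
     testcase): in a loop reading a[i] and b[i] with exit test i < n, one
     generic counter candidate can usually serve all three uses, whereas
     seeding the solution with a separate pointer iv per memory use tends
     to trap the greedy search below in a costlier local minimum.  */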
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic -- we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

/* Finds the optimal set of induction variables, trying both the strategy
   that prefers the original ivs and the one that does not, and records
   the selected candidate for each use.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
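
/* For illustration only (hypothetical GIMPLE, not taken from a real dump):
   in a simple counted loop the positions above correspond to

     <bb header>:
       # ivtmp_1 = PHI <0(preheader), ivtmp_2(latch)>
       ...
       ivtmp_2 = ivtmp_1 + 1;      <- IP_NORMAL: just before the exit test
       if (ivtmp_2 != n_3) goto <bb latch>; else goto <bb exit>;

     <bb latch>:
                                   <- IP_END: at the end of the latch
       goto <bb header>;

   IP_BEFORE_USE and IP_AFTER_USE place the increment around a particular
   use statement instead, and IP_ORIGINAL keeps the pre-existing biv.  */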

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
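
/* For illustration only (hypothetical names): if the original biv i is
   replaced by a candidate ivtmp with the same base and step, a nonlinear
   use such as

     i_2 = i_1 + 1;

   is re-expressed by get_computation in terms of the candidate, e.g.

     i_2 = (int) (ivtmp_7 + 1);

   with casts through an unsigned type inserted as needed to avoid
   introducing undefined overflow.  */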

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range, thus increasing register pressure.  This peephole transforms it
   into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
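
/* For illustration only (hypothetical names): an address use such as
   MEM[(int *)p_3], where p_3 advances by 4 each iteration, may be turned
   by create_mem_ref into a target addressing form along the lines of

     MEM[symbol: a, index: ivtmp_7, step: 4, offset: 0]

   so that the selected candidate feeds the index operand directly.  */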

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
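
/* For illustration only (hypothetical names): when iv elimination succeeds,
   an exit test such as

     if (i_1 < n_2)

   is replaced by a comparison of the candidate against a bound computed
   once on the preheader edge, e.g.

     if (ivtmp_7 != bound_9)

   Otherwise, the iv operand of the original condition is merely
   re-expressed in terms of the candidate.  */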

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
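
/* For illustration only (hypothetical names): with -g, when a dead biv i_1
   is released above, a debug bind such as

     # DEBUG i => i_1

   is rewritten to reference a selected candidate instead, either directly,
   e.g.

     # DEBUG i => (int) ivtmp_7

   or, when i_1 appears in several debug binds or inside a larger
   expression, through an intermediate DEBUG_EXPR_DECL temporary bound
   once at the definition point.  */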

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY (with NUM_NODES blocks) includes any
   function calls, not counting inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}