1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "rtl.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "cfghooks.h"
72 #include "tree-pass.h"
73 #include "tm_p.h"
74 #include "ssa.h"
75 #include "expmed.h"
76 #include "insn-config.h"
77 #include "emit-rtl.h"
78 #include "recog.h"
79 #include "cgraph.h"
80 #include "gimple-pretty-print.h"
81 #include "alias.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
84 #include "tree-eh.h"
85 #include "gimplify.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
88 #include "tree-cfg.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
93 #include "explow.h"
94 #include "expr.h"
95 #include "tree-dfa.h"
96 #include "tree-ssa.h"
97 #include "cfgloop.h"
98 #include "tree-scalar-evolution.h"
99 #include "params.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
105
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
109
/* The infinite cost: sentinel meaning "cannot be done" for both the
   runtime-cost and complexity fields of a comp_cost.  */
#define INFTY 10000000

/* Fallback trip-count guess; note the LOOP argument is currently
   ignored and a fixed constant is used.  */
#define AVG_LOOP_NITER(LOOP) 5
114
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
117 exists. */
118
119 static inline HOST_WIDE_INT
avg_loop_niter(struct loop * loop)120 avg_loop_niter (struct loop *loop)
121 {
122 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
123 if (niter == -1)
124 return AVG_LOOP_NITER (loop);
125
126 return niter;
127 }
128
/* Representation of an induction variable: a value with a loop-invariant
   initial value that changes by a (constant) step each iteration.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv (basic induction variable)?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
143
/* Per-ssa version information (induction variable descriptions, etc.),
   indexed by SSA_NAME_VERSION.  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description, or NULL.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
154
/* Classification of induction variable uses; see step 1) of the pass
   overview at the top of the file.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
162
/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

/* Zero cost ("free") and the unattainable cost, respectively.  */
static const comp_cost no_cost = {0, 0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};
176
/* The cost of expressing one particular use by one particular
   candidate.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
190
/* A use of an induction variable, together with the costs of rewriting
   it in terms of each considered candidate.  Address uses with the same
   stripped base are chained as "sub uses" via NEXT.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use; 0 for the head of a chain.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
215
/* The position within the loop where the increment of a candidate iv
   is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
225
/* An induction variable candidate that uses may be rewritten in terms
   of; see step 2) of the pass overview at the top of the file.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
250
/* Hashtable entry for a common candidate derived from iv uses: a
   (base, step) pair shared by several uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<iv_use *> uses;
  hashval_t hash;	/* Precomputed hash of (base, step).  */
};
260
/* Hashtable helpers for the table of common candidates; entries are
   deleted when the table is destroyed (delete_ptr_hash).  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
268
/* Hash function for possible common candidates.  Returns the hash value
   precomputed when CCAND was built.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}
276
277 /* Hash table equality function for common candidates. */
278
279 inline bool
equal(const iv_common_cand * ccand1,const iv_common_cand * ccand2)280 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
281 const iv_common_cand *ccand2)
282 {
283 return (ccand1->hash == ccand2->hash
284 && operand_equal_p (ccand1->base, ccand2->base, 0)
285 && operand_equal_p (ccand1->step, ccand2->step, 0)
286 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
287 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
288 }
289
/* Loop invariant expression hashtable entry, mapping an invariant
   expression to the id assigned to it by this pass.  */
struct iv_inv_expr_ent
{
  tree expr;		/* The invariant expression.  */
  int id;		/* Its assigned id.  */
  hashval_t hash;	/* Precomputed hash of EXPR.  */
};
297
/* Hashtable helpers for the table of loop invariant expressions;
   entries are freed when the table is destroyed (free_ptr_hash).  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
305
/* Hash function for loop invariant expressions.  Returns the hash value
   precomputed when EXPR's entry was built.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}
313
314 /* Hash table equality function for expressions. */
315
316 inline bool
equal(const iv_inv_expr_ent * expr1,const iv_inv_expr_ent * expr2)317 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
318 const iv_inv_expr_ent *expr2)
319 {
320 return expr1->hash == expr2->hash
321 && operand_equal_p (expr1->expr, expr2->expr, 0);
322 }
323
/* Global bookkeeping for the induction variable optimization pass;
   filled in anew for each loop being optimized.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  /* Source location of the loop.  */
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by
     SSA_NAME_VERSION.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  Set when the total number of candidates is small enough.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
392
/* An assignment of iv candidates to uses, together with the aggregated
   cost of the whole assignment; this is the state explored by the
   cost-minimization search.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
437
/* Difference of two iv candidate assignments: a linked list of
   per-use changes that can be applied or rolled back.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
454
/* Bound on number of candidates below that all candidates are considered
   for each use (see consider_all_candidates).  Controlled by a --param.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; defined later in this file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
478
/* Returns the number of induction variable uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}
486
/* Returns the I-th use recorded in DATA; I must be below
   n_iv_uses (DATA).  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}
494
/* Returns the number of induction variable candidates recorded in
   DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}
502
/* Returns the I-th candidate recorded in DATA; I must be below
   n_iv_cands (DATA).  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
510
511 /* The single loop exit if it dominates the latch, NULL otherwise. */
512
513 edge
single_dom_exit(struct loop * loop)514 single_dom_exit (struct loop *loop)
515 {
516 edge exit = single_exit (loop);
517
518 if (!exit)
519 return NULL;
520
521 if (!just_once_each_iteration_p (loop, exit->src))
522 return NULL;
523
524 return exit;
525 }
526
527 /* Dumps information about the induction variable IV to FILE. */
528
529 void
dump_iv(FILE * file,struct iv * iv,bool dump_name)530 dump_iv (FILE *file, struct iv *iv, bool dump_name)
531 {
532 if (iv->ssa_name && dump_name)
533 {
534 fprintf (file, "ssa name ");
535 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
536 fprintf (file, "\n");
537 }
538
539 fprintf (file, " type ");
540 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
541 fprintf (file, "\n");
542
543 if (iv->step)
544 {
545 fprintf (file, " base ");
546 print_generic_expr (file, iv->base, TDF_SLIM);
547 fprintf (file, "\n");
548
549 fprintf (file, " step ");
550 print_generic_expr (file, iv->step, TDF_SLIM);
551 fprintf (file, "\n");
552 }
553 else
554 {
555 fprintf (file, " invariant ");
556 print_generic_expr (file, iv->base, TDF_SLIM);
557 fprintf (file, "\n");
558 }
559
560 if (iv->base_object)
561 {
562 fprintf (file, " base object ");
563 print_generic_expr (file, iv->base_object, TDF_SLIM);
564 fprintf (file, "\n");
565 }
566
567 if (iv->biv_p)
568 fprintf (file, " is a biv\n");
569
570 if (iv->no_overflow)
571 fprintf (file, " iv doesn't overflow wrto loop niter\n");
572 }
573
574 /* Dumps information about the USE to FILE. */
575
576 void
dump_use(FILE * file,struct iv_use * use)577 dump_use (FILE *file, struct iv_use *use)
578 {
579 fprintf (file, "use %d", use->id);
580 if (use->sub_id)
581 fprintf (file, ".%d", use->sub_id);
582
583 fprintf (file, "\n");
584
585 switch (use->type)
586 {
587 case USE_NONLINEAR_EXPR:
588 fprintf (file, " generic\n");
589 break;
590
591 case USE_ADDRESS:
592 fprintf (file, " address\n");
593 break;
594
595 case USE_COMPARE:
596 fprintf (file, " compare\n");
597 break;
598
599 default:
600 gcc_unreachable ();
601 }
602
603 fprintf (file, " in statement ");
604 print_gimple_stmt (file, use->stmt, 0, 0);
605 fprintf (file, "\n");
606
607 fprintf (file, " at position ");
608 if (use->op_p)
609 print_generic_expr (file, *use->op_p, TDF_SLIM);
610 fprintf (file, "\n");
611
612 dump_iv (file, use->iv, false);
613
614 if (use->related_cands)
615 {
616 fprintf (file, " related candidates ");
617 dump_bitmap (file, use->related_cands);
618 }
619 }
620
621 /* Dumps information about the uses to FILE. */
622
623 void
dump_uses(FILE * file,struct ivopts_data * data)624 dump_uses (FILE *file, struct ivopts_data *data)
625 {
626 unsigned i;
627 struct iv_use *use;
628
629 for (i = 0; i < n_iv_uses (data); i++)
630 {
631 use = iv_use (data, i);
632 do
633 {
634 dump_use (file, use);
635 use = use->next;
636 }
637 while (use);
638 fprintf (file, "\n");
639 }
640 }
641
/* Dumps information about induction variable candidate CAND to FILE:
   its id, dependencies, before/after variables, increment position and
   underlying iv.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, " depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  /* A candidate without an iv is a pseudocandidate standing for
     replacement of the final value (see struct iv_cand).  */
  if (!iv)
    {
      fprintf (file, " final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, " var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, " var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, " incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, " incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, " original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}
702
/* Returns the version_info entry for ssa version VER; VER must be below
   DATA->version_info_size.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}
710
/* Returns the version_info entry for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
718
719 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
720 emitted in LOOP. */
721
722 static bool
stmt_after_ip_normal_pos(struct loop * loop,gimple * stmt)723 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
724 {
725 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
726
727 gcc_assert (bb);
728
729 if (sbb == loop->latch)
730 return true;
731
732 if (sbb != bb)
733 return false;
734
735 return stmt == last_stmt (bb);
736 }
737
738 /* Returns true if STMT if after the place where the original induction
739 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
740 if the positions are identical. */
741
742 static bool
stmt_after_inc_pos(struct iv_cand * cand,gimple * stmt,bool true_if_equal)743 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
744 {
745 basic_block cand_bb = gimple_bb (cand->incremented_at);
746 basic_block stmt_bb = gimple_bb (stmt);
747
748 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
749 return false;
750
751 if (stmt_bb != cand_bb)
752 return true;
753
754 if (true_if_equal
755 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
756 return true;
757 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
758 }
759
760 /* Returns true if STMT if after the place where the induction variable
761 CAND is incremented in LOOP. */
762
763 static bool
stmt_after_increment(struct loop * loop,struct iv_cand * cand,gimple * stmt)764 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
765 {
766 switch (cand->pos)
767 {
768 case IP_END:
769 return false;
770
771 case IP_NORMAL:
772 return stmt_after_ip_normal_pos (loop, stmt);
773
774 case IP_ORIGINAL:
775 case IP_AFTER_USE:
776 return stmt_after_inc_pos (cand, stmt, false);
777
778 case IP_BEFORE_USE:
779 return stmt_after_inc_pos (cand, stmt, true);
780
781 default:
782 gcc_unreachable ();
783 }
784 }
785
786 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
787
788 static bool
abnormal_ssa_name_p(tree exp)789 abnormal_ssa_name_p (tree exp)
790 {
791 if (!exp)
792 return false;
793
794 if (TREE_CODE (exp) != SSA_NAME)
795 return false;
796
797 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
798 }
799
800 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
801 abnormal phi node. Callback for for_each_index. */
802
803 static bool
idx_contains_abnormal_ssa_name_p(tree base,tree * index,void * data ATTRIBUTE_UNUSED)804 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
805 void *data ATTRIBUTE_UNUSED)
806 {
807 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
808 {
809 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
810 return false;
811 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
812 return false;
813 }
814
815 return !abnormal_ssa_name_p (*index);
816 }
817
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  Recurses through unary/binary/comparison
   operands, COND_EXPRs and the indices of ADDR_EXPRs.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  /* Constants and other gimple invariants contain no ssa names.  */
  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  /* for_each_index returns false when the callback rejected some index,
     i.e. when an abnormal ssa name was found; hence the negation.  */
  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
870
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  Results
   (including failures, cached as NULL) are memoized in DATA->niters.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  /* Lazily create the cache on first use.  */
  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      /* Cache even a NULL result so the analysis is not retried.  */
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
908
909 /* Returns the structure describing number of iterations determined from
910 single dominating exit of DATA->current_loop, or NULL if something
911 goes wrong. */
912
913 static struct tree_niter_desc *
niter_for_single_dom_exit(struct ivopts_data * data)914 niter_for_single_dom_exit (struct ivopts_data *data)
915 {
916 edge exit = single_dom_exit (data->current_loop);
917
918 if (!exit)
919 return NULL;
920
921 return niter_for_exit (data, exit);
922 }
923
/* Initializes data structures used by the iv optimization pass, stored
   in DATA: the per-ssa-name info array, bitmaps, vectors, hash tables
   and the iv obstack.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  /* Leave headroom: new ssa names may be created during the pass.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;		/* Built lazily by niter_for_exit.  */
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
946
947 /* Returns a memory object to that EXPR points. In case we are able to
948 determine that it does not point to any such object, NULL is returned. */
949
950 static tree
determine_base_object(tree expr)951 determine_base_object (tree expr)
952 {
953 enum tree_code code = TREE_CODE (expr);
954 tree base, obj;
955
956 /* If this is a pointer casted to any type, we need to determine
957 the base object for the pointer; so handle conversions before
958 throwing away non-pointer expressions. */
959 if (CONVERT_EXPR_P (expr))
960 return determine_base_object (TREE_OPERAND (expr, 0));
961
962 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
963 return NULL_TREE;
964
965 switch (code)
966 {
967 case INTEGER_CST:
968 return NULL_TREE;
969
970 case ADDR_EXPR:
971 obj = TREE_OPERAND (expr, 0);
972 base = get_base_address (obj);
973
974 if (!base)
975 return expr;
976
977 if (TREE_CODE (base) == MEM_REF)
978 return determine_base_object (TREE_OPERAND (base, 0));
979
980 return fold_convert (ptr_type_node,
981 build_fold_addr_expr (base));
982
983 case POINTER_PLUS_EXPR:
984 return determine_base_object (TREE_OPERAND (expr, 0));
985
986 case PLUS_EXPR:
987 case MINUS_EXPR:
988 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
989 gcc_unreachable ();
990
991 default:
992 return fold_convert (ptr_type_node, expr);
993 }
994 }
995
996 /* Return true if address expression with non-DECL_P operand appears
997 in EXPR. */
998
999 static bool
contain_complex_addr_expr(tree expr)1000 contain_complex_addr_expr (tree expr)
1001 {
1002 bool res = false;
1003
1004 STRIP_NOPS (expr);
1005 switch (TREE_CODE (expr))
1006 {
1007 case POINTER_PLUS_EXPR:
1008 case PLUS_EXPR:
1009 case MINUS_EXPR:
1010 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1011 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1012 break;
1013
1014 case ADDR_EXPR:
1015 return (!DECL_P (TREE_OPERAND (expr, 0)));
1016
1017 default:
1018 return false;
1019 }
1020
1021 return res;
1022 }
1023
1024 /* Allocates an induction variable with given initial value BASE and step STEP
1025 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1026
1027 static struct iv *
1028 alloc_iv (struct ivopts_data *data, tree base, tree step,
1029 bool no_overflow = false)
1030 {
1031 tree expr = base;
1032 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1033 sizeof (struct iv));
1034 gcc_assert (step != NULL_TREE);
1035
1036 /* Lower address expression in base except ones with DECL_P as operand.
1037 By doing this:
1038 1) More accurate cost can be computed for address expressions;
1039 2) Duplicate candidates won't be created for bases in different
1040 forms, like &a[0] and &a. */
1041 STRIP_NOPS (expr);
1042 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1043 || contain_complex_addr_expr (expr))
1044 {
1045 aff_tree comb;
1046 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1047 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1048 }
1049
1050 iv->base = base;
1051 iv->base_object = determine_base_object (base);
1052 iv->step = step;
1053 iv->biv_p = false;
1054 iv->have_use_for = false;
1055 iv->use_id = 0;
1056 iv->ssa_name = NULL_TREE;
1057 iv->no_overflow = no_overflow;
1058 iv->have_address_use = false;
1059
1060 return iv;
1061 }
1062
1063 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1064 doesn't overflow. */
1065
1066 static void
set_iv(struct ivopts_data * data,tree iv,tree base,tree step,bool no_overflow)1067 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1068 bool no_overflow)
1069 {
1070 struct version_info *info = name_info (data, iv);
1071
1072 gcc_assert (!info->iv);
1073
1074 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1075 info->iv = alloc_iv (data, base, step, no_overflow);
1076 info->iv->ssa_name = iv;
1077 }
1078
1079 /* Finds induction variable declaration for VAR. */
1080
1081 static struct iv *
get_iv(struct ivopts_data * data,tree var)1082 get_iv (struct ivopts_data *data, tree var)
1083 {
1084 basic_block bb;
1085 tree type = TREE_TYPE (var);
1086
1087 if (!POINTER_TYPE_P (type)
1088 && !INTEGRAL_TYPE_P (type))
1089 return NULL;
1090
1091 if (!name_info (data, var)->iv)
1092 {
1093 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1094
1095 if (!bb
1096 || !flow_bb_inside_loop_p (data->current_loop, bb))
1097 set_iv (data, var, var, build_int_cst (type, 0), true);
1098 }
1099
1100 return name_info (data, var)->iv;
1101 }
1102
1103 /* Return the first non-invariant ssa var found in EXPR. */
1104
1105 static tree
extract_single_var_from_expr(tree expr)1106 extract_single_var_from_expr (tree expr)
1107 {
1108 int i, n;
1109 tree tmp;
1110 enum tree_code code;
1111
1112 if (!expr || is_gimple_min_invariant (expr))
1113 return NULL;
1114
1115 code = TREE_CODE (expr);
1116 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1117 {
1118 n = TREE_OPERAND_LENGTH (expr);
1119 for (i = 0; i < n; i++)
1120 {
1121 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1122
1123 if (tmp)
1124 return tmp;
1125 }
1126 }
1127 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1128 }
1129
/* Finds basic ivs: scans the PHI nodes in the header of the current
   loop and records every result that SCEV recognizes as a simple
   affine iv.  Returns true iff at least one biv was recorded.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      /* Names occurring in abnormal PHIs must not be manipulated.  */
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      /* Skip virtual (memory) PHIs.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      /* Ask SCEV whether the PHI result evolves as base + i * step.  */
      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      /* A zero step means the name is invariant, not a biv.  */
      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, because that's expensive
	 and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  /* Pointer ivs step in bytes: keep the step in ptrofftype.  */
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
1187
1188 /* Marks basic ivs. */
1189
1190 static void
mark_bivs(struct ivopts_data * data)1191 mark_bivs (struct ivopts_data *data)
1192 {
1193 gphi *phi;
1194 gimple *def;
1195 tree var;
1196 struct iv *iv, *incr_iv;
1197 struct loop *loop = data->current_loop;
1198 basic_block incr_bb;
1199 gphi_iterator psi;
1200
1201 data->bivs_not_used_in_addr = 0;
1202 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1203 {
1204 phi = psi.phi ();
1205
1206 iv = get_iv (data, PHI_RESULT (phi));
1207 if (!iv)
1208 continue;
1209
1210 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1211 def = SSA_NAME_DEF_STMT (var);
1212 /* Don't mark iv peeled from other one as biv. */
1213 if (def
1214 && gimple_code (def) == GIMPLE_PHI
1215 && gimple_bb (def) == loop->header)
1216 continue;
1217
1218 incr_iv = get_iv (data, var);
1219 if (!incr_iv)
1220 continue;
1221
1222 /* If the increment is in the subloop, ignore it. */
1223 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1224 if (incr_bb->loop_father != data->current_loop
1225 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1226 continue;
1227
1228 iv->biv_p = true;
1229 incr_iv->biv_p = true;
1230 if (iv->no_overflow)
1231 data->bivs_not_used_in_addr++;
1232 if (incr_iv->no_overflow)
1233 data->bivs_not_used_in_addr++;
1234 }
1235 }
1236
1237 /* Checks whether STMT defines a linear induction variable and stores its
1238 parameters to IV. */
1239
1240 static bool
find_givs_in_stmt_scev(struct ivopts_data * data,gimple * stmt,affine_iv * iv)1241 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1242 {
1243 tree lhs, stop;
1244 struct loop *loop = data->current_loop;
1245
1246 iv->base = NULL_TREE;
1247 iv->step = NULL_TREE;
1248
1249 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1250 return false;
1251
1252 lhs = gimple_assign_lhs (stmt);
1253 if (TREE_CODE (lhs) != SSA_NAME)
1254 return false;
1255
1256 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1257 return false;
1258
1259 /* Stop expanding iv base at the first ssa var referred by iv step.
1260 Ideally we should stop at any ssa var, because that's expensive
1261 and unusual to happen, we just do it on the first one.
1262
1263 See PR64705 for the rationale. */
1264 stop = extract_single_var_from_expr (iv->step);
1265 iv->base = expand_simple_operations (iv->base, stop);
1266 if (contains_abnormal_ssa_name_p (iv->base)
1267 || contains_abnormal_ssa_name_p (iv->step))
1268 return false;
1269
1270 /* If STMT could throw, then do not consider STMT as defining a GIV.
1271 While this will suppress optimizations, we can not safely delete this
1272 GIV and associated statements, even if it appears it is not used. */
1273 if (stmt_could_throw_p (stmt))
1274 return false;
1275
1276 return true;
1277 }
1278
1279 /* Finds general ivs in statement STMT. */
1280
1281 static void
find_givs_in_stmt(struct ivopts_data * data,gimple * stmt)1282 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1283 {
1284 affine_iv iv;
1285
1286 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1287 return;
1288
1289 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1290 }
1291
1292 /* Finds general ivs in basic block BB. */
1293
1294 static void
find_givs_in_bb(struct ivopts_data * data,basic_block bb)1295 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1296 {
1297 gimple_stmt_iterator bsi;
1298
1299 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1300 find_givs_in_stmt (data, gsi_stmt (bsi));
1301 }
1302
1303 /* Finds general ivs. */
1304
1305 static void
find_givs(struct ivopts_data * data)1306 find_givs (struct ivopts_data *data)
1307 {
1308 struct loop *loop = data->current_loop;
1309 basic_block *body = get_loop_body_in_dom_order (loop);
1310 unsigned i;
1311
1312 for (i = 0; i < loop->num_nodes; i++)
1313 find_givs_in_bb (data, body[i]);
1314 free (body);
1315 }
1316
1317 /* For each ssa name defined in LOOP determines whether it is an induction
1318 variable and if so, its initial value and step. */
1319
1320 static bool
find_induction_variables(struct ivopts_data * data)1321 find_induction_variables (struct ivopts_data *data)
1322 {
1323 unsigned i;
1324 bitmap_iterator bi;
1325
1326 if (!find_bivs (data))
1327 return false;
1328
1329 find_givs (data);
1330 mark_bivs (data);
1331
1332 if (dump_file && (dump_flags & TDF_DETAILS))
1333 {
1334 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1335
1336 if (niter)
1337 {
1338 fprintf (dump_file, " number of iterations ");
1339 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1340 if (!integer_zerop (niter->may_be_zero))
1341 {
1342 fprintf (dump_file, "; zero if ");
1343 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1344 }
1345 fprintf (dump_file, "\n\n");
1346 };
1347
1348 fprintf (dump_file, "Induction variables:\n\n");
1349
1350 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1351 {
1352 if (ver_info (data, i)->iv)
1353 dump_iv (dump_file, ver_info (data, i)->iv, true);
1354 }
1355 }
1356
1357 return true;
1358 }
1359
1360 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1361 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1362 is the const offset stripped from IV base. For uses of other types,
1363 ADDR_BASE and ADDR_OFFSET are zero by default. */
1364
1365 static struct iv_use *
1366 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1367 gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1368 unsigned HOST_WIDE_INT addr_offset = 0)
1369 {
1370 struct iv_use *use = XCNEW (struct iv_use);
1371
1372 use->id = n_iv_uses (data);
1373 use->sub_id = 0;
1374 use->type = use_type;
1375 use->iv = iv;
1376 use->stmt = stmt;
1377 use->op_p = use_p;
1378 use->related_cands = BITMAP_ALLOC (NULL);
1379 use->next = NULL;
1380 use->addr_base = addr_base;
1381 use->addr_offset = addr_offset;
1382
1383 data->iv_uses.safe_push (use);
1384
1385 return use;
1386 }
1387
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  Sub
   uses of a group share the group's id and are kept on a singly linked
   list sorted by ascending ADDR_OFFSET; the group leader (the member
   with the smallest offset, stored in iv_uses) owns the related_cands
   bitmap.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
	    struct iv *iv, gimple *stmt, enum use_type use_type,
	    tree addr_base, unsigned HOST_WIDE_INT addr_offset,
	    unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      /* The new use becomes the group leader: take over the
	 related_cands bitmap and the slot in iv_uses.  */
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      /* Walk to the insertion point that keeps the list sorted.  */
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}
1433
1434 /* Checks whether OP is a loop-level invariant and if so, records it.
1435 NONLINEAR_USE is true if the invariant is used in a way we do not
1436 handle specially. */
1437
1438 static void
record_invariant(struct ivopts_data * data,tree op,bool nonlinear_use)1439 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1440 {
1441 basic_block bb;
1442 struct version_info *info;
1443
1444 if (TREE_CODE (op) != SSA_NAME
1445 || virtual_operand_p (op))
1446 return;
1447
1448 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1449 if (bb
1450 && flow_bb_inside_loop_p (data->current_loop, bb))
1451 return;
1452
1453 info = name_info (data, op);
1454 info->name = op;
1455 info->has_nonlin_use |= nonlinear_use;
1456 if (!info->inv_id)
1457 info->inv_id = ++data->max_inv_id;
1458 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1459 }
1460
1461 /* Checks whether the use OP is interesting and if so, records it. */
1462
1463 static struct iv_use *
find_interesting_uses_op(struct ivopts_data * data,tree op)1464 find_interesting_uses_op (struct ivopts_data *data, tree op)
1465 {
1466 struct iv *iv;
1467 gimple *stmt;
1468 struct iv_use *use;
1469
1470 if (TREE_CODE (op) != SSA_NAME)
1471 return NULL;
1472
1473 iv = get_iv (data, op);
1474 if (!iv)
1475 return NULL;
1476
1477 if (iv->have_use_for)
1478 {
1479 use = iv_use (data, iv->use_id);
1480
1481 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1482 return use;
1483 }
1484
1485 if (integer_zerop (iv->step))
1486 {
1487 record_invariant (data, op, true);
1488 return NULL;
1489 }
1490 iv->have_use_for = true;
1491
1492 stmt = SSA_NAME_DEF_STMT (op);
1493 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1494 || is_gimple_assign (stmt));
1495
1496 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1497 iv->use_id = use->id;
1498
1499 return use;
1500 }
1501
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      /* Otherwise STMT is an assignment computing the comparison;
	 the compared values are rhs1 and rhs2.  */
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  /* (Re)initialize the shared fallback objects on each call.  */
  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

 end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
1567
1568 /* Checks whether the condition in STMT is interesting and if so,
1569 records it. */
1570
1571 static void
find_interesting_uses_cond(struct ivopts_data * data,gimple * stmt)1572 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1573 {
1574 tree *var_p, *bound_p;
1575 struct iv *var_iv;
1576
1577 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1578 {
1579 find_interesting_uses_op (data, *var_p);
1580 find_interesting_uses_op (data, *bound_p);
1581 return;
1582 }
1583
1584 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1585 }
1586
1587 /* Returns the outermost loop EXPR is obviously invariant in
1588 relative to the loop LOOP, i.e. if all its operands are defined
1589 outside of the returned loop. Returns NULL if EXPR is not
1590 even obviously invariant in LOOP. */
1591
1592 struct loop *
outermost_invariant_loop_for_expr(struct loop * loop,tree expr)1593 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1594 {
1595 basic_block def_bb;
1596 unsigned i, len;
1597
1598 if (is_gimple_min_invariant (expr))
1599 return current_loops->tree_root;
1600
1601 if (TREE_CODE (expr) == SSA_NAME)
1602 {
1603 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1604 if (def_bb)
1605 {
1606 if (flow_bb_inside_loop_p (loop, def_bb))
1607 return NULL;
1608 return superloop_at_depth (loop,
1609 loop_depth (def_bb->loop_father) + 1);
1610 }
1611
1612 return current_loops->tree_root;
1613 }
1614
1615 if (!EXPR_P (expr))
1616 return NULL;
1617
1618 unsigned maxdepth = 0;
1619 len = TREE_OPERAND_LENGTH (expr);
1620 for (i = 0; i < len; i++)
1621 {
1622 struct loop *ivloop;
1623 if (!TREE_OPERAND (expr, i))
1624 continue;
1625
1626 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1627 if (!ivloop)
1628 return NULL;
1629 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1630 }
1631
1632 return superloop_at_depth (loop, maxdepth);
1633 }
1634
1635 /* Returns true if expression EXPR is obviously invariant in LOOP,
1636 i.e. if all its operands are defined outside of the LOOP. LOOP
1637 should not be the function body. */
1638
1639 bool
expr_invariant_in_loop_p(struct loop * loop,tree expr)1640 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1641 {
1642 basic_block def_bb;
1643 unsigned i, len;
1644
1645 gcc_assert (loop_depth (loop) > 0);
1646
1647 if (is_gimple_min_invariant (expr))
1648 return true;
1649
1650 if (TREE_CODE (expr) == SSA_NAME)
1651 {
1652 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1653 if (def_bb
1654 && flow_bb_inside_loop_p (loop, def_bb))
1655 return false;
1656
1657 return true;
1658 }
1659
1660 if (!EXPR_P (expr))
1661 return false;
1662
1663 len = TREE_OPERAND_LENGTH (expr);
1664 for (i = 0; i < len; i++)
1665 if (TREE_OPERAND (expr, i)
1666 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1667 return false;
1668
1669 return true;
1670 }
1671
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.
   Returns NULL when no deriving biv can be found.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  /* For a compound expression, recurse into the operands and return
     the first deriving biv found.  */
  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  /* Not itself a biv: trace through the defining statement.  */
  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      /* Try each PHI argument in turn.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
1763
/* Record BIV, its predecessor and successor that they are used in
   address type uses.  Marks BIV itself, then scans the relevant bitmap
   for sibling bivs with the same step whose base differs from BIV's by
   exactly one step in either direction, marking those as well;
   bivs_not_used_in_addr is decremented for each biv newly marked.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  /* Only unmarked, non-wrapping bivs with a nonzero step are of
     interest.  */
  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  /* BASE_1 is BIV's base advanced by one step: a biv with this base is
     BIV's successor.  */
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      /* IV qualifies if it is BIV's successor or predecessor.  */
      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
1809
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  /* Global data of the ivopts pass.  */
  struct ivopts_data *ivopts_data;
  /* Statement containing the memory reference being analyzed.  */
  gimple *stmt;
  /* Accumulated step of all indices seen so far, in sizetype.  */
  tree step;
};
1820
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-ssa index contributes nothing to the cumulated step.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  /* Replace the index with its initial value.  */
  *idx = iv->base;

  /* An invariant (zero-step) index adds nothing to the step.  */
  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  /* An iv known not to wrap in a non-wrapping type may be converted
     to sizetype using overflow semantics.  */
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  /* Accumulate this index's contribution: element size times iv step.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      /* Mark the biv this index derives from as used in an address
	 computation.  */
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }
  return true;
}
1909
1910 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1911 object is passed to it in DATA. */
1912
1913 static bool
idx_record_use(tree base,tree * idx,void * vdata)1914 idx_record_use (tree base, tree *idx,
1915 void *vdata)
1916 {
1917 struct ivopts_data *data = (struct ivopts_data *) vdata;
1918 find_interesting_uses_op (data, *idx);
1919 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1920 {
1921 find_interesting_uses_op (data, array_ref_element_size (base));
1922 find_interesting_uses_op (data, array_ref_low_bound (base));
1923 }
1924 return true;
1925 }
1926
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* Identical expressions: TOP = 1 * BOT.  */
  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* X * c1 is (c1 * cst) * BOT if X = cst * BOT.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* A sum or difference is a multiple of BOT only if both operands
	 are.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      /* Constant over constant: only an exact division counts.  */
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      /* RES receives the remainder; succeed only if it is zero.  */
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
1989
1990 /* Return true if memory reference REF with step STEP may be unaligned. */
1991
1992 static bool
may_be_unaligned_p(tree ref,tree step)1993 may_be_unaligned_p (tree ref, tree step)
1994 {
1995 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1996 thus they are not misaligned. */
1997 if (TREE_CODE (ref) == TARGET_MEM_REF)
1998 return false;
1999
2000 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2001 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2002 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2003
2004 unsigned HOST_WIDE_INT bitpos;
2005 unsigned int ref_align;
2006 get_object_alignment_1 (ref, &ref_align, &bitpos);
2007 if (ref_align < align
2008 || (bitpos % align) != 0
2009 || (bitpos % BITS_PER_UNIT) != 0)
2010 return true;
2011
2012 unsigned int trailing_zeros = tree_ctz (step);
2013 if (trailing_zeros < HOST_BITS_PER_INT
2014 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2015 return true;
2016
2017 return false;
2018 }
2019
2020 /* Return true if EXPR may be non-addressable. */
2021
2022 bool
may_be_nonaddressable_p(tree expr)2023 may_be_nonaddressable_p (tree expr)
2024 {
2025 switch (TREE_CODE (expr))
2026 {
2027 case TARGET_MEM_REF:
2028 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2029 target, thus they are always addressable. */
2030 return false;
2031
2032 case MEM_REF:
2033 /* Likewise for MEM_REFs, modulo the storage order. */
2034 return REF_REVERSE_STORAGE_ORDER (expr);
2035
2036 case BIT_FIELD_REF:
2037 if (REF_REVERSE_STORAGE_ORDER (expr))
2038 return true;
2039 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2040
2041 case COMPONENT_REF:
2042 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2043 return true;
2044 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2045 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2046
2047 case ARRAY_REF:
2048 case ARRAY_RANGE_REF:
2049 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2050 return true;
2051 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2052
2053 case VIEW_CONVERT_EXPR:
2054 /* This kind of view-conversions may wrap non-addressable objects
2055 and make them look addressable. After some processing the
2056 non-addressability may be uncovered again, causing ADDR_EXPRs
2057 of inappropriate objects to be built. */
2058 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2059 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2060 return true;
2061 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2062
2063 CASE_CONVERT:
2064 return true;
2065
2066 default:
2067 break;
2068 }
2069
2070 return false;
2071 }
2072
2073 static tree
2074 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2075
2076 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2077 If there is an existing use which has same stripped iv base and step,
2078 this function records this one as a sub use to that; otherwise records
2079 it as a normal one. */
2080
2081 static struct iv_use *
record_group_use(struct ivopts_data * data,tree * use_p,struct iv * iv,gimple * stmt,enum use_type use_type)2082 record_group_use (struct ivopts_data *data, tree *use_p,
2083 struct iv *iv, gimple *stmt, enum use_type use_type)
2084 {
2085 unsigned int i;
2086 struct iv_use *use;
2087 tree addr_base;
2088 unsigned HOST_WIDE_INT addr_offset;
2089
2090 /* Only support sub use for address type uses, that is, with base
2091 object. */
2092 if (!iv->base_object)
2093 return record_use (data, use_p, iv, stmt, use_type);
2094
2095 addr_base = strip_offset (iv->base, &addr_offset);
2096 for (i = 0; i < n_iv_uses (data); i++)
2097 {
2098 use = iv_use (data, i);
2099 if (use->type != USE_ADDRESS || !use->iv->base_object)
2100 continue;
2101
2102 /* Check if it has the same stripped base and step. */
2103 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2104 && operand_equal_p (iv->step, use->iv->step, 0)
2105 && operand_equal_p (addr_base, use->addr_base, 0))
2106 break;
2107 }
2108
2109 if (i == n_iv_uses (data))
2110 return record_use (data, use_p, iv, stmt,
2111 use_type, addr_base, addr_offset);
2112 else
2113 return record_sub_use (data, use_p, iv, stmt,
2114 use_type, addr_base, addr_offset, i);
2115 }
2116
/* Finds addresses in *OP_P inside STMT.  If the memory reference is based
   on an induction variable, an address-type use (USE_ADDRESS) is recorded
   for it; otherwise the SSA names appearing in its indices are recorded as
   invariants.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace each SSA-name operand of the TARGET_MEM_REF with the base
	 of its induction variable, accumulating the iv steps into STEP.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  /* The index operand is scaled by TMR_STEP, so the step of its
	     iv must be scaled the same way before being added in.  */
	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      /* Zero overall step means no induction variable is involved.  */
      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      /* Substitute iv bases into the indices of the reference and collect
	 the combined step; fail if no nonzero-step iv occurs in it.  */
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* Not an interesting address use; just record invariants appearing in
     the indices of the reference.  */
  for_each_index (op_p, idx_record_use, data);
}
2234
2235 /* Finds and records invariants used in STMT. */
2236
2237 static void
find_invariants_stmt(struct ivopts_data * data,gimple * stmt)2238 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2239 {
2240 ssa_op_iter iter;
2241 use_operand_p use_p;
2242 tree op;
2243
2244 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2245 {
2246 op = USE_FROM_PTR (use_p);
2247 record_invariant (data, op, false);
2248 }
2249 }
2250
/* Finds interesting uses of induction variables in the statement STMT.
   Dispatches to the specialized recorders for conditions, addresses and
   plain operands, and records invariants used by STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  /* Conditions get specialized handling so that the compare can later be
     rewritten in terms of a chosen candidate.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  /* Plain copy or load/store: memory references become address
	     uses, scalar values become generic operand uses.  */
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  /* A phi in the loop header that defines a nonzero-step iv is the iv
     itself; its arguments are not interesting uses.  */
  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  /* Fallback: record every SSA operand that is an induction variable as a
     generic (nonlinear) use.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
2338
2339 /* Finds interesting uses of induction variables outside of loops
2340 on loop exit edge EXIT. */
2341
2342 static void
find_interesting_uses_outside(struct ivopts_data * data,edge exit)2343 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2344 {
2345 gphi *phi;
2346 gphi_iterator psi;
2347 tree def;
2348
2349 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2350 {
2351 phi = psi.phi ();
2352 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2353 if (!virtual_operand_p (def))
2354 find_interesting_uses_op (data, def);
2355 }
2356 }
2357
/* Finds uses of the induction variables that are interesting.  Walks all
   basic blocks of the current loop, recording uses in phis and statements
   as well as uses on loop exit edges, then dumps the found invariants.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Values live after the loop (flowing out on exit edges) are
	 interesting uses as well.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      /* Debug statements must not influence code generation.  */
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, " ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
2413
/* Compute maximum offset of [base + offset] addressing mode
   for memory reference represented by USE.  The result is cached per
   (address space, memory mode) pair in a function-local static table,
   since it only depends on the target.  */

static HOST_WIDE_INT
compute_max_addr_offset (struct iv_use *use)
{
  int width;
  rtx reg, addr;
  HOST_WIDE_INT i, off;
  unsigned list_index, num;
  addr_space_t as;
  machine_mode mem_mode, addr_mode;
  static vec<HOST_WIDE_INT> max_offset_list;

  as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));

  /* Grow the cache on demand; -1 marks entries not computed yet.  */
  num = max_offset_list.length ();
  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (list_index >= num)
    {
      max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
      for (; num < max_offset_list.length (); num++)
	max_offset_list[num] = -1;
    }

  off = max_offset_list[list_index];
  if (off != -1)
    return off;

  /* Probe the target with a (plus (reg) (const_int OFF)) address,
     shrinking the offset width until a legitimate address is found.  */
  addr_mode = targetm.addr_space.address_mode (as);
  reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
  addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);

  width = GET_MODE_BITSIZE (addr_mode) - 1;
  if (width > (HOST_BITS_PER_WIDE_INT - 1))
    width = HOST_BITS_PER_WIDE_INT - 1;

  for (i = width; i > 0; i--)
    {
      off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
      XEXP (addr, 1) = gen_int_mode (off, addr_mode);
      if (memory_address_addr_space_p (mem_mode, addr, as))
	break;

      /* For some strict-alignment targets, the offset must be naturally
	 aligned.  Try an aligned offset if mem_mode is not QImode.  */
      off = ((unsigned HOST_WIDE_INT) 1 << i);
      if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
	{
	  off -= GET_MODE_SIZE (mem_mode);
	  XEXP (addr, 1) = gen_int_mode (off, addr_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
    }
  /* No offset worked at all; report zero (offset part unusable).  */
  if (i == 0)
    off = 0;

  max_offset_list[list_index] = off;
  return off;
}
2476
2477 /* Check if all small groups should be split. Return true if and
2478 only if:
2479
2480 1) At least one groups contain two uses with different offsets.
2481 2) No group contains more than two uses with different offsets.
2482
2483 Return false otherwise. We want to split such groups because:
2484
2485 1) Small groups don't have much benefit and may interfer with
2486 general candidate selection.
2487 2) Size for problem with only small groups is usually small and
2488 general algorithm can handle it well.
2489
2490 TODO -- Above claim may not hold when auto increment is supported. */
2491
2492 static bool
split_all_small_groups(struct ivopts_data * data)2493 split_all_small_groups (struct ivopts_data *data)
2494 {
2495 bool split_p = false;
2496 unsigned int i, n, distinct;
2497 struct iv_use *pre, *use;
2498
2499 n = n_iv_uses (data);
2500 for (i = 0; i < n; i++)
2501 {
2502 use = iv_use (data, i);
2503 if (!use->next)
2504 continue;
2505
2506 distinct = 1;
2507 gcc_assert (use->type == USE_ADDRESS);
2508 for (pre = use, use = use->next; use; pre = use, use = use->next)
2509 {
2510 if (pre->addr_offset != use->addr_offset)
2511 distinct++;
2512
2513 if (distinct > 2)
2514 return false;
2515 }
2516 if (distinct == 2)
2517 split_p = true;
2518 }
2519
2520 return split_p;
2521 }
2522
/* For each group of address type uses, this function further groups
   these uses according to the maximum offset supported by target's
   [base + offset] addressing mode.  Sub-use lists are split in place and
   the tails become new top-level groups.  */

static void
group_address_uses (struct ivopts_data *data)
{
  HOST_WIDE_INT max_offset = -1;
  unsigned int i, n, sub_id;
  struct iv_use *pre, *use;
  unsigned HOST_WIDE_INT addr_offset_first;

  /* Reset max offset to split all small groups.  */
  if (split_all_small_groups (data))
    max_offset = 0;

  n = n_iv_uses (data);
  for (i = 0; i < n; i++)
    {
      use = iv_use (data, i);
      if (!use->next)
	continue;

      gcc_assert (use->type == USE_ADDRESS);
      /* Unless we are splitting everything (max_offset == 0), find the
	 largest [base + offset] offset supported for this use's mode.  */
      if (max_offset != 0)
	max_offset = compute_max_addr_offset (use);

      while (use)
	{
	  sub_id = 0;
	  addr_offset_first = use->addr_offset;
	  /* Only uses with offset that can fit in offset part against
	     the first use can be grouped together.  */
	  for (pre = use, use = use->next;
	       use && (use->addr_offset - addr_offset_first
		       <= (unsigned HOST_WIDE_INT) max_offset);
	       pre = use, use = use->next)
	    {
	      use->id = pre->id;
	      use->sub_id = ++sub_id;
	    }

	  /* Break the list and create new group.  */
	  if (use)
	    {
	      pre->next = NULL;
	      use->id = n_iv_uses (data);
	      use->related_cands = BITMAP_ALLOC (NULL);
	      data->iv_uses.safe_push (use);
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_uses (dump_file, data);
}
2579
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  Returns the stripped
   expression; when nothing is stripped, the original EXPR is returned.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      /* A nonzero constant that fits a HWI becomes the offset; the
	 expression itself collapses to zero.  */
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets; rebuild the sum
	 only when something actually changed.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      /* Only handle multiplication by a HWI constant; the stripped offset
	 of the other operand is scaled by that constant.  */
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* The constant part of the index scaled by the element size
	 becomes the offset.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    /* Convert the field's bit offset to bytes, preserving sign.  */
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      /* Recurse into the addressed object; TOP_COMPREF becomes true so
	 top-level component references may be stripped.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
2752
2753 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2754
2755 static tree
strip_offset(tree expr,unsigned HOST_WIDE_INT * offset)2756 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2757 {
2758 HOST_WIDE_INT off;
2759 tree core = strip_offset_1 (expr, false, false, &off);
2760 *offset = off;
2761 return core;
2762 }
2763
2764 /* Returns variant of TYPE that can be used as base for different uses.
2765 We return unsigned type with the same precision, which avoids problems
2766 with overflows. */
2767
2768 static tree
generic_type_for(tree type)2769 generic_type_for (tree type)
2770 {
2771 if (POINTER_TYPE_P (type))
2772 return unsigned_type_for (type);
2773
2774 if (TYPE_UNSIGNED (type))
2775 return type;
2776
2777 return unsigned_type_for (type);
2778 }
2779
2780 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2781 the bitmap to that we should store it. */
2782
2783 static struct ivopts_data *fd_ivopts_data;
2784 static tree
find_depends(tree * expr_p,int * ws ATTRIBUTE_UNUSED,void * data)2785 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2786 {
2787 bitmap *depends_on = (bitmap *) data;
2788 struct version_info *info;
2789
2790 if (TREE_CODE (*expr_p) != SSA_NAME)
2791 return NULL_TREE;
2792 info = name_info (fd_ivopts_data, *expr_p);
2793
2794 if (!info->inv_id || info->has_nonlin_use)
2795 return NULL_TREE;
2796
2797 if (!*depends_on)
2798 *depends_on = BITMAP_ALLOC (NULL);
2799 bitmap_set_bit (*depends_on, info->inv_id);
2800
2801 return NULL_TREE;
2802 }
2803
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   Returns the (possibly pre-existing, deduplicated) candidate, or NULL if
   none could be added.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple *incremented_at,
		 struct iv *orig_iv = NULL)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* -fkeep-gc-roots-live means that we have to keep a real pointer
     live, but the ivopts code may replace a real pointer with one
     pointing before or after the memory block that is then adjusted
     into the memory block during the loop.  FIXME: It would likely be
     better to actually force the pointer live and still use ivopts;
     for example, it would be enough to write the pointer into memory
     and keep it there until after the loop.  */
  if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  /* Look for an existing equivalent candidate to avoid duplicates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      /* Pseudocandidates (no iv) only match other pseudocandidates.  */
      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  /* Not found: create and register a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (data, base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      /* A non-constant step may depend on loop invariants; record them.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      cand->orig_iv = orig_iv;
      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* Importance is sticky: once important, always important.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2927
2928 /* Returns true if incrementing the induction variable at the end of the LOOP
2929 is allowed.
2930
2931 The purpose is to avoid splitting latch edge with a biv increment, thus
2932 creating a jump, possibly confusing other optimization passes and leaving
2933 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2934 is not available (so we do not have a better alternative), or if the latch
2935 edge is already nonempty. */
2936
2937 static bool
allow_ip_end_pos_p(struct loop * loop)2938 allow_ip_end_pos_p (struct loop *loop)
2939 {
2940 if (!ip_normal_pos (loop))
2941 return true;
2942
2943 if (!empty_block_p (ip_end_pos (loop)))
2944 return true;
2945
2946 return false;
2947 }
2948
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  A pre-increment/decrement candidate
   is positioned before the use (with base adjusted back by one step), a
   post-increment/decrement candidate after it.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  /* Pre-increment/decrement: the step must exactly match the access size
     (negated for decrement).  */
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      /* The candidate's base is the use's base minus one step, so that the
	 increment before the use lands exactly on the address.  */
      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  /* Post-increment/decrement: the base can be used unchanged.  */
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
3006
3007 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3008 position to POS. If USE is not NULL, the candidate is set as related to
3009 it. The candidate computation is scheduled before exit condition and at
3010 the end of loop. */
3011
3012 static void
3013 add_candidate (struct ivopts_data *data,
3014 tree base, tree step, bool important, struct iv_use *use,
3015 struct iv *orig_iv = NULL)
3016 {
3017 gcc_assert (use == NULL || use->sub_id == 0);
3018
3019 if (ip_normal_pos (data->current_loop))
3020 add_candidate_1 (data, base, step, important,
3021 IP_NORMAL, use, NULL, orig_iv);
3022 if (ip_end_pos (data->current_loop)
3023 && allow_ip_end_pos_p (data->current_loop))
3024 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3025 }
3026
3027 /* Adds standard iv candidates. */
3028
3029 static void
add_standard_iv_candidates(struct ivopts_data * data)3030 add_standard_iv_candidates (struct ivopts_data *data)
3031 {
3032 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3033
3034 /* The same for a double-integer type if it is still fast enough. */
3035 if (TYPE_PRECISION
3036 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3037 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3038 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3039 build_int_cst (long_integer_type_node, 1), true, NULL);
3040
3041 /* The same for a double-integer type if it is still fast enough. */
3042 if (TYPE_PRECISION
3043 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3044 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3045 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3046 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3047 }
3048
3049
/* Adds candidates based on the old induction variable IV.  Besides the biv
   itself, this includes a sizetype variant for address uses, a zero-based
   variant, and an IP_ORIGINAL candidate that leaves the biv untouched.  */

static void
add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
{
  gimple *phi;
  tree def;
  struct iv_cand *cand;

  /* Check if this biv is used in address type use.  */
  if (iv->no_overflow && iv->have_address_use
      && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
      && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
    {
      tree base = fold_convert (sizetype, iv->base);
      tree step = fold_convert (sizetype, iv->step);

      /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
      add_candidate (data, base, step, true, NULL, iv);
      /* Add iv cand of the original type only if it has nonlinear use.  */
      if (iv->have_use_for)
	add_candidate (data, iv->base, iv->step, true, NULL);
    }
  else
    add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
	 it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  cand = add_candidate_1 (data,
				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
				  SSA_NAME_DEF_STMT (def));
	  if (cand)
	    {
	      /* The original biv and its latch value play the roles of the
		 candidate's before/after variables.  */
	      cand->var_before = iv->ssa_name;
	      cand->var_after = def;
	    }
	}
      else
	gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
3107
3108 /* Adds candidates based on the old induction variables. */
3109
3110 static void
add_iv_candidate_for_bivs(struct ivopts_data * data)3111 add_iv_candidate_for_bivs (struct ivopts_data *data)
3112 {
3113 unsigned i;
3114 struct iv *iv;
3115 bitmap_iterator bi;
3116
3117 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3118 {
3119 iv = ver_info (data, i)->iv;
3120 if (iv && iv->biv_p && !integer_zerop (iv->step))
3121 add_iv_candidate_for_biv (data, iv);
3122 }
3123 }
3124
3125 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3126
3127 static void
record_common_cand(struct ivopts_data * data,tree base,tree step,struct iv_use * use)3128 record_common_cand (struct ivopts_data *data, tree base,
3129 tree step, struct iv_use *use)
3130 {
3131 struct iv_common_cand ent;
3132 struct iv_common_cand **slot;
3133
3134 gcc_assert (use != NULL);
3135
3136 ent.base = base;
3137 ent.step = step;
3138 ent.hash = iterative_hash_expr (base, 0);
3139 ent.hash = iterative_hash_expr (step, ent.hash);
3140
3141 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3142 if (*slot == NULL)
3143 {
3144 *slot = new iv_common_cand ();
3145 (*slot)->base = base;
3146 (*slot)->step = step;
3147 (*slot)->uses.create (8);
3148 (*slot)->hash = ent.hash;
3149 data->iv_common_cands.safe_push ((*slot));
3150 }
3151 (*slot)->uses.safe_push (use);
3152 return;
3153 }
3154
3155 /* Comparison function used to sort common candidates. */
3156
3157 static int
common_cand_cmp(const void * p1,const void * p2)3158 common_cand_cmp (const void *p1, const void *p2)
3159 {
3160 unsigned n1, n2;
3161 const struct iv_common_cand *const *const ccand1
3162 = (const struct iv_common_cand *const *)p1;
3163 const struct iv_common_cand *const *const ccand2
3164 = (const struct iv_common_cand *const *)p2;
3165
3166 n1 = (*ccand1)->uses.length ();
3167 n2 = (*ccand2)->uses.length ();
3168 return n2 - n1;
3169 }
3170
/* Adds IV candidates based on common candidates recorded.  Only common
   candidates shared by more than one use become real candidates; the
   deriving uses are then marked as related to them.  */

static void
add_iv_candidate_derived_from_uses (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand_1, *cand_2;

  /* Sort so the most-shared candidates come first.  */
  data->iv_common_cands.qsort (common_cand_cmp);
  for (i = 0; i < data->iv_common_cands.length (); i++)
    {
      struct iv_common_cand *ptr = data->iv_common_cands[i];

      /* Only add IV candidate if it's derived from multiple uses.  */
      if (ptr->uses.length () <= 1)
	break;

      cand_1 = NULL;
      cand_2 = NULL;
      if (ip_normal_pos (data->current_loop))
	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_NORMAL, NULL, NULL);

      if (ip_end_pos (data->current_loop)
	  && allow_ip_end_pos_p (data->current_loop))
	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_END, NULL, NULL);

      /* Bind deriving uses and the new candidates.  */
      for (j = 0; j < ptr->uses.length (); j++)
	{
	  struct iv_use *use = ptr->uses[j];
	  if (cand_1)
	    bitmap_set_bit (use->related_cands, cand_1->id);
	  if (cand_2)
	    bitmap_set_bit (use->related_cands, cand_2->id);
	}
    }

  /* Release data since it is useless from this point.  */
  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}
3214
/* Adds candidates based on the value of USE's iv.  */

static void
add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;
  struct iv *iv = use->iv;

  /* Candidate with exactly the base and step of the use itself.  */
  add_candidate (data, iv->base, iv->step, false, use);

  /* Record common candidate for use in case it can be shared by others.  */
  record_common_cand (data, iv->base, iv->step, use);

  /* Record common candidate with initial value zero.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

  /* Record common candidate with constant offset stripped in base.
     Like the use itself, we also add candidate directly for it.  */
  base = strip_offset (iv->base, &offset);
  if (offset || base != iv->base)
    {
      record_common_cand (data, base, iv->step, use);
      add_candidate (data, base, iv->step, false, use);
    }

  /* Record common candidate with base_object removed in base.  */
  if (iv->base_object != NULL)
    {
      unsigned i;
      aff_tree aff_base;
      tree step, base_object = iv->base_object;

      base = iv->base;
      step = iv->step;
      STRIP_NOPS (base);
      STRIP_NOPS (step);
      STRIP_NOPS (base_object);
      tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
      /* Find the base object appearing in the affine decomposition of
	 the base with coefficient 1, so removing it is just dropping
	 that element.  */
      for (i = 0; i < aff_base.n; i++)
	{
	  if (aff_base.elts[i].coef != 1)
	    continue;

	  if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
	    break;
	}
      if (i < aff_base.n)
	{
	  aff_combination_remove_elt (&aff_base, i);
	  base = aff_combination_to_tree (&aff_base);
	  basetype = TREE_TYPE (base);
	  if (POINTER_TYPE_P (basetype))
	    basetype = sizetype;

	  step = fold_convert (basetype, step);
	  record_common_cand (data, base, step, use);
	  /* Also record common candidate with offset stripped.  */
	  base = strip_offset (base, &offset);
	  if (offset)
	    record_common_cand (data, base, step, use);
	}
    }

  /* At last, add auto-incremental candidates.  Make such variables
     important since other iv uses with same base object may be based
     on it.  */
  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, iv->base, iv->step, true, use);
}
3289
3290 /* Adds candidates based on the uses. */
3291
3292 static void
add_iv_candidate_for_uses(struct ivopts_data * data)3293 add_iv_candidate_for_uses (struct ivopts_data *data)
3294 {
3295 unsigned i;
3296
3297 for (i = 0; i < n_iv_uses (data); i++)
3298 {
3299 struct iv_use *use = iv_use (data, i);
3300
3301 if (!use)
3302 continue;
3303
3304 switch (use->type)
3305 {
3306 case USE_NONLINEAR_EXPR:
3307 case USE_COMPARE:
3308 case USE_ADDRESS:
3309 /* Just add the ivs based on the value of the iv used here. */
3310 add_iv_candidate_for_use (data, use);
3311 break;
3312
3313 default:
3314 gcc_unreachable ();
3315 }
3316 }
3317 add_iv_candidate_derived_from_uses (data);
3318 }
3319
3320 /* Record important candidates and add them to related_cands bitmaps. */
3321
3322 static void
record_important_candidates(struct ivopts_data * data)3323 record_important_candidates (struct ivopts_data *data)
3324 {
3325 unsigned i;
3326 struct iv_use *use;
3327
3328 for (i = 0; i < n_iv_cands (data); i++)
3329 {
3330 struct iv_cand *cand = iv_cand (data, i);
3331
3332 if (cand->important)
3333 bitmap_set_bit (data->important_candidates, i);
3334 }
3335
3336 data->consider_all_candidates = (n_iv_cands (data)
3337 <= CONSIDER_ALL_CANDIDATES_BOUND);
3338
3339 /* Add important candidates to uses' related_cands bitmaps. */
3340 for (i = 0; i < n_iv_uses (data); i++)
3341 {
3342 use = iv_use (data, i);
3343 bitmap_ior_into (use->related_cands, data->important_candidates);
3344 }
3345 }
3346
3347 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3348 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3349 we allocate a simple list to every use. */
3350
3351 static void
alloc_use_cost_map(struct ivopts_data * data)3352 alloc_use_cost_map (struct ivopts_data *data)
3353 {
3354 unsigned i, size, s;
3355
3356 for (i = 0; i < n_iv_uses (data); i++)
3357 {
3358 struct iv_use *use = iv_use (data, i);
3359
3360 if (data->consider_all_candidates)
3361 size = n_iv_cands (data);
3362 else
3363 {
3364 s = bitmap_count_bits (use->related_cands);
3365
3366 /* Round up to the power of two, so that moduling by it is fast. */
3367 size = s ? (1 << ceil_log2 (s)) : 1;
3368 }
3369
3370 use->n_map_members = size;
3371 use->cost_map = XCNEWVEC (struct cost_pair, size);
3372 }
3373 }
3374
3375 /* Returns description of computation cost of expression whose runtime
3376 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3377
3378 static comp_cost
new_cost(unsigned runtime,unsigned complexity)3379 new_cost (unsigned runtime, unsigned complexity)
3380 {
3381 comp_cost cost;
3382
3383 cost.cost = runtime;
3384 cost.complexity = complexity;
3385
3386 return cost;
3387 }
3388
3389 /* Returns true if COST is infinite. */
3390
3391 static bool
infinite_cost_p(comp_cost cost)3392 infinite_cost_p (comp_cost cost)
3393 {
3394 return cost.cost == INFTY;
3395 }
3396
3397 /* Adds costs COST1 and COST2. */
3398
3399 static comp_cost
add_costs(comp_cost cost1,comp_cost cost2)3400 add_costs (comp_cost cost1, comp_cost cost2)
3401 {
3402 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3403 return infinite_cost;
3404
3405 cost1.cost += cost2.cost;
3406 cost1.complexity += cost2.complexity;
3407
3408 return cost1;
3409 }
3410 /* Subtracts costs COST1 and COST2. */
3411
3412 static comp_cost
sub_costs(comp_cost cost1,comp_cost cost2)3413 sub_costs (comp_cost cost1, comp_cost cost2)
3414 {
3415 cost1.cost -= cost2.cost;
3416 cost1.complexity -= cost2.complexity;
3417
3418 return cost1;
3419 }
3420
3421 /* Returns a negative number if COST1 < COST2, a positive number if
3422 COST1 > COST2, and 0 if COST1 = COST2. */
3423
3424 static int
compare_costs(comp_cost cost1,comp_cost cost2)3425 compare_costs (comp_cost cost1, comp_cost cost2)
3426 {
3427 if (cost1.cost == cost2.cost)
3428 return cost1.complexity - cost2.complexity;
3429
3430 return cost1.cost - cost2.cost;
3431 }
3432
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value,
		 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  /* Infinite pairs are simply not recorded; their absence from the map
     marks the candidate as unusable for this use.  */
  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      /* cost_map is a plain array directly indexed by candidate id.  */
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  /* Open-addressing insertion: linear probe from S to the end of the
     table, then wrap to the beginning.  */
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The table is sized to hold all related candidates, so an empty slot
     must exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
3481
3482 /* Gets cost of (USE, CANDIDATE) pair. */
3483
3484 static struct cost_pair *
get_use_iv_cost(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)3485 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3486 struct iv_cand *cand)
3487 {
3488 unsigned i, s;
3489 struct cost_pair *ret;
3490
3491 if (!cand)
3492 return NULL;
3493
3494 if (data->consider_all_candidates)
3495 {
3496 ret = use->cost_map + cand->id;
3497 if (!ret->cand)
3498 return NULL;
3499
3500 return ret;
3501 }
3502
3503 /* n_map_members is a power of two, so this computes modulo. */
3504 s = cand->id & (use->n_map_members - 1);
3505 for (i = s; i < use->n_map_members; i++)
3506 if (use->cost_map[i].cand == cand)
3507 return use->cost_map + i;
3508 else if (use->cost_map[i].cand == NULL)
3509 return NULL;
3510 for (i = 0; i < s; i++)
3511 if (use->cost_map[i].cand == cand)
3512 return use->cost_map + i;
3513 else if (use->cost_map[i].cand == NULL)
3514 return NULL;
3515
3516 return NULL;
3517 }
3518
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.
   REGNO supplies (and is advanced past) the next free fake register
   number used to fabricate addresses for non-static objects.  */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      /* Static/external objects live at a symbolic address.  */
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      /* Other objects are addressed through a fresh fake register.  */
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
3546
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  Decls
   given a fake DECL_RTL here are pushed onto decl_rtl_to_reset so the
   fake RTL can be cleared afterwards.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip handled components (COMPONENT_REF, ARRAY_REF, ...) to
	 reach the base object whose address is taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* Clearing *WS stops walk_tree from descending further.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in a register; fabricate a memory
	 location for them.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so its fake DECL_RTL can be reset later.  */
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
3607
/* Determines cost of the computation of EXPR.  SPEED selects whether
   costs are measured for speed or for size.  Works by expanding EXPR
   to RTL in a throwaway sequence and summing the insn costs.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  /* Temporarily force a normal frequency profile so the cost hooks see
     exactly the SPEED setting requested; restored below.  */
  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  /* Give any referenced decls fake DECL_RTL so expansion can proceed
     outside the normal expand pass.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* If the result is not already in a register, account for the final
     load or move as well.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}
3641
3642 /* Returns variable containing the value of candidate CAND at statement AT. */
3643
3644 static tree
var_at_stmt(struct loop * loop,struct iv_cand * cand,gimple * stmt)3645 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3646 {
3647 if (stmt_after_increment (loop, cand, stmt))
3648 return cand->var_after;
3649 else
3650 return cand->var_before;
3651 }
3652
3653 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3654 same precision that is at least as wide as the precision of TYPE, stores
3655 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3656 type of A and B. */
3657
3658 static tree
determine_common_wider_type(tree * a,tree * b)3659 determine_common_wider_type (tree *a, tree *b)
3660 {
3661 tree wider_type = NULL;
3662 tree suba, subb;
3663 tree atype = TREE_TYPE (*a);
3664
3665 if (CONVERT_EXPR_P (*a))
3666 {
3667 suba = TREE_OPERAND (*a, 0);
3668 wider_type = TREE_TYPE (suba);
3669 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3670 return atype;
3671 }
3672 else
3673 return atype;
3674
3675 if (CONVERT_EXPR_P (*b))
3676 {
3677 subb = TREE_OPERAND (*b, 0);
3678 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3679 return atype;
3680 }
3681 else
3682 return atype;
3683
3684 *a = suba;
3685 *b = subb;
3686 return wider_type;
3687 }
3688
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple *at,
		     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  /* Work in the unsigned variant of the use type so wrap-around is
     well defined.  */
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If candidate is added from a biv whose type is smaller than
	     ctype, we know both candidate and the biv won't overflow.
	     In this case, it's safe to skip the conversion in candidate.
	     As an example, (unsigned short)((unsigned long)A) equals to
	     (unsigned short)A, if A has a type no larger than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
3798
3799 /* Return the type of USE. */
3800
3801 static tree
get_use_type(struct iv_use * use)3802 get_use_type (struct iv_use *use)
3803 {
3804 tree base_type = TREE_TYPE (use->iv->base);
3805 tree type;
3806
3807 if (use->type == USE_ADDRESS)
3808 {
3809 /* The base_type may be a void pointer. Create a pointer type based on
3810 the mem_ref instead. */
3811 type = build_pointer_type (TREE_TYPE (*use->op_p));
3812 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3813 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3814 }
3815 else
3816 type = base_type;
3817
3818 return type;
3819 }
3820
3821 /* Determines the expression by that USE is expressed from induction variable
3822 CAND at statement AT in LOOP. The computation is unshared. */
3823
3824 static tree
get_computation_at(struct loop * loop,struct iv_use * use,struct iv_cand * cand,gimple * at)3825 get_computation_at (struct loop *loop,
3826 struct iv_use *use, struct iv_cand *cand, gimple *at)
3827 {
3828 aff_tree aff;
3829 tree type = get_use_type (use);
3830
3831 if (!get_computation_aff (loop, use, cand, at, &aff))
3832 return NULL_TREE;
3833 unshare_aff_combination (&aff);
3834 return fold_convert (type, aff_combination_to_tree (&aff));
3835 }
3836
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  Convenience wrapper around
   get_computation_at, expressing USE at its own statement.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
3845
3846 /* Adjust the cost COST for being in loop setup rather than loop body.
3847 If we're optimizing for space, the loop setup overhead is constant;
3848 if we're optimizing for speed, amortize it over the per-iteration cost. */
3849 static unsigned
adjust_setup_cost(struct ivopts_data * data,unsigned cost)3850 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3851 {
3852 if (cost == INFTY)
3853 return cost;
3854 else if (optimize_loop_for_speed_p (data->current_loop))
3855 return cost / avg_loop_niter (data->current_loop);
3856 else
3857 return cost;
3858 }
3859
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  Results are cached per (AS, MODE) pair across
   calls.  */


bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  /* One cache slot per (address space, machine mode) pair.  */
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      /* First query for this pair: probe the target by building
	 reg1 * i + reg2 and reg1 * i addresses for every i in
	 [-MAX_RATIO, MAX_RATIO] and asking whether they are valid.  */
      machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  /* Reuse the shared RTX, patching in the multiplier.  */
	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as)
	      || memory_address_addr_space_p (mode, scaled, as))
	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
3916
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

/* Kinds of auto-increment addressing the target may support.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

/* Cached addressing-cost information for one (address space, mode) pair.  */

struct address_cost_data
{
  /* Valid constant offset range for a reg + offset address.  */
  HOST_WIDE_INT min_offset, max_offset;
  /* Cost indexed by the [sym_p][var_p][off_p][rat_p] address-shape flags.  */
  unsigned costs[2][2][2][2];
  /* Cost of each supported auto-increment addressing kind.  */
  unsigned ainc_costs[AINC_NONE];
};
3946
3947
3948 static comp_cost
get_address_cost(bool symbol_present,bool var_present,unsigned HOST_WIDE_INT offset,HOST_WIDE_INT ratio,HOST_WIDE_INT cstep,machine_mode mem_mode,addr_space_t as,bool speed,bool stmt_after_inc,bool * may_autoinc)3949 get_address_cost (bool symbol_present, bool var_present,
3950 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3951 HOST_WIDE_INT cstep, machine_mode mem_mode,
3952 addr_space_t as, bool speed,
3953 bool stmt_after_inc, bool *may_autoinc)
3954 {
3955 machine_mode address_mode = targetm.addr_space.address_mode (as);
3956 static vec<address_cost_data *> address_cost_data_list;
3957 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3958 address_cost_data *data;
3959 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3960 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3961 unsigned cost, acost, complexity;
3962 enum ainc_type autoinc_type;
3963 bool offset_p, ratio_p, autoinc;
3964 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3965 unsigned HOST_WIDE_INT mask;
3966 unsigned bits;
3967
3968 if (data_index >= address_cost_data_list.length ())
3969 address_cost_data_list.safe_grow_cleared (data_index + 1);
3970
3971 data = address_cost_data_list[data_index];
3972 if (!data)
3973 {
3974 HOST_WIDE_INT i;
3975 HOST_WIDE_INT rat, off = 0;
3976 int old_cse_not_expected, width;
3977 unsigned sym_p, var_p, off_p, rat_p, add_c;
3978 rtx_insn *seq;
3979 rtx addr, base;
3980 rtx reg0, reg1;
3981
3982 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3983
3984 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3985
3986 width = GET_MODE_BITSIZE (address_mode) - 1;
3987 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3988 width = HOST_BITS_PER_WIDE_INT - 1;
3989 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3990
3991 for (i = width; i >= 0; i--)
3992 {
3993 off = -((unsigned HOST_WIDE_INT) 1 << i);
3994 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3995 if (memory_address_addr_space_p (mem_mode, addr, as))
3996 break;
3997 }
3998 data->min_offset = (i == -1? 0 : off);
3999
4000 for (i = width; i >= 0; i--)
4001 {
4002 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4003 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004 if (memory_address_addr_space_p (mem_mode, addr, as))
4005 break;
4006 /* For some strict-alignment targets, the offset must be naturally
4007 aligned. Try an aligned offset if mem_mode is not QImode. */
4008 off = mem_mode != QImode
4009 ? ((unsigned HOST_WIDE_INT) 1 << i)
4010 - GET_MODE_SIZE (mem_mode)
4011 : 0;
4012 if (off > 0)
4013 {
4014 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4015 if (memory_address_addr_space_p (mem_mode, addr, as))
4016 break;
4017 }
4018 }
4019 if (i == -1)
4020 off = 0;
4021 data->max_offset = off;
4022
4023 if (dump_file && (dump_flags & TDF_DETAILS))
4024 {
4025 fprintf (dump_file, "get_address_cost:\n");
4026 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4027 GET_MODE_NAME (mem_mode),
4028 data->min_offset);
4029 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4030 GET_MODE_NAME (mem_mode),
4031 data->max_offset);
4032 }
4033
4034 rat = 1;
4035 for (i = 2; i <= MAX_RATIO; i++)
4036 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4037 {
4038 rat = i;
4039 break;
4040 }
4041
4042 /* Compute the cost of various addressing modes. */
4043 acost = 0;
4044 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4045 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4046
4047 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4048 || USE_STORE_PRE_DECREMENT (mem_mode))
4049 {
4050 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4051 has_predec[mem_mode]
4052 = memory_address_addr_space_p (mem_mode, addr, as);
4053
4054 if (has_predec[mem_mode])
4055 data->ainc_costs[AINC_PRE_DEC]
4056 = address_cost (addr, mem_mode, as, speed);
4057 }
4058 if (USE_LOAD_POST_DECREMENT (mem_mode)
4059 || USE_STORE_POST_DECREMENT (mem_mode))
4060 {
4061 addr = gen_rtx_POST_DEC (address_mode, reg0);
4062 has_postdec[mem_mode]
4063 = memory_address_addr_space_p (mem_mode, addr, as);
4064
4065 if (has_postdec[mem_mode])
4066 data->ainc_costs[AINC_POST_DEC]
4067 = address_cost (addr, mem_mode, as, speed);
4068 }
4069 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4070 || USE_STORE_PRE_DECREMENT (mem_mode))
4071 {
4072 addr = gen_rtx_PRE_INC (address_mode, reg0);
4073 has_preinc[mem_mode]
4074 = memory_address_addr_space_p (mem_mode, addr, as);
4075
4076 if (has_preinc[mem_mode])
4077 data->ainc_costs[AINC_PRE_INC]
4078 = address_cost (addr, mem_mode, as, speed);
4079 }
4080 if (USE_LOAD_POST_INCREMENT (mem_mode)
4081 || USE_STORE_POST_INCREMENT (mem_mode))
4082 {
4083 addr = gen_rtx_POST_INC (address_mode, reg0);
4084 has_postinc[mem_mode]
4085 = memory_address_addr_space_p (mem_mode, addr, as);
4086
4087 if (has_postinc[mem_mode])
4088 data->ainc_costs[AINC_POST_INC]
4089 = address_cost (addr, mem_mode, as, speed);
4090 }
4091 for (i = 0; i < 16; i++)
4092 {
4093 sym_p = i & 1;
4094 var_p = (i >> 1) & 1;
4095 off_p = (i >> 2) & 1;
4096 rat_p = (i >> 3) & 1;
4097
4098 addr = reg0;
4099 if (rat_p)
4100 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4101 gen_int_mode (rat, address_mode));
4102
4103 if (var_p)
4104 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4105
4106 if (sym_p)
4107 {
4108 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4109 /* ??? We can run into trouble with some backends by presenting
4110 it with symbols which haven't been properly passed through
4111 targetm.encode_section_info. By setting the local bit, we
4112 enhance the probability of things working. */
4113 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4114
4115 if (off_p)
4116 base = gen_rtx_fmt_e (CONST, address_mode,
4117 gen_rtx_fmt_ee
4118 (PLUS, address_mode, base,
4119 gen_int_mode (off, address_mode)));
4120 }
4121 else if (off_p)
4122 base = gen_int_mode (off, address_mode);
4123 else
4124 base = NULL_RTX;
4125
4126 if (base)
4127 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4128
4129 start_sequence ();
4130 /* To avoid splitting addressing modes, pretend that no cse will
4131 follow. */
4132 old_cse_not_expected = cse_not_expected;
4133 cse_not_expected = true;
4134 addr = memory_address_addr_space (mem_mode, addr, as);
4135 cse_not_expected = old_cse_not_expected;
4136 seq = get_insns ();
4137 end_sequence ();
4138
4139 acost = seq_cost (seq, speed);
4140 acost += address_cost (addr, mem_mode, as, speed);
4141
4142 if (!acost)
4143 acost = 1;
4144 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4145 }
4146
4147 /* On some targets, it is quite expensive to load symbol to a register,
4148 which makes addresses that contain symbols look much more expensive.
4149 However, the symbol will have to be loaded in any case before the
4150 loop (and quite likely we have it in register already), so it does not
4151 make much sense to penalize them too heavily. So make some final
4152 tweaks for the SYMBOL_PRESENT modes:
4153
4154 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4155 var is cheaper, use this mode with small penalty.
4156 If VAR_PRESENT is true, try whether the mode with
4157 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4158 if this is the case, use it. */
4159 add_c = add_cost (speed, address_mode);
4160 for (i = 0; i < 8; i++)
4161 {
4162 var_p = i & 1;
4163 off_p = (i >> 1) & 1;
4164 rat_p = (i >> 2) & 1;
4165
4166 acost = data->costs[0][1][off_p][rat_p] + 1;
4167 if (var_p)
4168 acost += add_c;
4169
4170 if (acost < data->costs[1][var_p][off_p][rat_p])
4171 data->costs[1][var_p][off_p][rat_p] = acost;
4172 }
4173
4174 if (dump_file && (dump_flags & TDF_DETAILS))
4175 {
4176 fprintf (dump_file, "Address costs:\n");
4177
4178 for (i = 0; i < 16; i++)
4179 {
4180 sym_p = i & 1;
4181 var_p = (i >> 1) & 1;
4182 off_p = (i >> 2) & 1;
4183 rat_p = (i >> 3) & 1;
4184
4185 fprintf (dump_file, " ");
4186 if (sym_p)
4187 fprintf (dump_file, "sym + ");
4188 if (var_p)
4189 fprintf (dump_file, "var + ");
4190 if (off_p)
4191 fprintf (dump_file, "cst + ");
4192 if (rat_p)
4193 fprintf (dump_file, "rat * ");
4194
4195 acost = data->costs[sym_p][var_p][off_p][rat_p];
4196 fprintf (dump_file, "index costs %d\n", acost);
4197 }
4198 if (has_predec[mem_mode] || has_postdec[mem_mode]
4199 || has_preinc[mem_mode] || has_postinc[mem_mode])
4200 fprintf (dump_file, " May include autoinc/dec\n");
4201 fprintf (dump_file, "\n");
4202 }
4203
4204 address_cost_data_list[data_index] = data;
4205 }
4206
4207 bits = GET_MODE_BITSIZE (address_mode);
4208 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4209 offset &= mask;
4210 if ((offset >> (bits - 1) & 1))
4211 offset |= ~mask;
4212 s_offset = offset;
4213
4214 autoinc = false;
4215 autoinc_type = AINC_NONE;
4216 msize = GET_MODE_SIZE (mem_mode);
4217 autoinc_offset = offset;
4218 if (stmt_after_inc)
4219 autoinc_offset += ratio * cstep;
4220 if (symbol_present || var_present || ratio != 1)
4221 autoinc = false;
4222 else
4223 {
4224 if (has_postinc[mem_mode] && autoinc_offset == 0
4225 && msize == cstep)
4226 autoinc_type = AINC_POST_INC;
4227 else if (has_postdec[mem_mode] && autoinc_offset == 0
4228 && msize == -cstep)
4229 autoinc_type = AINC_POST_DEC;
4230 else if (has_preinc[mem_mode] && autoinc_offset == msize
4231 && msize == cstep)
4232 autoinc_type = AINC_PRE_INC;
4233 else if (has_predec[mem_mode] && autoinc_offset == -msize
4234 && msize == -cstep)
4235 autoinc_type = AINC_PRE_DEC;
4236
4237 if (autoinc_type != AINC_NONE)
4238 autoinc = true;
4239 }
4240
4241 cost = 0;
4242 offset_p = (s_offset != 0
4243 && data->min_offset <= s_offset
4244 && s_offset <= data->max_offset);
4245 ratio_p = (ratio != 1
4246 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4247
4248 if (ratio != 1 && !ratio_p)
4249 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4250
4251 if (s_offset && !offset_p && !symbol_present)
4252 cost += add_cost (speed, address_mode);
4253
4254 if (may_autoinc)
4255 *may_autoinc = autoinc;
4256 if (autoinc)
4257 acost = data->ainc_costs[autoinc_type];
4258 else
4259 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4260 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4261 return new_cost (cost + acost, complexity);
4262 }
4263
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the multiplication that can be turned into a shift.
   COST0 and COST1 are the costs for calculating the operands of EXPR.
   Returns true if the multiplier is an exact power of two that fits the
   mode's shift range (so a shift-add/sub form is applicable), and returns
   the resulting cost in COST; returns false otherwise.  */

static bool
get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  /* M is the shift amount: log2 of the constant multiplier.  */
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  /* Bail out unless the multiplier is a power of two representable as a
     shift in this mode.  */
  if (!(m >= 0 && m < maxm))
    return false;

  STRIP_NOPS (op1);
  /* Whether the multiplication is the second operand matters for MINUS_EXPR:
     it selects between the shiftsub0/shiftsub1 cost tables below.  */
  mult_in_op1 = operand_equal_p (op1, mult, 0);

  /* Cost of the naive form: a shift insn followed by an add insn.  */
  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = new_cost (MIN (as_cost, sa_cost), 0);
  /* Add the cost of the non-multiplied operand of EXPR.  */
  res = add_costs (res, mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  /* The multiplied operand itself may need to be forced into a register.  */
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
4308
/* Estimates cost of forcing expression EXPR into a variable.  SPEED selects
   whether we optimize for speed (true) or size (false).  Works recursively
   on the operands of EXPR; the total is capped by target_spill_cost.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  /* Costs of representative constants/symbols/addresses, measured once per
     compilation (index 0 = size, 1 = speed).  */
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      /* Build a fake static variable and take its address, so we can
	 measure what loading a symbol / symbol + offset costs on this
	 target.  */
      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  /* The +1 penalties slightly bias against symbols/addresses so
	     that plain integer constants are preferred when equal.  */
	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* An SSA name or declaration is already a variable: free.  */
  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  /* Address of a plain declaration: symbol load.  */
	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      /* Other invariant addresses (e.g. &a[10]): symbol + offset.  */
      return new_cost (address_cost [speed], 0);
    }

  /* Decompose EXPR into operands for recursive costing.  */
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  /* SSA names and constants are free as operands; anything else is costed
     recursively.  */
  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  /* A +/- with a power-of-two multiplication on one side may map
	     to a cheaper shift-add/sub instruction.  */
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
				       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      /* Only multiplication by a compile-time constant is costed;
	 a general variable multiply is treated as a spill.  */
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
					     mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
					     mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
4481
4482 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4483 invariants the computation depends on. */
4484
4485 static comp_cost
force_var_cost(struct ivopts_data * data,tree expr,bitmap * depends_on)4486 force_var_cost (struct ivopts_data *data,
4487 tree expr, bitmap *depends_on)
4488 {
4489 if (depends_on)
4490 {
4491 fd_ivopts_data = data;
4492 walk_tree (&expr, find_depends, depends_on, NULL);
4493 }
4494
4495 return force_expr_to_var_cost (expr, data->speed);
4496 }
4497
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  machine_mode mode;
  int unsignedp, reversep, volatilep;

  /* Peel the reference down to its base object, accumulating the constant
     bit offset in BITPOS and any variable offset in TOFFSET.  */
  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &reversep, &volatilep, false);

  /* A variable offset, a sub-byte position, a reverse-storage-order access,
     or a base that is not a simple variable cannot be split; treat the
     whole address as an opaque variable at spill cost.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || reversep
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      if (depends_on)
	walk_tree (&addr, find_depends, depends_on, NULL);

      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  /* A static or external variable contributes a symbol part; the cost of
     loading it is accounted for by the caller's address cost tables.  */
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  /* A local (automatic) variable: address is a variable, no symbol.  */
  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
4545
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  E1 must be an ADDR_EXPR.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* Best case: the difference folds to a compile-time constant.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  /* E1 - 0: just split the single address into its parts.  */
  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  /* General case: the difference is a run-time variable.  */
  *symbol_present = false;
  *var_present = true;

  /* Compute E1 - E2 in affine form and cost forcing it into a register.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
4586
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  /* Move the constant offsets of both operands into *OFFSET up front.  */
  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Differences involving an address are handled separately so the symbol
     part can be recognized.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  /* Identical operands cancel: the difference is the constant offset.  */
  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  /* E1 - 0 is just E1.  */
  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  /* 0 - E2 is a negation: cost of E2 plus a multiply by -1.  */
  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  /* General case: build E1 - E2 in affine form and cost forcing the result
     into a register.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
4641
4642 /* Returns true if AFF1 and AFF2 are identical. */
4643
4644 static bool
compare_aff_trees(aff_tree * aff1,aff_tree * aff2)4645 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4646 {
4647 unsigned i;
4648
4649 if (aff1->n != aff2->n)
4650 return false;
4651
4652 for (i = 0; i < aff1->n; i++)
4653 {
4654 if (aff1->elts[i].coef != aff2->elts[i].coef)
4655 return false;
4656
4657 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4658 return false;
4659 }
4660 return true;
4661 }
4662
4663 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4664
4665 static int
get_expr_id(struct ivopts_data * data,tree expr)4666 get_expr_id (struct ivopts_data *data, tree expr)
4667 {
4668 struct iv_inv_expr_ent ent;
4669 struct iv_inv_expr_ent **slot;
4670
4671 ent.expr = expr;
4672 ent.hash = iterative_hash_expr (expr, 0);
4673 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4674 if (*slot)
4675 return (*slot)->id;
4676
4677 *slot = XNEW (struct iv_inv_expr_ent);
4678 (*slot)->expr = expr;
4679 (*slot)->hash = ent.hash;
4680 (*slot)->id = data->inv_expr_id++;
4681 return (*slot)->id;
4682 }
4683
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
			    tree cbase, HOST_WIDE_INT ratio,
			    bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  /* Keep the stripped-but-unmodified bases; the constant-stripping below
     only affects the local copies used for the quick checks.  */
  ub = ubase;
  cb = cbase;

  /* Two constants fold at compile time: no temporary needed.  */
  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part. */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
	ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part. */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
	cbase = TREE_OPERAND (cbase, 0);
    }

  /* For an address computation, a register (or invariant address) plus a
     constant folds into the addressing mode: no temporary needed.  */
  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
	   || (TREE_CODE (ubase) == ADDR_EXPR
	       && is_gimple_min_invariant (ubase)))
	  && (TREE_CODE (cbase) == INTEGER_CST))
	return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
	   || (TREE_CODE (cbase) == ADDR_EXPR
	       && is_gimple_min_invariant (cbase)))
	  && (TREE_CODE (ubase) == INTEGER_CST))
	return -1;
    }

  /* With RATIO == 1 the bases may cancel entirely.  */
  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
	return -1;

      /* Addresses of the same object, possibly through a zero-index
	 ARRAY_REF (&a[0] vs &a), also cancel.  */
      if (TREE_CODE (ubase) == ADDR_EXPR
	  && TREE_CODE (cbase) == ADDR_EXPR)
	{
	  tree usym, csym;

	  usym = TREE_OPERAND (ubase, 0);
	  csym = TREE_OPERAND (cbase, 0);
	  if (TREE_CODE (usym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (usym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		usym = TREE_OPERAND (usym, 0);
	    }
	  if (TREE_CODE (csym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (csym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		csym = TREE_OPERAND (csym, 0);
	    }
	  if (operand_equal_p (usym, csym, 0))
	    return -1;
	}
      /* Now do more complex comparison  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
	return -1;
    }

  /* A temporary is required: build UBASE - RATIO * CBASE from the original
     (non-stripped) bases and register it in the invariant table.  */
  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
4785
4786
4787
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  If INV_EXPR_ID is nonnull, record the id of
   the invariant expression (if any) the computation requires.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple *at,
			 bool *can_autoinc,
			 int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  machine_mode mem_mode = (address_p
				? TYPE_MODE (TREE_TYPE (*use->op_p))
				: VOIDmode);

  if (depends_on)
    *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  /* RATIO is the constant factor such that USTEP == RATIO * CSTEP; if no
     such constant exists, CAND cannot express USE.  */
  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      /* Fold ratio * cbase into the constant offset.  */
      offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  aff_tree real_cbase_aff;
	  aff_tree cstep_aff;

	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
				   &real_cbase_aff);
	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

	  aff_combination_add (&real_cbase_aff, &cstep_aff);
	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
	}

      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
	   && !POINTER_TYPE_P (ctype)
	   && multiplier_allowed_in_address_p
		(ratio, mem_mode,
			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      /* The ratio can be folded into the addressing mode; fold it into
	 CBASE and cost the remaining difference.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  if (POINTER_TYPE_P (ctype))
	    cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
	  else
	    cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
	}
      cbase
	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
			      ubase, cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      /* General case: force CBASE into a register and add it separately.  */
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
			difference_cost (data,
					 ubase, build_int_cst (utype, 0),
					 &symbol_present, &var_present,
					 &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  /* Record setup cost in scratch field.  */
  cost.scratch = cost.cost;
  /* Set of invariants depended on by sub use has already been computed
     for the first use in the group.  */
  if (use->sub_id)
    {
      cost.cost = 0;
      if (depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }
  else if (inv_expr_id)
    {
      *inv_expr_id =
	  get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
		      get_address_cost (symbol_present, var_present,
					offset, ratio, cstepi,
					mem_mode,
					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
					speed, stmt_is_after_inc,
					can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
      are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
				    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    cost = new_cost (computation_cost (comp, speed), 0);
    cost.scratch = 0;
    return cost;
  }
}
5034
5035 /* Determines the cost of the computation by that USE is expressed
5036 from induction variable CAND. If ADDRESS_P is true, we just need
5037 to create an address from it, otherwise we want to get it into
5038 register. A set of invariants we depend on is stored in
5039 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5040 autoinc addressing is likely. */
5041
5042 static comp_cost
get_computation_cost(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand,bool address_p,bitmap * depends_on,bool * can_autoinc,int * inv_expr_id)5043 get_computation_cost (struct ivopts_data *data,
5044 struct iv_use *use, struct iv_cand *cand,
5045 bool address_p, bitmap *depends_on,
5046 bool *can_autoinc, int *inv_expr_id)
5047 {
5048 return get_computation_cost_at (data,
5049 use, cand, address_p, depends_on, use->stmt,
5050 can_autoinc, inv_expr_id);
5051 }
5052
5053 /* Determines cost of basing replacement of USE on CAND in a generic
5054 expression. */
5055
5056 static bool
determine_use_iv_cost_generic(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)5057 determine_use_iv_cost_generic (struct ivopts_data *data,
5058 struct iv_use *use, struct iv_cand *cand)
5059 {
5060 bitmap depends_on;
5061 comp_cost cost;
5062 int inv_expr_id = -1;
5063
5064 /* The simple case first -- if we need to express value of the preserved
5065 original biv, the cost is 0. This also prevents us from counting the
5066 cost of increment twice -- once at this use and once in the cost of
5067 the candidate. */
5068 if (cand->pos == IP_ORIGINAL
5069 && cand->incremented_at == use->stmt)
5070 {
5071 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5072 ERROR_MARK, -1);
5073 return true;
5074 }
5075
5076 cost = get_computation_cost (data, use, cand, false, &depends_on,
5077 NULL, &inv_expr_id);
5078
5079 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5080 inv_expr_id);
5081
5082 return !infinite_cost_p (cost);
5083 }
5084
/* Determines cost of basing replacement of USE on CAND in an address.
   USE may be the head of a group of sub-uses sharing the same base;
   costs for the whole group are accumulated.  Returns false if the
   cost is infinite.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc, first;
  int inv_expr_id = -1;
  struct iv_use *sub_use;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
					 &can_autoinc, &inv_expr_id);
  comp_cost sub_cost = cost;

  if (cand->ainc_use == use)
    {
      /* Autoincrement subsumes the step update, so its cost need not be
	 paid again here.  */
      if (can_autoinc)
	cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	cost = infinite_cost;
    }

  if (!infinite_cost_p (cost) && use->next)
    {
      first = true;
      sub_use = use->next;
      /* We don't want to add setup cost for sub-uses.  */
      sub_cost.cost -= sub_cost.scratch;
      /* Add cost for sub uses in group.  */
      do
	{
	  /* Compute cost for the first sub use with different offset to
	     the main use and add it afterwards.  Costs for these uses
	     could be quite different.  Given below uses in a group:
	       use 0  : {base + A + offset_0, step}
	       use 0.1: {base + A + offset_0, step}
	       use 0.2: {base + A + offset_1, step}
	       use 0.3: {base + A + offset_2, step}
	     when we need to compute costs with candidate:
	       cand 1 : {base + B + offset_0, step}

	     The first sub use with different offset is use 0.2, its cost
	     is larger than cost of use 0/0.1 because we need to compute:
	       A - B + offset_1 - offset_0
	     rather than:
	       A - B.  */
	  if (first && use->addr_offset != sub_use->addr_offset)
	    {
	      first = false;
	      sub_cost = get_computation_cost (data, sub_use, cand, true,
					       NULL, &can_autoinc, NULL);
	      if (infinite_cost_p (sub_cost))
		{
		  cost = infinite_cost;
		  break;
		}
	    }

	  cost = add_costs (cost, sub_cost);
	  sub_use = sub_use->next;
	}
      while (sub_use);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
5157
5158 /* Computes value of candidate CAND at position AT in iteration NITER, and
5159 stores it to VAL. */
5160
5161 static void
cand_value_at(struct loop * loop,struct iv_cand * cand,gimple * at,tree niter,aff_tree * val)5162 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5163 aff_tree *val)
5164 {
5165 aff_tree step, delta, nit;
5166 struct iv *iv = cand->iv;
5167 tree type = TREE_TYPE (iv->base);
5168 tree steptype = type;
5169 if (POINTER_TYPE_P (type))
5170 steptype = sizetype;
5171 steptype = unsigned_type_for (type);
5172
5173 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5174 aff_combination_convert (&step, steptype);
5175 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5176 aff_combination_convert (&nit, steptype);
5177 aff_combination_mult (&nit, &step, &delta);
5178 if (stmt_after_increment (loop, cand, at))
5179 aff_combination_add (&delta, &step);
5180
5181 tree_to_aff_combination (iv->base, type, val);
5182 if (!POINTER_TYPE_P (type))
5183 aff_combination_convert (val, steptype);
5184 aff_combination_add (val, &delta);
5185 }
5186
5187 /* Returns period of induction variable iv. */
5188
5189 static tree
iv_period(struct iv * iv)5190 iv_period (struct iv *iv)
5191 {
5192 tree step = iv->step, period, type;
5193 tree pow2div;
5194
5195 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5196
5197 type = unsigned_type_for (TREE_TYPE (step));
5198 /* Period of the iv is lcm (step, type_range)/step -1,
5199 i.e., N*type_range/step - 1. Since type range is power
5200 of two, N == (step >> num_of_ending_zeros_binary (step),
5201 so the final result is
5202
5203 (type_range >> num_of_ending_zeros_binary (step)) - 1
5204
5205 */
5206 pow2div = num_ending_zeros (step);
5207
5208 period = build_low_bits_mask (type,
5209 (TYPE_PRECISION (type)
5210 - tree_to_uhwi (pow2div)));
5211
5212 return period;
5213 }
5214
5215 /* Returns the comparison operator used when eliminating the iv USE. */
5216
5217 static enum tree_code
iv_elimination_compare(struct ivopts_data * data,struct iv_use * use)5218 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5219 {
5220 struct loop *loop = data->current_loop;
5221 basic_block ex_bb;
5222 edge exit;
5223
5224 ex_bb = gimple_bb (use->stmt);
5225 exit = EDGE_SUCC (ex_bb, 0);
5226 if (flow_bb_inside_loop_p (loop, exit->dest))
5227 exit = EDGE_SUCC (ex_bb, 1);
5228
5229 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5230 }
5231
5232 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5233 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5234 calculation is performed in non-wrapping type.
5235
5236 TODO: More generally, we could test for the situation that
5237 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5238 This would require knowing the sign of OFFSET. */
5239
5240 static bool
difference_cannot_overflow_p(struct ivopts_data * data,tree base,tree offset)5241 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5242 {
5243 enum tree_code code;
5244 tree e1, e2;
5245 aff_tree aff_e1, aff_e2, aff_offset;
5246
5247 if (!nowrap_type_p (TREE_TYPE (base)))
5248 return false;
5249
5250 base = expand_simple_operations (base);
5251
5252 if (TREE_CODE (base) == SSA_NAME)
5253 {
5254 gimple *stmt = SSA_NAME_DEF_STMT (base);
5255
5256 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5257 return false;
5258
5259 code = gimple_assign_rhs_code (stmt);
5260 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5261 return false;
5262
5263 e1 = gimple_assign_rhs1 (stmt);
5264 e2 = gimple_assign_rhs2 (stmt);
5265 }
5266 else
5267 {
5268 code = TREE_CODE (base);
5269 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5270 return false;
5271 e1 = TREE_OPERAND (base, 0);
5272 e2 = TREE_OPERAND (base, 1);
5273 }
5274
5275 /* Use affine expansion as deeper inspection to prove the equality. */
5276 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5277 &aff_e2, &data->name_expansion_cache);
5278 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5279 &aff_offset, &data->name_expansion_cache);
5280 aff_combination_scale (&aff_offset, -1);
5281 switch (code)
5282 {
5283 case PLUS_EXPR:
5284 aff_combination_add (&aff_e2, &aff_offset);
5285 if (aff_combination_zero_p (&aff_e2))
5286 return true;
5287
5288 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5289 &aff_e1, &data->name_expansion_cache);
5290 aff_combination_add (&aff_e1, &aff_offset);
5291 return aff_combination_zero_p (&aff_e1);
5292
5293 case POINTER_PLUS_EXPR:
5294 aff_combination_add (&aff_e2, &aff_offset);
5295 return aff_combination_zero_p (&aff_e2);
5296
5297 default:
5298 return false;
5299 }
5300 }
5301
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
      overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  The "may be zero" condition MBZ encodes
     the a + 1 > b part; extract A and B from it.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  The difference must
     reduce to the constant 1 with no remaining symbolic part (tmpb.n == 0).  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator: LT for an increasing candidate,
     GT for a decreasing one, inverted if the original exit test was EQ.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
5433
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  Returns false if the elimination is not
   possible or not profitable.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  /* Find the edge leaving the loop; if both successors stay inside,
     this is not an exit condition.  */
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  If the test is after the increment, the
	 candidate takes one extra step, so the period must be strictly
	 greater than NITER.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  /* Compute the value of the candidate at loop exit; this is the bound
     the exit condition compares against.  */
  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
5542
5543 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5544 be copied, if it is used in the loop body and DATA->body_includes_call. */
5545
5546 static int
parm_decl_cost(struct ivopts_data * data,tree bound)5547 parm_decl_cost (struct ivopts_data *data, tree bound)
5548 {
5549 tree sbound = bound;
5550 STRIP_NOPS (sbound);
5551
5552 if (TREE_CODE (sbound) == SSA_NAME
5553 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5554 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5555 && data->body_includes_call)
5556 return COSTS_N_INSNS (1);
5557
5558 return 0;
5559 }
5560
/* Determines cost of basing replacement of USE on CAND in a condition.
   Records the cheaper of "eliminate the iv" and "express the original giv"
   in the cost map and returns true iff the resulting cost is finite.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* A PARM_DECL bound may still require a copy; a constant bound is
	 free.  */
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.	 More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr_id = get_expr_id (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  The chosen
     depends-on bitmap is handed over to set_use_iv_cost; the pointer is
     nulled here so it is not freed below.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
5669
5670 /* Determines cost of basing replacement of USE on CAND. Returns false
5671 if USE cannot be based on CAND. */
5672
5673 static bool
determine_use_iv_cost(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)5674 determine_use_iv_cost (struct ivopts_data *data,
5675 struct iv_use *use, struct iv_cand *cand)
5676 {
5677 switch (use->type)
5678 {
5679 case USE_NONLINEAR_EXPR:
5680 return determine_use_iv_cost_generic (data, use, cand);
5681
5682 case USE_ADDRESS:
5683 return determine_use_iv_cost_address (data, use, cand);
5684
5685 case USE_COMPARE:
5686 return determine_use_iv_cost_condition (data, use, cand);
5687
5688 default:
5689 gcc_unreachable ();
5690 }
5691 }
5692
5693 /* Return true if get_computation_cost indicates that autoincrement is
5694 a possibility for the pair of USE and CAND, false otherwise. */
5695
5696 static bool
autoinc_possible_for_pair(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)5697 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5698 struct iv_cand *cand)
5699 {
5700 bitmap depends_on;
5701 bool can_autoinc;
5702 comp_cost cost;
5703
5704 if (use->type != USE_ADDRESS)
5705 return false;
5706
5707 cost = get_computation_cost (data, use, cand, true, &depends_on,
5708 &can_autoinc, NULL);
5709
5710 BITMAP_FREE (depends_on);
5711
5712 return !infinite_cost_p (cost) && can_autoinc;
5713 }
5714
5715 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5716 use that allows autoincrement, and set their AINC_USE if possible. */
5717
5718 static void
set_autoinc_for_original_candidates(struct ivopts_data * data)5719 set_autoinc_for_original_candidates (struct ivopts_data *data)
5720 {
5721 unsigned i, j;
5722
5723 for (i = 0; i < n_iv_cands (data); i++)
5724 {
5725 struct iv_cand *cand = iv_cand (data, i);
5726 struct iv_use *closest_before = NULL;
5727 struct iv_use *closest_after = NULL;
5728 if (cand->pos != IP_ORIGINAL)
5729 continue;
5730
5731 for (j = 0; j < n_iv_uses (data); j++)
5732 {
5733 struct iv_use *use = iv_use (data, j);
5734 unsigned uid = gimple_uid (use->stmt);
5735
5736 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5737 continue;
5738
5739 if (uid < gimple_uid (cand->incremented_at)
5740 && (closest_before == NULL
5741 || uid > gimple_uid (closest_before->stmt)))
5742 closest_before = use;
5743
5744 if (uid > gimple_uid (cand->incremented_at)
5745 && (closest_after == NULL
5746 || uid < gimple_uid (closest_after->stmt)))
5747 closest_after = use;
5748 }
5749
5750 if (closest_before != NULL
5751 && autoinc_possible_for_pair (data, closest_before, cand))
5752 cand->ainc_use = closest_before;
5753 else if (closest_after != NULL
5754 && autoinc_possible_for_pair (data, closest_after, cand))
5755 cand->ainc_use = closest_after;
5756 }
5757 }
5758
/* Finds the candidates for the induction variables.  The candidate set is
   built from standard ivs, the bivs already present in the loop, and ivs
   derived from the interesting uses; the autoincrement and "important"
   markings are then computed for it.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs, e.g. one with base 0 and step 1.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_uses (data);

  /* Match IP_ORIGINAL candidates with nearby uses that permit
     autoincrement addressing.  */
  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
5778
/* Determines costs of basing the use of the iv on an iv candidate.  Fills
   the use/candidate cost map for every pair that is considered, pruning
   related candidates whose cost turned out infinite, and optionally dumps
   the resulting table.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Small problems: cost every use against every candidate.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Otherwise restrict to the candidates related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip empty map slots and pairs with infinite cost.  */
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
5856
5857 /* Determines cost of the candidate CAND. */
5858
5859 static void
determine_iv_cost(struct ivopts_data * data,struct iv_cand * cand)5860 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5861 {
5862 comp_cost cost_base;
5863 unsigned cost, cost_step;
5864 tree base;
5865
5866 if (!cand->iv)
5867 {
5868 cand->cost = 0;
5869 return;
5870 }
5871
5872 /* There are two costs associated with the candidate -- its increment
5873 and its initialization. The second is almost negligible for any loop
5874 that rolls enough, so we take it just very little into account. */
5875
5876 base = cand->iv->base;
5877 cost_base = force_var_cost (data, base, NULL);
5878 /* It will be exceptional that the iv register happens to be initialized with
5879 the proper value at no cost. In general, there will at least be a regcopy
5880 or a const set. */
5881 if (cost_base.cost == 0)
5882 cost_base.cost = COSTS_N_INSNS (1);
5883 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5884
5885 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5886
5887 /* Prefer the original ivs unless we may gain something by replacing it.
5888 The reason is to make debugging simpler; so this is not relevant for
5889 artificial ivs created by other optimization passes. */
5890 if (cand->pos != IP_ORIGINAL
5891 || !SSA_NAME_VAR (cand->var_before)
5892 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5893 cost++;
5894
5895 /* Prefer not to insert statements into latch unless there are some
5896 already (so that we do not create unnecessary jumps). */
5897 if (cand->pos == IP_END
5898 && empty_block_p (ip_end_pos (data->current_loop)))
5899 cost++;
5900
5901 cand->cost = cost;
5902 cand->cost_step = cost_step;
5903 }
5904
5905 /* Determines costs of computation of the candidates. */
5906
5907 static void
determine_iv_costs(struct ivopts_data * data)5908 determine_iv_costs (struct ivopts_data *data)
5909 {
5910 unsigned i;
5911
5912 if (dump_file && (dump_flags & TDF_DETAILS))
5913 {
5914 fprintf (dump_file, "Candidate costs:\n");
5915 fprintf (dump_file, " cand\tcost\n");
5916 }
5917
5918 for (i = 0; i < n_iv_cands (data); i++)
5919 {
5920 struct iv_cand *cand = iv_cand (data, i);
5921
5922 determine_iv_cost (data, cand);
5923
5924 if (dump_file && (dump_flags & TDF_DETAILS))
5925 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5926 }
5927
5928 if (dump_file && (dump_flags & TDF_DETAILS))
5929 fprintf (dump_file, "\n");
5930 }
5931
5932 /* Calculates cost for having SIZE induction variables. */
5933
5934 static unsigned
ivopts_global_cost_for_size(struct ivopts_data * data,unsigned size)5935 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5936 {
5937 /* We add size to the cost, so that we prefer eliminating ivs
5938 if possible. */
5939 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5940 data->body_includes_call);
5941 }
5942
/* For each size of the induction variable set determine the penalty.
   Estimates the number of registers already in use in the loop
   (non-iv phi results plus invariants with nonlinear uses) and stores it
   in DATA->regs_used for later register-pressure costing.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  /* Count the loop-header phi results that are not ivs; each of those
     occupies a register across the loop.  */
  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Invariants with nonlinear uses also need a register each.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
6001
6002 /* Returns true if A is a cheaper cost pair than B. */
6003
6004 static bool
cheaper_cost_pair(struct cost_pair * a,struct cost_pair * b)6005 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6006 {
6007 int cmp;
6008
6009 if (!a)
6010 return false;
6011
6012 if (!b)
6013 return true;
6014
6015 cmp = compare_costs (a->cost, b->cost);
6016 if (cmp < 0)
6017 return true;
6018
6019 if (cmp > 0)
6020 return false;
6021
6022 /* In case the costs are the same, prefer the cheaper candidate. */
6023 if (a->cand->cost < b->cand->cost)
6024 return true;
6025
6026 return false;
6027 }
6028
6029
6030 /* Returns candidate by that USE is expressed in IVS. */
6031
6032 static struct cost_pair *
iv_ca_cand_for_use(struct iv_ca * ivs,struct iv_use * use)6033 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
6034 {
6035 return ivs->cand_for_use[use->id];
6036 }
6037
6038 /* Computes the cost field of IVS structure. */
6039
6040 static void
iv_ca_recount_cost(struct ivopts_data * data,struct iv_ca * ivs)6041 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6042 {
6043 comp_cost cost = ivs->cand_use_cost;
6044
6045 cost.cost += ivs->cand_cost;
6046
6047 cost.cost += ivopts_global_cost_for_size (data,
6048 ivs->n_regs + ivs->num_used_inv_expr);
6049
6050 ivs->cost = cost;
6051 }
6052
6053 /* Remove invariants in set INVS to set IVS. */
6054
6055 static void
iv_ca_set_remove_invariants(struct iv_ca * ivs,bitmap invs)6056 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6057 {
6058 bitmap_iterator bi;
6059 unsigned iid;
6060
6061 if (!invs)
6062 return;
6063
6064 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6065 {
6066 ivs->n_invariant_uses[iid]--;
6067 if (ivs->n_invariant_uses[iid] == 0)
6068 ivs->n_regs--;
6069 }
6070 }
6071
/* Set USE not to be expressed by any candidate in IVS.  Updates all the
   bookkeeping of IVS (candidate use counts, register counts, invariant
   uses, costs) and recomputes the total cost.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  /* Nothing to do if USE is not expressed by any candidate.  */
  if (!cp)
    return;
  cid = cp->cand->id;

  /* USE becomes a "bad" (unexpressed) use.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the last use of the candidate, remove the candidate from
     the set together with everything it brought in.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  /* Release the reference to the invariant expression, if any.  */
  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
6114
6115 /* Add invariants in set INVS to set IVS. */
6116
6117 static void
iv_ca_set_add_invariants(struct iv_ca * ivs,bitmap invs)6118 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6119 {
6120 bitmap_iterator bi;
6121 unsigned iid;
6122
6123 if (!invs)
6124 return;
6125
6126 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6127 {
6128 ivs->n_invariant_uses[iid]++;
6129 if (ivs->n_invariant_uses[iid] == 1)
6130 ivs->n_regs++;
6131 }
6132 }
6133
/* Set cost pair for USE in set IVS to CP.  CP may be NULL, in which case
   USE is simply removed from the set.  All candidate/invariant/cost
   bookkeeping of IVS is updated and the total cost is recomputed.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  /* Already expressed this way; nothing to do.  */
  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Retract the previous choice first.  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of this candidate: add it to the set together with
	 everything it depends on.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      /* Take a reference to the invariant expression, if any.  */
      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}
6179
6180 /* Extend set IVS by expressing USE by some of the candidates in it
6181 if possible. Consider all important candidates if candidates in
6182 set IVS don't give any result. */
6183
6184 static void
iv_ca_add_use(struct ivopts_data * data,struct iv_ca * ivs,struct iv_use * use)6185 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6186 struct iv_use *use)
6187 {
6188 struct cost_pair *best_cp = NULL, *cp;
6189 bitmap_iterator bi;
6190 unsigned i;
6191 struct iv_cand *cand;
6192
6193 gcc_assert (ivs->upto >= use->id);
6194 ivs->upto++;
6195 ivs->bad_uses++;
6196
6197 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6198 {
6199 cand = iv_cand (data, i);
6200 cp = get_use_iv_cost (data, use, cand);
6201 if (cheaper_cost_pair (cp, best_cp))
6202 best_cp = cp;
6203 }
6204
6205 if (best_cp == NULL)
6206 {
6207 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6208 {
6209 cand = iv_cand (data, i);
6210 cp = get_use_iv_cost (data, use, cand);
6211 if (cheaper_cost_pair (cp, best_cp))
6212 best_cp = cp;
6213 }
6214 }
6215
6216 iv_ca_set_cp (data, ivs, use, best_cp);
6217 }
6218
6219 /* Get cost for assignment IVS. */
6220
6221 static comp_cost
iv_ca_cost(struct iv_ca * ivs)6222 iv_ca_cost (struct iv_ca *ivs)
6223 {
6224 /* This was a conditional expression but it triggered a bug in
6225 Sun C 5.5. */
6226 if (ivs->bad_uses)
6227 return infinite_cost;
6228 else
6229 return ivs->cost;
6230 }
6231
6232 /* Returns true if all dependences of CP are among invariants in IVS. */
6233
6234 static bool
iv_ca_has_deps(struct iv_ca * ivs,struct cost_pair * cp)6235 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6236 {
6237 unsigned i;
6238 bitmap_iterator bi;
6239
6240 if (!cp->depends_on)
6241 return true;
6242
6243 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6244 {
6245 if (ivs->n_invariant_uses[i] == 0)
6246 return false;
6247 }
6248
6249 return true;
6250 }
6251
6252 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6253 it before NEXT_CHANGE. */
6254
6255 static struct iv_ca_delta *
iv_ca_delta_add(struct iv_use * use,struct cost_pair * old_cp,struct cost_pair * new_cp,struct iv_ca_delta * next_change)6256 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6257 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6258 {
6259 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6260
6261 change->use = use;
6262 change->old_cp = old_cp;
6263 change->new_cp = new_cp;
6264 change->next_change = next_change;
6265
6266 return change;
6267 }
6268
6269 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6270 are rewritten. */
6271
6272 static struct iv_ca_delta *
iv_ca_delta_join(struct iv_ca_delta * l1,struct iv_ca_delta * l2)6273 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6274 {
6275 struct iv_ca_delta *last;
6276
6277 if (!l2)
6278 return l1;
6279
6280 if (!l1)
6281 return l2;
6282
6283 for (last = l1; last->next_change; last = last->next_change)
6284 continue;
6285 last->next_change = l2;
6286
6287 return l1;
6288 }
6289
6290 /* Reverse the list of changes DELTA, forming the inverse to it. */
6291
6292 static struct iv_ca_delta *
iv_ca_delta_reverse(struct iv_ca_delta * delta)6293 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6294 {
6295 struct iv_ca_delta *act, *next, *prev = NULL;
6296
6297 for (act = delta; act; act = next)
6298 {
6299 next = act->next_change;
6300 act->next_change = prev;
6301 prev = act;
6302
6303 std::swap (act->old_cp, act->new_cp);
6304 }
6305
6306 return prev;
6307 }
6308
6309 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6310 reverted instead. */
6311
6312 static void
iv_ca_delta_commit(struct ivopts_data * data,struct iv_ca * ivs,struct iv_ca_delta * delta,bool forward)6313 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6314 struct iv_ca_delta *delta, bool forward)
6315 {
6316 struct cost_pair *from, *to;
6317 struct iv_ca_delta *act;
6318
6319 if (!forward)
6320 delta = iv_ca_delta_reverse (delta);
6321
6322 for (act = delta; act; act = act->next_change)
6323 {
6324 from = act->old_cp;
6325 to = act->new_cp;
6326 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6327 iv_ca_set_cp (data, ivs, act->use, to);
6328 }
6329
6330 if (!forward)
6331 iv_ca_delta_reverse (delta);
6332 }
6333
6334 /* Returns true if CAND is used in IVS. */
6335
6336 static bool
iv_ca_cand_used_p(struct iv_ca * ivs,struct iv_cand * cand)6337 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6338 {
6339 return ivs->n_cand_uses[cand->id] > 0;
6340 }
6341
/* Returns number of induction variable candidates in the set IVS.
   Pseudocandidates are included in this count.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
6349
6350 /* Free the list of changes DELTA. */
6351
6352 static void
iv_ca_delta_free(struct iv_ca_delta ** delta)6353 iv_ca_delta_free (struct iv_ca_delta **delta)
6354 {
6355 struct iv_ca_delta *act, *next;
6356
6357 for (act = *delta; act; act = next)
6358 {
6359 next = act->next_change;
6360 free (act);
6361 }
6362
6363 *delta = NULL;
6364 }
6365
6366 /* Allocates new iv candidates assignment. */
6367
6368 static struct iv_ca *
iv_ca_new(struct ivopts_data * data)6369 iv_ca_new (struct ivopts_data *data)
6370 {
6371 struct iv_ca *nw = XNEW (struct iv_ca);
6372
6373 nw->upto = 0;
6374 nw->bad_uses = 0;
6375 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6376 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6377 nw->cands = BITMAP_ALLOC (NULL);
6378 nw->n_cands = 0;
6379 nw->n_regs = 0;
6380 nw->cand_use_cost = no_cost;
6381 nw->cand_cost = 0;
6382 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6383 nw->cost = no_cost;
6384 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6385 nw->num_used_inv_expr = 0;
6386
6387 return nw;
6388 }
6389
6390 /* Free memory occupied by the set IVS. */
6391
6392 static void
iv_ca_free(struct iv_ca ** ivs)6393 iv_ca_free (struct iv_ca **ivs)
6394 {
6395 free ((*ivs)->cand_for_use);
6396 free ((*ivs)->n_cand_uses);
6397 BITMAP_FREE ((*ivs)->cands);
6398 free ((*ivs)->n_invariant_uses);
6399 free ((*ivs)->used_inv_expr);
6400 free (*ivs);
6401 *ivs = NULL;
6402 }
6403
/* Dumps IVS to FILE: total and per-component costs, the candidate
   bitmap, the candidate chosen for each use, and the invariants the
   assignment references.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = " invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, " candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	/* The use has no candidate assigned yet.  */
	fprintf (file, " use:%d --> ??\n", use->id);
    }

  /* PREF prints the heading before the first invariant and a plain
     separator before the rest.  */
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}
6437
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* The use is already expressed by CAND; nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* CAND cannot express this use at all.  */
      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* Unless we are minimizing the number of candidates, only switch
	 when the new pair's dependences are satisfied and it is locally
	 cheaper than the current choice.  */
      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the delta just to measure the resulting cost, then undo it;
     the caller decides whether to commit for real.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
6484
6485 /* Try narrowing set IVS by removing CAND. Return the cost of
6486 the new set and store the differences in DELTA. START is
6487 the candidate with which we start narrowing. */
6488
6489 static comp_cost
iv_ca_narrow(struct ivopts_data * data,struct iv_ca * ivs,struct iv_cand * cand,struct iv_cand * start,struct iv_ca_delta ** delta)6490 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6491 struct iv_cand *cand, struct iv_cand *start,
6492 struct iv_ca_delta **delta)
6493 {
6494 unsigned i, ci;
6495 struct iv_use *use;
6496 struct cost_pair *old_cp, *new_cp, *cp;
6497 bitmap_iterator bi;
6498 struct iv_cand *cnd;
6499 comp_cost cost, best_cost, acost;
6500
6501 *delta = NULL;
6502 for (i = 0; i < n_iv_uses (data); i++)
6503 {
6504 use = iv_use (data, i);
6505
6506 old_cp = iv_ca_cand_for_use (ivs, use);
6507 if (old_cp->cand != cand)
6508 continue;
6509
6510 best_cost = iv_ca_cost (ivs);
6511 /* Start narrowing with START. */
6512 new_cp = get_use_iv_cost (data, use, start);
6513
6514 if (data->consider_all_candidates)
6515 {
6516 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6517 {
6518 if (ci == cand->id || (start && ci == start->id))
6519 continue;
6520
6521 cnd = iv_cand (data, ci);
6522
6523 cp = get_use_iv_cost (data, use, cnd);
6524 if (!cp)
6525 continue;
6526
6527 iv_ca_set_cp (data, ivs, use, cp);
6528 acost = iv_ca_cost (ivs);
6529
6530 if (compare_costs (acost, best_cost) < 0)
6531 {
6532 best_cost = acost;
6533 new_cp = cp;
6534 }
6535 }
6536 }
6537 else
6538 {
6539 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6540 {
6541 if (ci == cand->id || (start && ci == start->id))
6542 continue;
6543
6544 cnd = iv_cand (data, ci);
6545
6546 cp = get_use_iv_cost (data, use, cnd);
6547 if (!cp)
6548 continue;
6549
6550 iv_ca_set_cp (data, ivs, use, cp);
6551 acost = iv_ca_cost (ivs);
6552
6553 if (compare_costs (acost, best_cost) < 0)
6554 {
6555 best_cost = acost;
6556 new_cp = cp;
6557 }
6558 }
6559 }
6560 /* Restore to old cp for use. */
6561 iv_ca_set_cp (data, ivs, use, old_cp);
6562
6563 if (!new_cp)
6564 {
6565 iv_ca_delta_free (delta);
6566 return infinite_cost;
6567 }
6568
6569 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6570 }
6571
6572 iv_ca_delta_commit (data, ivs, *delta, true);
6573 cost = iv_ca_cost (ivs);
6574 iv_ca_delta_commit (data, ivs, *delta, false);
6575
6576 return cost;
6577 }
6578
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single candidate removal that improves the cost most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* No removal improved the cost; the set is already pruned.  */
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
6628
6629 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6630 cheaper local cost for USE than BEST_CP. Return pointer to
6631 the corresponding cost_pair, otherwise just return BEST_CP. */
6632
6633 static struct cost_pair*
cheaper_cost_with_cand(struct ivopts_data * data,struct iv_use * use,unsigned int cand_idx,struct iv_cand * old_cand,struct cost_pair * best_cp)6634 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6635 unsigned int cand_idx, struct iv_cand *old_cand,
6636 struct cost_pair *best_cp)
6637 {
6638 struct iv_cand *cand;
6639 struct cost_pair *cp;
6640
6641 gcc_assert (old_cand != NULL && best_cp != NULL);
6642 if (cand_idx == old_cand->id)
6643 return best_cp;
6644
6645 cand = iv_cand (data, cand_idx);
6646 cp = get_use_iv_cost (data, use, cand);
6647 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6648 return cp;
6649
6650 return best_cp;
6651 }
6652
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv uses.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      /* Only consider candidates serving several uses: a single-use
	 candidate gains nothing from replacement, and a very popular
	 one is too expensive to re-examine.  */
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
	 lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  use = iv_use (data, j);
	  old_cp = iv_ca_cand_for_use (ivs, use);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < n_iv_cands (data); k++)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  The changes are applied only to
	 measure the pruned cost and are undone immediately after.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      /* Accept the first replacement that beats the original cost;
	 otherwise keep looking at further candidates.  */
      if (compare_costs (acost, orig_cost) < 0)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
6730
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  Returns false if USE cannot be
   expressed at finite cost.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* Let iv_ca_add_use pick a baseline candidate; remember that choice
     as the initial delta, then unassign it so all alternatives are
     measured from the same state.  */
  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Measure the cost of the whole set extended with CAND; the
	 temporary assignment is undone right after.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* Nothing preferred worked; fall back to trying every candidate that
     has a finite cost for this use.  */
  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Install the cheapest assignment found.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
6845
6846 /* Finds an initial assignment of candidates to uses. */
6847
6848 static struct iv_ca *
get_initial_solution(struct ivopts_data * data,bool originalp)6849 get_initial_solution (struct ivopts_data *data, bool originalp)
6850 {
6851 struct iv_ca *ivs = iv_ca_new (data);
6852 unsigned i;
6853
6854 for (i = 0; i < n_iv_uses (data); i++)
6855 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6856 {
6857 iv_ca_free (&ivs);
6858 return NULL;
6859 }
6860
6861 return ivs;
6862 }
6863
/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable, this function tries to break local
   optimal fixed-point by replacing candidates in IVS if it's true.
   Returns true if IVS was improved (and the change committed).  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  /* Replacement is attempted at most once per search.  */
	  *try_replace_p = false;
	  /* So far candidate selecting algorithm tends to choose fewer IVs
	     so that it can handle cases in which loops have many variables
	     but the best choice is often to use only one general biv.  One
	     weakness is it can't handle opposite cases, in which different
	     candidates should be chosen with respect to each use.  To solve
	     the problem, we replace candidates in a manner described by the
	     comments of iv_ca_replace, thus give general algorithm a chance
	     to break local optimal fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  /* Commit the cheapest improvement found; the recomputed set cost must
     match what the probing pass measured.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
6937
6938 /* Attempts to find the optimal set of induction variables. We do simple
6939 greedy heuristic -- we try to replace at most one candidate in the selected
6940 solution and remove the unused ivs while this improves the cost. */
6941
6942 static struct iv_ca *
find_optimal_iv_set_1(struct ivopts_data * data,bool originalp)6943 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6944 {
6945 struct iv_ca *set;
6946 bool try_replace_p = true;
6947
6948 /* Get the initial solution. */
6949 set = get_initial_solution (data, originalp);
6950 if (!set)
6951 {
6952 if (dump_file && (dump_flags & TDF_DETAILS))
6953 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6954 return NULL;
6955 }
6956
6957 if (dump_file && (dump_flags & TDF_DETAILS))
6958 {
6959 fprintf (dump_file, "Initial set of candidates:\n");
6960 iv_ca_dump (data, dump_file, set);
6961 }
6962
6963 while (try_improve_iv_set (data, set, &try_replace_p))
6964 {
6965 if (dump_file && (dump_flags & TDF_DETAILS))
6966 {
6967 fprintf (dump_file, "Improved to:\n");
6968 iv_ca_dump (data, dump_file, set);
6969 }
6970 }
6971
6972 return set;
6973 }
6974
/* Finds the optimal set of induction variables for the current loop.
   Runs the greedy search twice -- once preferring the original ivs and
   once preferring generic candidates -- keeps the cheaper result, and
   records the selected candidate on every use.  Returns NULL if both
   strategies fail.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  /* A failed strategy counts as infinitely expensive.  */
  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  /* Record the winning candidate on each use for the rewrite phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
7021
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Pseudocandidates have no iv to materialize.  */
  if (!cand->iv)
    return;

  /* Determine where the iv increment statement is to be emitted.  */
  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      /* Nothing new is emitted for an original iv.  */
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
7070
/* Creates new induction variables described in SET and, with detailed
   dumping enabled, prints the selected set.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      /* Include the loop's source location when it is known.  */
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}
7102
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  /* OP is the operand other than the iv itself.  */
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  /* Express USE in terms of CAND.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  /* Find the value being replaced and where the replacement goes.  */
  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      /* Force COMP into a separate SSA name computed before BSI.  */
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* Replace the PHI with an explicit assignment after the labels.  */
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      /* Rewrite the assignment in place; the stmt may be reallocated.  */
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
7208
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transform it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  /* Only candidates incremented at the normal exit test position can
     be moved before the use.  */
  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  /* The expected shape is: use->stmt; iv_update; cond -- bail out
     if any other statement intervenes.  */
  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* Move the iv update before the use ...  */
  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  /* ... and record that the use now sees the incremented value, so the
     rewrite will compensate the offset accordingly.  */
  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
7291
/* Rewrites USE (an address that is an iv) in place using candidate CAND:
   the expression stored at USE->op_p is replaced by an equivalent memory
   reference built from CAND's value at USE->stmt.  */

static void
rewrite_use_address_1 (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  /* Possibly move CAND's increment before this use (peephole); must be done
     before the affine combination is computed, as it changes the value of
     the candidate at USE->stmt.  */
  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  /* The candidate was selected for this use, so the computation exists.  */
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  /* Preserve the (possibly reduced) alignment of the original reference:
     build an aligned variant of the type if the object is not aligned to
     the type's natural alignment.  */
  tree type = TREE_TYPE (*use->op_p);
  unsigned int align = get_object_alignment (*use->op_p);
  if (align != TYPE_ALIGN (type))
    type = build_aligned_type (type, align);
  ref = create_mem_ref (&bsi, type, &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  /* Carry over points-to/alias info, then splice the new ref in.  */
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
7334
7335 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7336 first use of a group, rewrites sub uses in the group too. */
7337
7338 static void
rewrite_use_address(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)7339 rewrite_use_address (struct ivopts_data *data,
7340 struct iv_use *use, struct iv_cand *cand)
7341 {
7342 struct iv_use *next;
7343
7344 gcc_assert (use->sub_id == 0);
7345 rewrite_use_address_1 (data, use, cand);
7346 update_stmt (use->stmt);
7347
7348 for (next = use->next; next != NULL; next = next->next)
7349 {
7350 rewrite_use_address_1 (data, next, cand);
7351 update_stmt (next->stmt);
7352 }
7353
7354 return;
7355 }
7356
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If a replacement bound was computed for this pair (iv
   elimination succeeded), the exit test is replaced wholesale by
   "var <comp> bound"; otherwise the original iv value is simply expressed
   in terms of CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  /* A non-NULL value in the cost pair records the bound against which the
     candidate can be compared instead of the original iv.  */
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      /* The bound is loop invariant, so any statements needed to compute
	 it go on the preheader edge.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      /* Rewrite the condition in place: var <compare> op.  */
      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  /* Locate the iv operand of the condition so it can be overwritten.  */
  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
7408
7409 /* Rewrites USE using candidate CAND. */
7410
7411 static void
rewrite_use(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)7412 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7413 {
7414 switch (use->type)
7415 {
7416 case USE_NONLINEAR_EXPR:
7417 rewrite_use_nonlinear_expr (data, use, cand);
7418 break;
7419
7420 case USE_ADDRESS:
7421 rewrite_use_address (data, use, cand);
7422 break;
7423
7424 case USE_COMPARE:
7425 rewrite_use_compare (data, use, cand);
7426 break;
7427
7428 default:
7429 gcc_unreachable ();
7430 }
7431
7432 update_stmt (use->stmt);
7433 }
7434
7435 /* Rewrite the uses using the selected induction variables. */
7436
7437 static void
rewrite_uses(struct ivopts_data * data)7438 rewrite_uses (struct ivopts_data *data)
7439 {
7440 unsigned i;
7441 struct iv_cand *cand;
7442 struct iv_use *use;
7443
7444 for (i = 0; i < n_iv_uses (data); i++)
7445 {
7446 use = iv_use (data, i);
7447 cand = use->selected;
7448 gcc_assert (cand);
7449
7450 rewrite_use (data, use, cand);
7451 }
7452 }
7453
/* Removes the ivs that are not used after rewriting.  Before releasing an
   SSA name, any debug-bind statements still referencing it are rewritten to
   an equivalent expression computed from one of the selected candidates, so
   debug info survives the elimination.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      /* Removable ivs: real (nonzero-step) ivs that are neither invariants,
	 nor used by any rewritten use, nor explicitly preserved bivs.  */
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      /* No debug uses -- nothing to preserve.  */
	      if (!count)
		continue;

	      /* Pick the candidate whose expression of DEF is most likely
		 to be simple: prefer matching step (weight 4), matching
		 base mode (weight 2), and a constant base (weight 1).  */
	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      /* Cap the scan at 64 uses to bound compile time.  */
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  /* Multiple or non-trivial debug uses: bind a debug temp
		     to the expression once and substitute the temp.  */
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  /* PHI defs have no insertion point of their own; insert
		     after the labels of the defining block instead.  */
		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      /* Replace DEF by COMP in every debug bind.  */
	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  /* Release all collected defs in dependency order.  */
  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
7588
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse; returns true so the traversal continues over
   all entries.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
7598
/* Frees data allocated by the optimization of a single loop, resetting
   DATA so it can be reused for the next loop.  The per-pass structures
   (relevant bitmap, version_info array, ...) are cleared, not freed.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  /* Release the niter descriptors cached for the loop's exits.  */
  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  /* Reset the per-SSA-version info of every name touched by this loop.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct iv_use *pre = use, *sub = use->next;

      /* Sub uses of a group share the head's cost data; they only need
	 their own struct freed.  */
      while (sub)
	{
	  gcc_assert (sub->related_cands == NULL);
	  gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);

	  pre = sub;
	  sub = sub->next;
	  free (pre);
	}

      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  /* Grow the version_info array if new SSA names appeared; double it to
     amortize future growth.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  /* Drop the DECL_RTLs produced while estimating address costs.  */
  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}
7682
/* Finalizes data structures used by the iv optimization pass: frees the
   per-loop data one last time, then destroys the pass-lifetime structures
   allocated by tree_ssa_iv_optimize_init.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  /* Clear per-loop state first; the structures it touches are freed below.  */
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  /* Releases all ivs allocated from the obstack in one go.  */
  obstack_free (&data->iv_obstack, NULL);
}
7705
7706 /* Returns true if the loop body BODY includes any function calls. */
7707
7708 static bool
loop_body_includes_call(basic_block * body,unsigned num_nodes)7709 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7710 {
7711 gimple_stmt_iterator gsi;
7712 unsigned i;
7713
7714 for (i = 0; i < num_nodes; i++)
7715 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7716 {
7717 gimple *stmt = gsi_stmt (gsi);
7718 if (is_gimple_call (stmt)
7719 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7720 return true;
7721 }
7722 return false;
7723 }
7724
/* Optimizes the LOOP.  Runs the full ivopts pipeline on one loop: find ivs
   and interesting uses, build candidates, compute costs, choose an optimal
   candidate set, then rewrite the uses and clean up.  Returns true if
   anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  /* Per-loop state must have been cleared by the previous iteration.  */
  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  /* Fresh uids give a cheap dominance-order test within a block.  */
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  group_address_uses (data);
  /* Bail out on huge loops; the cost computation below is quadratic-ish
     in the number of uses.  */
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
7811
7812 /* Main entry point. Optimizes induction variables in loops. */
7813
7814 void
tree_ssa_iv_optimize(void)7815 tree_ssa_iv_optimize (void)
7816 {
7817 struct loop *loop;
7818 struct ivopts_data data;
7819
7820 tree_ssa_iv_optimize_init (&data);
7821
7822 /* Optimize the loops starting with the innermost ones. */
7823 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7824 {
7825 if (dump_file && (dump_flags & TDF_DETAILS))
7826 flow_loop_dump (loop, dump_file, NULL, 1);
7827
7828 tree_ssa_iv_optimize_loop (&data, loop);
7829 }
7830
7831 tree_ssa_iv_optimize_finalize (&data);
7832 }
7833