1 /* Induction variable optimizations.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33      Note the interesting uses are categorized and handled in groups.
34      Generally, address type uses are grouped together if their iv bases
35      differ only by a constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43    3) The optimal (with respect to a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49         The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and enable us to decide
66    costs more precisely, but getting all the interactions right would be
67    complicated.  A simplified example of the rewrite is sketched below.  */
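
/* Illustrative sketch only (not code used by the pass): for a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address &a[i] is an address type use and the exit test i < n is a
   compare type use; i and expressions derived from it are candidates.
   If a pointer candidate is selected, the loop may be rewritten roughly as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   strength-reducing the address computation and, if i has no other uses,
   eliminating the original counter.  */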
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
115
116 /* The infinite cost. */
117 #define INFTY 10000000
118
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
122
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
128 {
129 niter = likely_max_stmt_executions_int (loop);
130
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133 }
134
135 return niter;
136 }
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144   tree base_object;	/* A memory object to which the induction variable points.  */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147   struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if there is one.  */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS, /* Use is an address for an explicit memory
170 reference. */
171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE /* Use is a compare. */
175 };
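
/* Informal examples (illustrative only, not used by the code): for a loop
   counter i, the use of i in "tmp = i * i" is a USE_NONLINEAR_EXPR, the
   address computed for "a[i]" is a USE_REF_ADDRESS, a pointer argument to
   a call that the compiler expands so that the argument becomes a normal
   address is a USE_PTR_ADDRESS, and the exit test "i < n" is a
   USE_COMPARE.  */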
176
177 /* Cost of a computation. */
178 struct comp_cost
179 {
180   comp_cost (): cost (0), complexity (0), scratch (0)
181 {}
182
183 comp_cost (int cost, unsigned complexity, int scratch = 0)
184     : cost (cost), complexity (complexity), scratch (scratch)
185 {}
186
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
189
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192
193 /* Adds COST to the comp_cost. */
194 comp_cost operator+= (comp_cost cost);
195
196 /* Adds constant C to this comp_cost. */
197 comp_cost operator+= (HOST_WIDE_INT c);
198
199   /* Subtracts constant C from this comp_cost.  */
200 comp_cost operator-= (HOST_WIDE_INT c);
201
202 /* Divide the comp_cost by constant C. */
203 comp_cost operator/= (HOST_WIDE_INT c);
204
205 /* Multiply the comp_cost by constant C. */
206 comp_cost operator*= (HOST_WIDE_INT c);
207
208   /* Subtracts cost COST2 from COST1.  */
209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210
211 /* Subtracts COST from this comp_cost. */
212 comp_cost operator-= (comp_cost cost);
213
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1, comp_cost cost2);
216
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1, comp_cost cost2);
219
220   /* Returns true if COST1 is smaller than or equal to COST2.  */
221 friend bool operator<= (comp_cost cost1, comp_cost cost2);
222
223 int cost; /* The runtime cost. */
224 unsigned complexity; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int scratch; /* Scratch used during cost computation. */
229 };
230
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237 return cost == INFTY;
238 }
239
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244 return infinite_cost;
245
246 cost1.cost += cost2.cost;
247 cost1.complexity += cost2.complexity;
248
249 return cost1;
250 }
251
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
254 {
255 if (cost1.infinite_cost_p ())
256 return infinite_cost;
257
258 gcc_assert (!cost2.infinite_cost_p ());
259
260 cost1.cost -= cost2.cost;
261 cost1.complexity -= cost2.complexity;
262
263 return cost1;
264 }
265
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
268 {
269 *this = *this + cost;
270 return *this;
271 }
272
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
275 {
276 if (infinite_cost_p ())
277 return *this;
278
279 this->cost += c;
280
281 return *this;
282 }
283
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
286 {
287 if (infinite_cost_p ())
288 return *this;
289
290 this->cost -= c;
291
292 return *this;
293 }
294
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
297 {
298 if (infinite_cost_p ())
299 return *this;
300
301 this->cost /= c;
302
303 return *this;
304 }
305
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
308 {
309 if (infinite_cost_p ())
310 return *this;
311
312 this->cost *= c;
313
314 return *this;
315 }
316
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
319 {
320 *this = *this - cost;
321 return *this;
322 }
323
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
326 {
327 if (cost1.cost == cost2.cost)
328 return cost1.complexity < cost2.complexity;
329
330 return cost1.cost < cost2.cost;
331 }
332
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
335 {
336 return cost1.cost == cost2.cost
337 && cost1.complexity == cost2.complexity;
338 }
339
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
342 {
343 return cost1 < cost2 || cost1 == cost2;
344 }
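
/* For example (illustrative only): with the operators above,
   comp_cost (4, 2) < comp_cost (5, 0) because runtime cost is compared
   first, and comp_cost (4, 1) < comp_cost (4, 2) because equal runtime
   costs fall back to comparing complexity.  */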
345
346 struct iv_inv_expr_ent;
347
348 /* The candidate - cost pair. */
349 struct cost_pair
350 {
351 struct iv_cand *cand; /* The candidate. */
352 comp_cost cost; /* The cost. */
353 enum tree_code comp; /* For iv elimination, the comparison. */
354 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
355 preserved when representing iv_use with iv_cand. */
356 bitmap inv_exprs; /* The list of newly created invariant expressions
357 when representing iv_use with iv_cand. */
358 tree value; /* For final value elimination, the expression for
359 the final value of the iv. For iv elimination,
360 the new bound to compare with. */
361 };
362
363 /* Use. */
364 struct iv_use
365 {
366 unsigned id; /* The id of the use. */
367 unsigned group_id; /* The group id the use belongs to. */
368 enum use_type type; /* Type of the use. */
369 tree mem_type; /* The memory type to use when testing whether an
370 address is legitimate, and what the address's
371 cost is. */
372 struct iv *iv; /* The induction variable it is based on. */
373 gimple *stmt; /* Statement in that it occurs. */
374 tree *op_p; /* The place where it occurs. */
375
376 tree addr_base; /* Base address with const offset stripped. */
377 poly_uint64_pod addr_offset;
378 /* Const offset stripped from base address. */
379 };
380
381 /* Group of uses. */
382 struct iv_group
383 {
384 /* The id of the group. */
385 unsigned id;
386 /* Uses of the group are of the same type. */
387 enum use_type type;
388 /* The set of "related" IV candidates, plus the important ones. */
389 bitmap related_cands;
390 /* Number of IV candidates in the cost_map. */
391 unsigned n_map_members;
392 /* The costs wrto the iv candidates. */
393 struct cost_pair *cost_map;
394 /* The selected candidate for the group. */
395 struct iv_cand *selected;
396 /* Uses in the group. */
397 vec<struct iv_use *> vuses;
398 };
399
400 /* The position where the iv is computed. */
401 enum iv_position
402 {
403 IP_NORMAL, /* At the end, just before the exit condition. */
404 IP_END, /* At the end of the latch block. */
405 IP_BEFORE_USE, /* Immediately before a specific use. */
406 IP_AFTER_USE, /* Immediately after a specific use. */
407 IP_ORIGINAL /* The original biv. */
408 };
409
410 /* The induction variable candidate. */
411 struct iv_cand
412 {
413 unsigned id; /* The number of the candidate. */
414 bool important; /* Whether this is an "important" candidate, i.e. such
415 that it should be considered by all uses. */
416 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
417 gimple *incremented_at;/* For original biv, the statement where it is
418 incremented. */
419 tree var_before; /* The variable used for it before increment. */
420 tree var_after; /* The variable used for it after increment. */
421 struct iv *iv; /* The value of the candidate. NULL for
422 "pseudocandidate" used to indicate the possibility
423 to replace the final value of an iv by direct
424 computation of the value. */
425 unsigned cost; /* Cost of the candidate. */
426 unsigned cost_step; /* Cost of the candidate's increment operation. */
427 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 where it is incremented. */
429 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
430 iv_cand. */
431 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
432 			   handle it as a new invariant expression which will
433 			   be hoisted out of the loop.  */
434 struct iv *orig_iv; /* The original iv if this cand is added from biv with
435 smaller type. */
436 };
437
438 /* Hashtable entry for common candidate derived from iv uses. */
439 struct iv_common_cand
440 {
441 tree base;
442 tree step;
443 /* IV uses from which this common candidate is derived. */
444 auto_vec<struct iv_use *> uses;
445 hashval_t hash;
446 };
447
448 /* Hashtable helpers. */
449
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
451 {
452 static inline hashval_t hash (const iv_common_cand *);
453 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
454 };
455
456 /* Hash function for possible common candidates. */
457
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
460 {
461 return ccand->hash;
462 }
463
464 /* Hash table equality function for common candidates. */
465
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 const iv_common_cand *ccand2)
469 {
470 return (ccand1->hash == ccand2->hash
471 && operand_equal_p (ccand1->base, ccand2->base, 0)
472 && operand_equal_p (ccand1->step, ccand2->step, 0)
473 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475 }
476
477 /* Loop invariant expression hashtable entry. */
478
479 struct iv_inv_expr_ent
480 {
481 /* Tree expression of the entry. */
482 tree expr;
483   /* Unique identifier.  */
484 int id;
485 /* Hash value. */
486 hashval_t hash;
487 };
488
489 /* Sort iv_inv_expr_ent pair A and B by id field. */
490
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
493 {
494 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496
497 unsigned id1 = (*e1)->id;
498 unsigned id2 = (*e2)->id;
499
500 if (id1 < id2)
501 return -1;
502 else if (id1 > id2)
503 return 1;
504 else
505 return 0;
506 }
507
508 /* Hashtable helpers. */
509
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
511 {
512 static inline hashval_t hash (const iv_inv_expr_ent *);
513 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
514 };
515
516 /* Return true if uses of type TYPE represent some form of address. */
517
518 inline bool
519 address_p (use_type type)
520 {
521 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522 }
523
524 /* Hash function for loop invariant expressions. */
525
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
528 {
529 return expr->hash;
530 }
531
532 /* Hash table equality function for expressions. */
533
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 const iv_inv_expr_ent *expr2)
537 {
538 return expr1->hash == expr2->hash
539 && operand_equal_p (expr1->expr, expr2->expr, 0);
540 }
541
542 struct ivopts_data
543 {
544 /* The currently optimized loop. */
545 struct loop *current_loop;
546 location_t loop_loc;
547
548 /* Numbers of iterations for all exits of the current loop. */
549 hash_map<edge, tree_niter_desc *> *niters;
550
551 /* Number of registers used in it. */
552 unsigned regs_used;
553
554 /* The size of version_info array allocated. */
555 unsigned version_info_size;
556
557 /* The array of information for the ssa names. */
558 struct version_info *version_info;
559
560 /* The hashtable of loop invariant expressions created
561 by ivopt. */
562 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
563
564 /* The bitmap of indices in version_info whose value was changed. */
565 bitmap relevant;
566
567 /* The uses of induction variables. */
568 vec<iv_group *> vgroups;
569
570 /* The candidates. */
571 vec<iv_cand *> vcands;
572
573 /* A bitmap of important candidates. */
574 bitmap important_candidates;
575
576 /* Cache used by tree_to_aff_combination_expand. */
577 hash_map<tree, name_expansion *> *name_expansion_cache;
578
579 /* The hashtable of common candidates derived from iv uses. */
580 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
581
582 /* The common candidates. */
583 vec<iv_common_cand *> iv_common_cands;
584
585 /* The maximum invariant variable id. */
586 unsigned max_inv_var_id;
587
588 /* The maximum invariant expression id. */
589 unsigned max_inv_expr_id;
590
591 /* Number of no_overflow BIVs which are not used in memory address. */
592 unsigned bivs_not_used_in_addr;
593
594 /* Obstack for iv structure. */
595 struct obstack iv_obstack;
596
597 /* Whether to consider just related and important candidates when replacing a
598 use. */
599 bool consider_all_candidates;
600
601 /* Are we optimizing for speed? */
602 bool speed;
603
604 /* Whether the loop body includes any function calls. */
605 bool body_includes_call;
606
607 /* Whether the loop body can only be exited via single exit. */
608 bool loop_single_exit_p;
609 };
610
611 /* An assignment of iv candidates to uses. */
612
613 struct iv_ca
614 {
615 /* The number of uses covered by the assignment. */
616 unsigned upto;
617
618 /* Number of uses that cannot be expressed by the candidates in the set. */
619 unsigned bad_groups;
620
621 /* Candidate assigned to a use, together with the related costs. */
622 struct cost_pair **cand_for_group;
623
624 /* Number of times each candidate is used. */
625 unsigned *n_cand_uses;
626
627 /* The candidates used. */
628 bitmap cands;
629
630 /* The number of candidates in the set. */
631 unsigned n_cands;
632
633   /* The number of invariants needed, including both invariant variables and
634      invariant expressions.  */
635 unsigned n_invs;
636
637 /* Total cost of expressing uses. */
638 comp_cost cand_use_cost;
639
640 /* Total cost of candidates. */
641 unsigned cand_cost;
642
643 /* Number of times each invariant variable is used. */
644 unsigned *n_inv_var_uses;
645
646 /* Number of times each invariant expression is used. */
647 unsigned *n_inv_expr_uses;
648
649 /* Total cost of the assignment. */
650 comp_cost cost;
651 };
652
653 /* Difference of two iv candidate assignments. */
654
655 struct iv_ca_delta
656 {
657 /* Changed group. */
658 struct iv_group *group;
659
660 /* An old assignment (for rollback purposes). */
661 struct cost_pair *old_cp;
662
663 /* A new assignment. */
664 struct cost_pair *new_cp;
665
666 /* Next change in the list. */
667 struct iv_ca_delta *next;
668 };
669
670 /* Bound on the number of candidates below which all candidates are considered.  */
671
672 #define CONSIDER_ALL_CANDIDATES_BOUND \
673 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
674
675 /* If there are more iv occurrences, we just give up (it is quite unlikely that
676 optimizing such a loop would help, and it would take ages). */
677
678 #define MAX_CONSIDERED_GROUPS \
679 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
680
681 /* If there are at most this number of ivs in the set, try removing unnecessary
682 ivs from the set always. */
683
684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
685 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
686
687 /* The list of trees for which the decl_rtl field must be reset is stored
688 here. */
689
690 static vec<tree> decl_rtl_to_reset;
691
692 static comp_cost force_expr_to_var_cost (tree, bool);
693
694 /* The single loop exit if it dominates the latch, NULL otherwise. */
695
696 edge
697 single_dom_exit (struct loop *loop)
698 {
699 edge exit = single_exit (loop);
700
701 if (!exit)
702 return NULL;
703
704 if (!just_once_each_iteration_p (loop, exit->src))
705 return NULL;
706
707 return exit;
708 }
709
710 /* Dumps information about the induction variable IV to FILE. Don't dump
711 variable's name if DUMP_NAME is FALSE. The information is dumped with
712 preceding spaces indicated by INDENT_LEVEL. */
713
714 void
715 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
716 {
717 const char *p;
718 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
719
720 if (indent_level > 4)
721 indent_level = 4;
722 p = spaces + 8 - (indent_level << 1);
723
724 fprintf (file, "%sIV struct:\n", p);
725 if (iv->ssa_name && dump_name)
726 {
727 fprintf (file, "%s SSA_NAME:\t", p);
728 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
729 fprintf (file, "\n");
730 }
731
732 fprintf (file, "%s Type:\t", p);
733 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
734 fprintf (file, "\n");
735
736 fprintf (file, "%s Base:\t", p);
737 print_generic_expr (file, iv->base, TDF_SLIM);
738 fprintf (file, "\n");
739
740 fprintf (file, "%s Step:\t", p);
741 print_generic_expr (file, iv->step, TDF_SLIM);
742 fprintf (file, "\n");
743
744 if (iv->base_object)
745 {
746 fprintf (file, "%s Object:\t", p);
747 print_generic_expr (file, iv->base_object, TDF_SLIM);
748 fprintf (file, "\n");
749 }
750
751 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
752
753 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
754 p, iv->no_overflow ? "No-overflow" : "Overflow");
755 }
756
757 /* Dumps information about the USE to FILE. */
758
759 void
760 dump_use (FILE *file, struct iv_use *use)
761 {
762 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
763 fprintf (file, " At stmt:\t");
764 print_gimple_stmt (file, use->stmt, 0);
765 fprintf (file, " At pos:\t");
766 if (use->op_p)
767 print_generic_expr (file, *use->op_p, TDF_SLIM);
768 fprintf (file, "\n");
769 dump_iv (file, use->iv, false, 2);
770 }
771
772 /* Dumps information about the uses to FILE. */
773
774 void
775 dump_groups (FILE *file, struct ivopts_data *data)
776 {
777 unsigned i, j;
778 struct iv_group *group;
779
780 for (i = 0; i < data->vgroups.length (); i++)
781 {
782 group = data->vgroups[i];
783 fprintf (file, "Group %d:\n", group->id);
784 if (group->type == USE_NONLINEAR_EXPR)
785 fprintf (file, " Type:\tGENERIC\n");
786 else if (group->type == USE_REF_ADDRESS)
787 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
788 else if (group->type == USE_PTR_ADDRESS)
789 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
790 else
791 {
792 gcc_assert (group->type == USE_COMPARE);
793 fprintf (file, " Type:\tCOMPARE\n");
794 }
795 for (j = 0; j < group->vuses.length (); j++)
796 dump_use (file, group->vuses[j]);
797 }
798 }
799
800 /* Dumps information about induction variable candidate CAND to FILE. */
801
802 void
803 dump_cand (FILE *file, struct iv_cand *cand)
804 {
805 struct iv *iv = cand->iv;
806
807 fprintf (file, "Candidate %d:\n", cand->id);
808 if (cand->inv_vars)
809 {
810 fprintf (file, " Depend on inv.vars: ");
811 dump_bitmap (file, cand->inv_vars);
812 }
813 if (cand->inv_exprs)
814 {
815 fprintf (file, " Depend on inv.exprs: ");
816 dump_bitmap (file, cand->inv_exprs);
817 }
818
819 if (cand->var_before)
820 {
821 fprintf (file, " Var befor: ");
822 print_generic_expr (file, cand->var_before, TDF_SLIM);
823 fprintf (file, "\n");
824 }
825 if (cand->var_after)
826 {
827 fprintf (file, " Var after: ");
828 print_generic_expr (file, cand->var_after, TDF_SLIM);
829 fprintf (file, "\n");
830 }
831
832 switch (cand->pos)
833 {
834 case IP_NORMAL:
835 fprintf (file, " Incr POS: before exit test\n");
836 break;
837
838 case IP_BEFORE_USE:
839 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
840 break;
841
842 case IP_AFTER_USE:
843 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
844 break;
845
846 case IP_END:
847 fprintf (file, " Incr POS: at end\n");
848 break;
849
850 case IP_ORIGINAL:
851 fprintf (file, " Incr POS: orig biv\n");
852 break;
853 }
854
855 dump_iv (file, iv, false, 1);
856 }
857
858 /* Returns the info for ssa version VER. */
859
860 static inline struct version_info *
861 ver_info (struct ivopts_data *data, unsigned ver)
862 {
863 return data->version_info + ver;
864 }
865
866 /* Returns the info for ssa name NAME. */
867
868 static inline struct version_info *
869 name_info (struct ivopts_data *data, tree name)
870 {
871 return ver_info (data, SSA_NAME_VERSION (name));
872 }
873
874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
875 emitted in LOOP. */
876
877 static bool
878 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
879 {
880 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
881
882 gcc_assert (bb);
883
884 if (sbb == loop->latch)
885 return true;
886
887 if (sbb != bb)
888 return false;
889
890 return stmt == last_stmt (bb);
891 }
892
893 /* Returns true if STMT is after the place where the original induction
894 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
895 if the positions are identical. */
896
897 static bool
898 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
899 {
900 basic_block cand_bb = gimple_bb (cand->incremented_at);
901 basic_block stmt_bb = gimple_bb (stmt);
902
903 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
904 return false;
905
906 if (stmt_bb != cand_bb)
907 return true;
908
909 if (true_if_equal
910 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
911 return true;
912 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
913 }
914
915 /* Returns true if STMT is after the place where the induction variable
916 CAND is incremented in LOOP. */
917
918 static bool
919 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
920 {
921 switch (cand->pos)
922 {
923 case IP_END:
924 return false;
925
926 case IP_NORMAL:
927 return stmt_after_ip_normal_pos (loop, stmt);
928
929 case IP_ORIGINAL:
930 case IP_AFTER_USE:
931 return stmt_after_inc_pos (cand, stmt, false);
932
933 case IP_BEFORE_USE:
934 return stmt_after_inc_pos (cand, stmt, true);
935
936 default:
937 gcc_unreachable ();
938 }
939 }
940
941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
942
943 static bool
944 abnormal_ssa_name_p (tree exp)
945 {
946 if (!exp)
947 return false;
948
949 if (TREE_CODE (exp) != SSA_NAME)
950 return false;
951
952 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
953 }
954
955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956 abnormal phi node. Callback for for_each_index. */
957
958 static bool
959 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
960 void *data ATTRIBUTE_UNUSED)
961 {
962 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
963 {
964 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
965 return false;
966 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
967 return false;
968 }
969
970 return !abnormal_ssa_name_p (*index);
971 }
972
973 /* Returns true if EXPR contains a ssa name that occurs in an
974 abnormal phi node. */
975
976 bool
977 contains_abnormal_ssa_name_p (tree expr)
978 {
979 enum tree_code code;
980 enum tree_code_class codeclass;
981
982 if (!expr)
983 return false;
984
985 code = TREE_CODE (expr);
986 codeclass = TREE_CODE_CLASS (code);
987
988 if (code == CALL_EXPR)
989 {
990 tree arg;
991 call_expr_arg_iterator iter;
992 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
993 if (contains_abnormal_ssa_name_p (arg))
994 return true;
995 return false;
996 }
997
998 if (code == SSA_NAME)
999 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1000
1001 if (code == INTEGER_CST
1002 || is_gimple_min_invariant (expr))
1003 return false;
1004
1005 if (code == ADDR_EXPR)
1006 return !for_each_index (&TREE_OPERAND (expr, 0),
1007 idx_contains_abnormal_ssa_name_p,
1008 NULL);
1009
1010 if (code == COND_EXPR)
1011 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1012 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1013 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1014
1015 switch (codeclass)
1016 {
1017 case tcc_binary:
1018 case tcc_comparison:
1019 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1020 return true;
1021
1022 /* Fallthru. */
1023 case tcc_unary:
1024 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1025 return true;
1026
1027 break;
1028
1029 default:
1030 gcc_unreachable ();
1031 }
1032
1033 return false;
1034 }
1035
1036 /* Returns the structure describing number of iterations determined from
1037 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1038
1039 static struct tree_niter_desc *
1040 niter_for_exit (struct ivopts_data *data, edge exit)
1041 {
1042 struct tree_niter_desc *desc;
1043 tree_niter_desc **slot;
1044
1045 if (!data->niters)
1046 {
1047 data->niters = new hash_map<edge, tree_niter_desc *>;
1048 slot = NULL;
1049 }
1050 else
1051 slot = data->niters->get (exit);
1052
1053 if (!slot)
1054 {
1055 /* Try to determine number of iterations. We cannot safely work with ssa
1056 names that appear in phi nodes on abnormal edges, so that we do not
1057 create overlapping life ranges for them (PR 27283). */
1058 desc = XNEW (struct tree_niter_desc);
1059 if (!number_of_iterations_exit (data->current_loop,
1060 exit, desc, true)
1061 || contains_abnormal_ssa_name_p (desc->niter))
1062 {
1063 XDELETE (desc);
1064 desc = NULL;
1065 }
1066 data->niters->put (exit, desc);
1067 }
1068 else
1069 desc = *slot;
1070
1071 return desc;
1072 }
1073
1074 /* Returns the structure describing number of iterations determined from
1075 single dominating exit of DATA->current_loop, or NULL if something
1076 goes wrong. */
1077
1078 static struct tree_niter_desc *
1079 niter_for_single_dom_exit (struct ivopts_data *data)
1080 {
1081 edge exit = single_dom_exit (data->current_loop);
1082
1083 if (!exit)
1084 return NULL;
1085
1086 return niter_for_exit (data, exit);
1087 }
1088
1089 /* Initializes data structures used by the iv optimization pass, stored
1090 in DATA. */
1091
1092 static void
1093 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1094 {
1095 data->version_info_size = 2 * num_ssa_names;
1096 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1097 data->relevant = BITMAP_ALLOC (NULL);
1098 data->important_candidates = BITMAP_ALLOC (NULL);
1099 data->max_inv_var_id = 0;
1100 data->max_inv_expr_id = 0;
1101 data->niters = NULL;
1102 data->vgroups.create (20);
1103 data->vcands.create (20);
1104 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1105 data->name_expansion_cache = NULL;
1106 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1107 data->iv_common_cands.create (20);
1108 decl_rtl_to_reset.create (20);
1109 gcc_obstack_init (&data->iv_obstack);
1110 }
1111
1112 /* Returns a memory object to which EXPR points.  In case we are able to
1113 determine that it does not point to any such object, NULL is returned. */
1114
1115 static tree
1116 determine_base_object (tree expr)
1117 {
1118 enum tree_code code = TREE_CODE (expr);
1119 tree base, obj;
1120
1121 /* If this is a pointer casted to any type, we need to determine
1122 the base object for the pointer; so handle conversions before
1123 throwing away non-pointer expressions. */
1124 if (CONVERT_EXPR_P (expr))
1125 return determine_base_object (TREE_OPERAND (expr, 0));
1126
1127 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1128 return NULL_TREE;
1129
1130 switch (code)
1131 {
1132 case INTEGER_CST:
1133 return NULL_TREE;
1134
1135 case ADDR_EXPR:
1136 obj = TREE_OPERAND (expr, 0);
1137 base = get_base_address (obj);
1138
1139 if (!base)
1140 return expr;
1141
1142 if (TREE_CODE (base) == MEM_REF)
1143 return determine_base_object (TREE_OPERAND (base, 0));
1144
1145 return fold_convert (ptr_type_node,
1146 build_fold_addr_expr (base));
1147
1148 case POINTER_PLUS_EXPR:
1149 return determine_base_object (TREE_OPERAND (expr, 0));
1150
1151 case PLUS_EXPR:
1152 case MINUS_EXPR:
1153 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1154 gcc_unreachable ();
1155
1156 default:
1157 if (POLY_INT_CST_P (expr))
1158 return NULL_TREE;
1159 return fold_convert (ptr_type_node, expr);
1160 }
1161 }
1162
1163 /* Return true if address expression with non-DECL_P operand appears
1164 in EXPR. */
1165
1166 static bool
1167 contain_complex_addr_expr (tree expr)
1168 {
1169 bool res = false;
1170
1171 STRIP_NOPS (expr);
1172 switch (TREE_CODE (expr))
1173 {
1174 case POINTER_PLUS_EXPR:
1175 case PLUS_EXPR:
1176 case MINUS_EXPR:
1177 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1178 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1179 break;
1180
1181 case ADDR_EXPR:
1182 return (!DECL_P (TREE_OPERAND (expr, 0)));
1183
1184 default:
1185 return false;
1186 }
1187
1188 return res;
1189 }
1190
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1193
1194 static struct iv *
1195 alloc_iv (struct ivopts_data *data, tree base, tree step,
1196 bool no_overflow = false)
1197 {
1198 tree expr = base;
1199 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1200 sizeof (struct iv));
1201 gcc_assert (step != NULL_TREE);
1202
1203 /* Lower address expression in base except ones with DECL_P as operand.
1204 By doing this:
1205 1) More accurate cost can be computed for address expressions;
1206 2) Duplicate candidates won't be created for bases in different
1207 forms, like &a[0] and &a. */
1208 STRIP_NOPS (expr);
1209 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1210 || contain_complex_addr_expr (expr))
1211 {
1212 aff_tree comb;
1213 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1214 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1215 }
1216
1217 iv->base = base;
1218 iv->base_object = determine_base_object (base);
1219 iv->step = step;
1220 iv->biv_p = false;
1221 iv->nonlin_use = NULL;
1222 iv->ssa_name = NULL_TREE;
1223 if (!no_overflow
1224 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1225 base, step))
1226 no_overflow = true;
1227 iv->no_overflow = no_overflow;
1228 iv->have_address_use = false;
1229
1230 return iv;
1231 }
1232
1233 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1234 doesn't overflow. */
1235
1236 static void
1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1238 bool no_overflow)
1239 {
1240 struct version_info *info = name_info (data, iv);
1241
1242 gcc_assert (!info->iv);
1243
1244 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1245 info->iv = alloc_iv (data, base, step, no_overflow);
1246 info->iv->ssa_name = iv;
1247 }
1248
1249 /* Finds induction variable declaration for VAR. */
1250
1251 static struct iv *
1252 get_iv (struct ivopts_data *data, tree var)
1253 {
1254 basic_block bb;
1255 tree type = TREE_TYPE (var);
1256
1257 if (!POINTER_TYPE_P (type)
1258 && !INTEGRAL_TYPE_P (type))
1259 return NULL;
1260
1261 if (!name_info (data, var)->iv)
1262 {
1263 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1264
1265 if (!bb
1266 || !flow_bb_inside_loop_p (data->current_loop, bb))
1267 set_iv (data, var, var, build_int_cst (type, 0), true);
1268 }
1269
1270 return name_info (data, var)->iv;
1271 }
1272
1273 /* Return the first non-invariant ssa var found in EXPR. */
1274
1275 static tree
1276 extract_single_var_from_expr (tree expr)
1277 {
1278 int i, n;
1279 tree tmp;
1280 enum tree_code code;
1281
1282 if (!expr || is_gimple_min_invariant (expr))
1283 return NULL;
1284
1285 code = TREE_CODE (expr);
1286 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1287 {
1288 n = TREE_OPERAND_LENGTH (expr);
1289 for (i = 0; i < n; i++)
1290 {
1291 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1292
1293 if (tmp)
1294 return tmp;
1295 }
1296 }
1297 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1298 }
1299
1300 /* Finds basic ivs. */
1301
1302 static bool
1303 find_bivs (struct ivopts_data *data)
1304 {
1305 gphi *phi;
1306 affine_iv iv;
1307 tree step, type, base, stop;
1308 bool found = false;
1309 struct loop *loop = data->current_loop;
1310 gphi_iterator psi;
1311
1312 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1313 {
1314 phi = psi.phi ();
1315
1316 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1317 continue;
1318
1319 if (virtual_operand_p (PHI_RESULT (phi)))
1320 continue;
1321
1322 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1323 continue;
1324
1325 if (integer_zerop (iv.step))
1326 continue;
1327
1328 step = iv.step;
1329 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1330       /* Stop expanding the iv base at the first ssa var referred to by the
1331 	 iv step.  Ideally we should stop at any ssa var, but since that is
1332 	 expensive and unlikely to be needed, we just do it for the first one.
1333
1334 See PR64705 for the rationale. */
1335 stop = extract_single_var_from_expr (step);
1336 base = expand_simple_operations (base, stop);
1337 if (contains_abnormal_ssa_name_p (base)
1338 || contains_abnormal_ssa_name_p (step))
1339 continue;
1340
1341 type = TREE_TYPE (PHI_RESULT (phi));
1342 base = fold_convert (type, base);
1343 if (step)
1344 {
1345 if (POINTER_TYPE_P (type))
1346 step = convert_to_ptrofftype (step);
1347 else
1348 step = fold_convert (type, step);
1349 }
1350
1351 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1352 found = true;
1353 }
1354
1355 return found;
1356 }
1357
1358 /* Marks basic ivs. */
1359
1360 static void
1361 mark_bivs (struct ivopts_data *data)
1362 {
1363 gphi *phi;
1364 gimple *def;
1365 tree var;
1366 struct iv *iv, *incr_iv;
1367 struct loop *loop = data->current_loop;
1368 basic_block incr_bb;
1369 gphi_iterator psi;
1370
1371 data->bivs_not_used_in_addr = 0;
1372 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1373 {
1374 phi = psi.phi ();
1375
1376 iv = get_iv (data, PHI_RESULT (phi));
1377 if (!iv)
1378 continue;
1379
1380 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1381 def = SSA_NAME_DEF_STMT (var);
1382 /* Don't mark iv peeled from other one as biv. */
1383 if (def
1384 && gimple_code (def) == GIMPLE_PHI
1385 && gimple_bb (def) == loop->header)
1386 continue;
1387
1388 incr_iv = get_iv (data, var);
1389 if (!incr_iv)
1390 continue;
1391
1392 /* If the increment is in the subloop, ignore it. */
1393 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1394 if (incr_bb->loop_father != data->current_loop
1395 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1396 continue;
1397
1398 iv->biv_p = true;
1399 incr_iv->biv_p = true;
1400 if (iv->no_overflow)
1401 data->bivs_not_used_in_addr++;
1402 if (incr_iv->no_overflow)
1403 data->bivs_not_used_in_addr++;
1404 }
1405 }
1406
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408 parameters to IV. */
1409
1410 static bool
1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1412 {
1413 tree lhs, stop;
1414 struct loop *loop = data->current_loop;
1415
1416 iv->base = NULL_TREE;
1417 iv->step = NULL_TREE;
1418
1419 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1420 return false;
1421
1422 lhs = gimple_assign_lhs (stmt);
1423 if (TREE_CODE (lhs) != SSA_NAME)
1424 return false;
1425
1426 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1427 return false;
1428
1429   /* Stop expanding the iv base at the first ssa var referred to by the
1430      iv step.  Ideally we should stop at any ssa var, but since that is
1431      expensive and unlikely to be needed, we just do it for the first one.
1432
1433 See PR64705 for the rationale. */
1434 stop = extract_single_var_from_expr (iv->step);
1435 iv->base = expand_simple_operations (iv->base, stop);
1436 if (contains_abnormal_ssa_name_p (iv->base)
1437 || contains_abnormal_ssa_name_p (iv->step))
1438 return false;
1439
1440 /* If STMT could throw, then do not consider STMT as defining a GIV.
1441 While this will suppress optimizations, we cannot safely delete this
1442 GIV and associated statements, even if it appears it is not used. */
1443 if (stmt_could_throw_p (cfun, stmt))
1444 return false;
1445
1446 return true;
1447 }
1448
1449 /* Finds general ivs in statement STMT. */
1450
1451 static void
1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1453 {
1454 affine_iv iv;
1455
1456 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1457 return;
1458
1459 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1460 }
1461
1462 /* Finds general ivs in basic block BB. */
1463
1464 static void
1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1466 {
1467 gimple_stmt_iterator bsi;
1468
1469 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1470 find_givs_in_stmt (data, gsi_stmt (bsi));
1471 }
1472
1473 /* Finds general ivs. */
1474
1475 static void
1476 find_givs (struct ivopts_data *data)
1477 {
1478 struct loop *loop = data->current_loop;
1479 basic_block *body = get_loop_body_in_dom_order (loop);
1480 unsigned i;
1481
1482 for (i = 0; i < loop->num_nodes; i++)
1483 find_givs_in_bb (data, body[i]);
1484 free (body);
1485 }
1486
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488 variable and if so, its initial value and step. */
1489
1490 static bool
1491 find_induction_variables (struct ivopts_data *data)
1492 {
1493 unsigned i;
1494 bitmap_iterator bi;
1495
1496 if (!find_bivs (data))
1497 return false;
1498
1499 find_givs (data);
1500 mark_bivs (data);
1501
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1503 {
1504 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1505
1506 if (niter)
1507 {
1508 fprintf (dump_file, " number of iterations ");
1509 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1510 if (!integer_zerop (niter->may_be_zero))
1511 {
1512 fprintf (dump_file, "; zero if ");
1513 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1514 }
1515 fprintf (dump_file, "\n");
1516 };
1517
1518 fprintf (dump_file, "\n<Induction Vars>:\n");
1519 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1520 {
1521 struct version_info *info = ver_info (data, i);
1522 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1523 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1524 }
1525 }
1526
1527 return true;
1528 }
1529
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532 is the const offset stripped from IV base and MEM_TYPE is the type
1533 of the memory being addressed. For uses of other types, ADDR_BASE
1534 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1535
1536 static struct iv_use *
1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1538 gimple *stmt, enum use_type type, tree mem_type,
1539 tree addr_base, poly_uint64 addr_offset)
1540 {
1541 struct iv_use *use = XCNEW (struct iv_use);
1542
1543 use->id = group->vuses.length ();
1544 use->group_id = group->id;
1545 use->type = type;
1546 use->mem_type = mem_type;
1547 use->iv = iv;
1548 use->stmt = stmt;
1549 use->op_p = use_p;
1550 use->addr_base = addr_base;
1551 use->addr_offset = addr_offset;
1552
1553 group->vuses.safe_push (use);
1554 return use;
1555 }
1556
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558 NONLINEAR_USE is true if the invariant is used in a way we do not
1559 handle specially. */
1560
1561 static void
1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1563 {
1564 basic_block bb;
1565 struct version_info *info;
1566
1567 if (TREE_CODE (op) != SSA_NAME
1568 || virtual_operand_p (op))
1569 return;
1570
1571 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1572 if (bb
1573 && flow_bb_inside_loop_p (data->current_loop, bb))
1574 return;
1575
1576 info = name_info (data, op);
1577 info->name = op;
1578 info->has_nonlin_use |= nonlinear_use;
1579 if (!info->inv_id)
1580 info->inv_id = ++data->max_inv_var_id;
1581 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1582 }
1583
1584 /* Record a group of TYPE. */
1585
1586 static struct iv_group *
1587 record_group (struct ivopts_data *data, enum use_type type)
1588 {
1589 struct iv_group *group = XCNEW (struct iv_group);
1590
1591 group->id = data->vgroups.length ();
1592 group->type = type;
1593 group->related_cands = BITMAP_ALLOC (NULL);
1594 group->vuses.create (1);
1595
1596 data->vgroups.safe_push (group);
1597 return group;
1598 }
1599
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601    A new group will be created if there is no existing group for the use.
1602 MEM_TYPE is the type of memory being addressed, or NULL if this
1603 isn't an address reference. */
1604
1605 static struct iv_use *
1606 record_group_use (struct ivopts_data *data, tree *use_p,
1607 struct iv *iv, gimple *stmt, enum use_type type,
1608 tree mem_type)
1609 {
1610 tree addr_base = NULL;
1611 struct iv_group *group = NULL;
1612 poly_uint64 addr_offset = 0;
1613
1614   /* Record a non-address type use in a new group; for address type uses, try to reuse an existing compatible group first.  */
1615 if (address_p (type))
1616 {
1617 unsigned int i;
1618
1619 addr_base = strip_offset (iv->base, &addr_offset);
1620 for (i = 0; i < data->vgroups.length (); i++)
1621 {
1622 struct iv_use *use;
1623
1624 group = data->vgroups[i];
1625 use = group->vuses[0];
1626 if (!address_p (use->type))
1627 continue;
1628
1629 /* Check if it has the same stripped base and step. */
1630 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1631 && operand_equal_p (iv->step, use->iv->step, 0)
1632 && operand_equal_p (addr_base, use->addr_base, 0))
1633 break;
1634 }
1635 if (i == data->vgroups.length ())
1636 group = NULL;
1637 }
1638
1639 if (!group)
1640 group = record_group (data, type);
1641
1642 return record_use (group, use_p, iv, stmt, type, mem_type,
1643 addr_base, addr_offset);
1644 }
1645
1646 /* Checks whether the use OP is interesting and if so, records it. */
1647
1648 static struct iv_use *
1649 find_interesting_uses_op (struct ivopts_data *data, tree op)
1650 {
1651 struct iv *iv;
1652 gimple *stmt;
1653 struct iv_use *use;
1654
1655 if (TREE_CODE (op) != SSA_NAME)
1656 return NULL;
1657
1658 iv = get_iv (data, op);
1659 if (!iv)
1660 return NULL;
1661
1662 if (iv->nonlin_use)
1663 {
1664 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1665 return iv->nonlin_use;
1666 }
1667
1668 if (integer_zerop (iv->step))
1669 {
1670 record_invariant (data, op, true);
1671 return NULL;
1672 }
1673
1674 stmt = SSA_NAME_DEF_STMT (op);
1675 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1676
1677 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1678 iv->nonlin_use = use;
1679 return use;
1680 }
1681
1682 /* Indicate how compare type iv_use can be handled. */
1683 enum comp_iv_rewrite
1684 {
1685 COMP_IV_NA,
1686 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1687 COMP_IV_EXPR,
1688 /* We may rewrite compare type iv_uses on both sides of comparison by
1689 expressing value of each iv_use. */
1690 COMP_IV_EXPR_2,
1691 /* We may rewrite compare type iv_use by expressing value of the iv_use
1692 or by eliminating it with other iv_cand. */
1693 COMP_IV_ELIM
1694 };
1695
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697 of an induction variable and an invariant. If this is the case,
1698 CONTROL_VAR is set to location of the iv, BOUND to the location of
1699 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700    induction variable descriptions, and the kind of rewrite is returned
1701    (a COMP_IV_* value).  Otherwise, CONTROL_VAR and BOUND are set to the
1702    arguments of the condition and COMP_IV_NA is returned.  */
1703
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1706 tree **control_var, tree **bound,
1707 struct iv **iv_var, struct iv **iv_bound)
1708 {
1709 /* The objects returned when COND has constant operands. */
1710 static struct iv const_iv;
1711 static tree zero;
1712 tree *op0 = &zero, *op1 = &zero;
1713 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1714 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1715
1716 if (gimple_code (stmt) == GIMPLE_COND)
1717 {
1718 gcond *cond_stmt = as_a <gcond *> (stmt);
1719 op0 = gimple_cond_lhs_ptr (cond_stmt);
1720 op1 = gimple_cond_rhs_ptr (cond_stmt);
1721 }
1722 else
1723 {
1724 op0 = gimple_assign_rhs1_ptr (stmt);
1725 op1 = gimple_assign_rhs2_ptr (stmt);
1726 }
1727
1728 zero = integer_zero_node;
1729 const_iv.step = integer_zero_node;
1730
1731 if (TREE_CODE (*op0) == SSA_NAME)
1732 iv0 = get_iv (data, *op0);
1733 if (TREE_CODE (*op1) == SSA_NAME)
1734 iv1 = get_iv (data, *op1);
1735
1736   /* If both sides of the comparison are IVs, we can express the ivs on both ends.  */
1737 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1738 {
1739 rewrite_type = COMP_IV_EXPR_2;
1740 goto end;
1741 }
1742
1743   /* If neither side of the comparison is an IV.  */
1744 if ((!iv0 || integer_zerop (iv0->step))
1745 && (!iv1 || integer_zerop (iv1->step)))
1746 goto end;
1747
1748 /* Control variable may be on the other side. */
1749 if (!iv0 || integer_zerop (iv0->step))
1750 {
1751 std::swap (op0, op1);
1752 std::swap (iv0, iv1);
1753 }
1754 /* If one side is IV and the other side isn't loop invariant. */
1755 if (!iv1)
1756 rewrite_type = COMP_IV_EXPR;
1757 /* If one side is IV and the other side is loop invariant. */
1758 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1759 rewrite_type = COMP_IV_ELIM;
1760
1761 end:
1762 if (control_var)
1763 *control_var = op0;
1764 if (iv_var)
1765 *iv_var = iv0;
1766 if (bound)
1767 *bound = op1;
1768 if (iv_bound)
1769 *iv_bound = iv1;
1770
1771 return rewrite_type;
1772 }
1773
1774 /* Checks whether the condition in STMT is interesting and if so,
1775 records it. */
1776
1777 static void
1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1779 {
1780 tree *var_p, *bound_p;
1781 struct iv *var_iv, *bound_iv;
1782 enum comp_iv_rewrite ret;
1783
1784 ret = extract_cond_operands (data, stmt,
1785 &var_p, &bound_p, &var_iv, &bound_iv);
1786 if (ret == COMP_IV_NA)
1787 {
1788 find_interesting_uses_op (data, *var_p);
1789 find_interesting_uses_op (data, *bound_p);
1790 return;
1791 }
1792
1793 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1794 /* Record compare type iv_use for iv on the other side of comparison. */
1795 if (ret == COMP_IV_EXPR_2)
1796 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1797 }
1798
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800 relative to the loop LOOP, i.e. if all its operands are defined
1801 outside of the returned loop. Returns NULL if EXPR is not
1802 even obviously invariant in LOOP. */
1803
1804 struct loop *
1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1806 {
1807 basic_block def_bb;
1808 unsigned i, len;
1809
1810 if (is_gimple_min_invariant (expr))
1811 return current_loops->tree_root;
1812
1813 if (TREE_CODE (expr) == SSA_NAME)
1814 {
1815 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1816 if (def_bb)
1817 {
1818 if (flow_bb_inside_loop_p (loop, def_bb))
1819 return NULL;
1820 return superloop_at_depth (loop,
1821 loop_depth (def_bb->loop_father) + 1);
1822 }
1823
1824 return current_loops->tree_root;
1825 }
1826
1827 if (!EXPR_P (expr))
1828 return NULL;
1829
1830 unsigned maxdepth = 0;
1831 len = TREE_OPERAND_LENGTH (expr);
1832 for (i = 0; i < len; i++)
1833 {
1834 struct loop *ivloop;
1835 if (!TREE_OPERAND (expr, i))
1836 continue;
1837
1838 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1839 if (!ivloop)
1840 return NULL;
1841 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1842 }
1843
1844 return superloop_at_depth (loop, maxdepth);
1845 }
1846
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848 i.e. if all its operands are defined outside of the LOOP. LOOP
1849 should not be the function body. */
1850
1851 bool
1852 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1853 {
1854 basic_block def_bb;
1855 unsigned i, len;
1856
1857 gcc_assert (loop_depth (loop) > 0);
1858
1859 if (is_gimple_min_invariant (expr))
1860 return true;
1861
1862 if (TREE_CODE (expr) == SSA_NAME)
1863 {
1864 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1865 if (def_bb
1866 && flow_bb_inside_loop_p (loop, def_bb))
1867 return false;
1868
1869 return true;
1870 }
1871
1872 if (!EXPR_P (expr))
1873 return false;
1874
1875 len = TREE_OPERAND_LENGTH (expr);
1876 for (i = 0; i < len; i++)
1877 if (TREE_OPERAND (expr, i)
1878 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1879 return false;
1880
1881 return true;
1882 }
1883
1884 /* Given expression EXPR which computes inductive values with respect
1885 to the loop recorded in DATA, this function returns the biv from which
1886 EXPR is derived, by tracing definition chains of SSA variables in EXPR. */
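/* Illustrative example (not taken from the sources): if EXPR is something
   like '(long) (i_1 * 4)' where i_1 is a biv of the current loop, the
   recursion walks through the cast and the multiplication, reaches the
   SSA name i_1 and returns its iv record.  */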
1887
1888 static struct iv*
1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1890 {
1891 struct iv *iv;
1892 unsigned i, n;
1893 tree e2, e1;
1894 enum tree_code code;
1895 gimple *stmt;
1896
1897 if (expr == NULL_TREE)
1898 return NULL;
1899
1900 if (is_gimple_min_invariant (expr))
1901 return NULL;
1902
1903 code = TREE_CODE (expr);
1904 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1905 {
1906 n = TREE_OPERAND_LENGTH (expr);
1907 for (i = 0; i < n; i++)
1908 {
1909 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1910 if (iv)
1911 return iv;
1912 }
1913 }
1914
1915   /* Stop if it's not an SSA name.  */
1916 if (code != SSA_NAME)
1917 return NULL;
1918
1919 iv = get_iv (data, expr);
1920 if (!iv || integer_zerop (iv->step))
1921 return NULL;
1922 else if (iv->biv_p)
1923 return iv;
1924
1925 stmt = SSA_NAME_DEF_STMT (expr);
1926 if (gphi *phi = dyn_cast <gphi *> (stmt))
1927 {
1928 ssa_op_iter iter;
1929 use_operand_p use_p;
1930 basic_block phi_bb = gimple_bb (phi);
1931
1932 /* Skip loop header PHI that doesn't define biv. */
1933 if (phi_bb->loop_father == data->current_loop)
1934 return NULL;
1935
1936 if (virtual_operand_p (gimple_phi_result (phi)))
1937 return NULL;
1938
1939 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1940 {
1941 tree use = USE_FROM_PTR (use_p);
1942 iv = find_deriving_biv_for_expr (data, use);
1943 if (iv)
1944 return iv;
1945 }
1946 return NULL;
1947 }
1948 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1949 return NULL;
1950
1951 e1 = gimple_assign_rhs1 (stmt);
1952 code = gimple_assign_rhs_code (stmt);
1953 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1954 return find_deriving_biv_for_expr (data, e1);
1955
1956 switch (code)
1957 {
1958 case MULT_EXPR:
1959 case PLUS_EXPR:
1960 case MINUS_EXPR:
1961 case POINTER_PLUS_EXPR:
1962 /* Increments, decrements and multiplications by a constant
1963 are simple. */
1964 e2 = gimple_assign_rhs2 (stmt);
1965 iv = find_deriving_biv_for_expr (data, e2);
1966 if (iv)
1967 return iv;
1968 gcc_fallthrough ();
1969
1970 CASE_CONVERT:
1971 /* Casts are simple. */
1972 return find_deriving_biv_for_expr (data, e1);
1973
1974 default:
1975 break;
1976 }
1977
1978 return NULL;
1979 }
1980
1981 /* Record that BIV, together with its predecessor and successor bivs,
1982 is used in address type uses. */
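/* Illustrative example (an assumed typical input, not from the sources):
   two bivs with bases 'b' and 'b + step' and the same step are the
   predecessor/successor of each other; once one of them is used in an
   address, both get have_address_use set below.  */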
1983
1984 static void
1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1986 {
1987 unsigned i;
1988 tree type, base_1, base_2;
1989 bitmap_iterator bi;
1990
1991 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1992 || biv->have_address_use || !biv->no_overflow)
1993 return;
1994
1995 type = TREE_TYPE (biv->base);
1996 if (!INTEGRAL_TYPE_P (type))
1997 return;
1998
1999 biv->have_address_use = true;
2000 data->bivs_not_used_in_addr--;
2001 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2002 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2003 {
2004 struct iv *iv = ver_info (data, i)->iv;
2005
2006 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2007 || iv->have_address_use || !iv->no_overflow)
2008 continue;
2009
2010 if (type != TREE_TYPE (iv->base)
2011 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2012 continue;
2013
2014 if (!operand_equal_p (biv->step, iv->step, 0))
2015 continue;
2016
2017 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2018 if (operand_equal_p (base_1, iv->base, 0)
2019 || operand_equal_p (base_2, biv->base, 0))
2020 {
2021 iv->have_address_use = true;
2022 data->bivs_not_used_in_addr--;
2023 }
2024 }
2025 }
2026
2027 /* Accumulates the steps of indices into DATA and replaces their values with the
2028 initial ones. Returns false when the value of the index cannot be determined.
2029 Callback for for_each_index. */
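/* Illustrative example only: for a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   the index of 'a[i]' is an iv with base 0 and step 1; with 4-byte
   elements the callback replaces the index by 0 and accumulates 4 into
   dta->step.  */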
2030
2031 struct ifs_ivopts_data
2032 {
2033 struct ivopts_data *ivopts_data;
2034 gimple *stmt;
2035 tree step;
2036 };
2037
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2040 {
2041 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042 struct iv *iv;
2043 bool use_overflow_semantics = false;
2044 tree step, iv_base, iv_step, lbound, off;
2045 struct loop *loop = dta->ivopts_data->current_loop;
2046
2047 /* If base is a component ref, require that the offset of the reference
2048 be invariant. */
2049 if (TREE_CODE (base) == COMPONENT_REF)
2050 {
2051 off = component_ref_field_offset (base);
2052 return expr_invariant_in_loop_p (loop, off);
2053 }
2054
2055 /* If base is array, first check whether we will be able to move the
2056 reference out of the loop (in order to take its address in strength
2057 reduction). In order for this to work we need both lower bound
2058 and step to be loop invariants. */
2059 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2060 {
2061 /* Moreover, for a range, the size needs to be invariant as well. */
2062 if (TREE_CODE (base) == ARRAY_RANGE_REF
2063 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064 return false;
2065
2066 step = array_ref_element_size (base);
2067 lbound = array_ref_low_bound (base);
2068
2069 if (!expr_invariant_in_loop_p (loop, step)
2070 || !expr_invariant_in_loop_p (loop, lbound))
2071 return false;
2072 }
2073
2074 if (TREE_CODE (*idx) != SSA_NAME)
2075 return true;
2076
2077 iv = get_iv (dta->ivopts_data, *idx);
2078 if (!iv)
2079 return false;
2080
2081 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2082 *&x[0], which is not folded and does not trigger the
2083 ARRAY_REF path below. */
2084 *idx = iv->base;
2085
2086 if (integer_zerop (iv->step))
2087 return true;
2088
2089 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2090 {
2091 step = array_ref_element_size (base);
2092
2093 /* We only handle addresses whose step is an integer constant. */
2094 if (TREE_CODE (step) != INTEGER_CST)
2095 return false;
2096 }
2097 else
2098     /* The step for pointer arithmetic is already 1 byte.  */
2099 step = size_one_node;
2100
2101 iv_base = iv->base;
2102 iv_step = iv->step;
2103 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104 use_overflow_semantics = true;
2105
2106 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107 sizetype, &iv_base, &iv_step, dta->stmt,
2108 use_overflow_semantics))
2109 {
2110 /* The index might wrap. */
2111 return false;
2112 }
2113
2114 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2116
2117 if (dta->ivopts_data->bivs_not_used_in_addr)
2118 {
2119 if (!iv->biv_p)
2120 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2121
2122 record_biv_for_address_use (dta->ivopts_data, iv);
2123 }
2124 return true;
2125 }
2126
2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2128 object is passed to it in DATA. */
2129
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132 void *vdata)
2133 {
2134 struct ivopts_data *data = (struct ivopts_data *) vdata;
2135 find_interesting_uses_op (data, *idx);
2136 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2137 {
2138 find_interesting_uses_op (data, array_ref_element_size (base));
2139 find_interesting_uses_op (data, array_ref_low_bound (base));
2140 }
2141 return true;
2142 }
2143
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145 store cst to MUL and return true. Otherwise return false.
2146 The returned value is always sign-extended, regardless of the
2147 signedness of TOP and BOT. */
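/* Worked examples (illustrative only): for TOP 'n_1 * 4 + n_1 * 8' and
   BOT 'n_1' the recursion through MULT_EXPR and PLUS_EXPR yields
   *MUL = 12; for TOP 7 and BOT 2 the INTEGER_CST case fails because the
   truncating division leaves a nonzero remainder.  */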
2148
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2151 {
2152 tree mby;
2153 enum tree_code code;
2154 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155 widest_int res, p0, p1;
2156
2157 STRIP_NOPS (top);
2158 STRIP_NOPS (bot);
2159
2160 if (operand_equal_p (top, bot, 0))
2161 {
2162 *mul = 1;
2163 return true;
2164 }
2165
2166 code = TREE_CODE (top);
2167 switch (code)
2168 {
2169 case MULT_EXPR:
2170 mby = TREE_OPERAND (top, 1);
2171 if (TREE_CODE (mby) != INTEGER_CST)
2172 return false;
2173
2174 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175 return false;
2176
2177 *mul = wi::sext (res * wi::to_widest (mby), precision);
2178 return true;
2179
2180 case PLUS_EXPR:
2181 case MINUS_EXPR:
2182 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184 return false;
2185
2186 if (code == MINUS_EXPR)
2187 p1 = -p1;
2188 *mul = wi::sext (p0 + p1, precision);
2189 return true;
2190
2191 case INTEGER_CST:
2192 if (TREE_CODE (bot) != INTEGER_CST)
2193 return false;
2194
2195 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197 if (p1 == 0)
2198 return false;
2199 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200 return res == 0;
2201
2202 default:
2203 if (POLY_INT_CST_P (top)
2204 && POLY_INT_CST_P (bot)
2205 && constant_multiple_p (wi::to_poly_widest (top),
2206 wi::to_poly_widest (bot), mul))
2207 return true;
2208
2209 return false;
2210 }
2211 }
2212
2213 /* Return true if memory reference REF with step STEP may be unaligned. */
2214
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2217 {
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219 thus they are not misaligned. */
2220 if (TREE_CODE (ref) == TARGET_MEM_REF)
2221 return false;
2222
2223 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2226
2227 unsigned HOST_WIDE_INT bitpos;
2228 unsigned int ref_align;
2229 get_object_alignment_1 (ref, &ref_align, &bitpos);
2230 if (ref_align < align
2231 || (bitpos % align) != 0
2232 || (bitpos % BITS_PER_UNIT) != 0)
2233 return true;
2234
2235 unsigned int trailing_zeros = tree_ctz (step);
2236 if (trailing_zeros < HOST_BITS_PER_INT
2237 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238 return true;
2239
2240 return false;
2241 }
2242
2243 /* Return true if EXPR may be non-addressable. */
2244
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2247 {
2248 switch (TREE_CODE (expr))
2249 {
2250 case VAR_DECL:
2251 /* Check if it's a register variable. */
2252 return DECL_HARD_REGISTER (expr);
2253
2254 case TARGET_MEM_REF:
2255 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2256 target, thus they are always addressable. */
2257 return false;
2258
2259 case MEM_REF:
2260 /* Likewise for MEM_REFs, modulo the storage order. */
2261 return REF_REVERSE_STORAGE_ORDER (expr);
2262
2263 case BIT_FIELD_REF:
2264 if (REF_REVERSE_STORAGE_ORDER (expr))
2265 return true;
2266 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2267
2268 case COMPONENT_REF:
2269 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2270 return true;
2271 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2272 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2273
2274 case ARRAY_REF:
2275 case ARRAY_RANGE_REF:
2276 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2277 return true;
2278 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279
2280 case VIEW_CONVERT_EXPR:
2281 /* This kind of view-conversions may wrap non-addressable objects
2282 and make them look addressable. After some processing the
2283 non-addressability may be uncovered again, causing ADDR_EXPRs
2284 of inappropriate objects to be built. */
2285 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2286 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2287 return true;
2288 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2289
2290 CASE_CONVERT:
2291 return true;
2292
2293 default:
2294 break;
2295 }
2296
2297 return false;
2298 }
2299
2300 /* Finds addresses in *OP_P inside STMT. */
2301
2302 static void
2303 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2304 tree *op_p)
2305 {
2306 tree base = *op_p, step = size_zero_node;
2307 struct iv *civ;
2308 struct ifs_ivopts_data ifs_ivopts_data;
2309
2310 /* Do not play with volatile memory references. A bit too conservative,
2311 perhaps, but safe. */
2312 if (gimple_has_volatile_ops (stmt))
2313 goto fail;
2314
2315 /* Ignore bitfields for now. Not really something terribly complicated
2316 to handle. TODO. */
2317 if (TREE_CODE (base) == BIT_FIELD_REF)
2318 goto fail;
2319
2320 base = unshare_expr (base);
2321
2322 if (TREE_CODE (base) == TARGET_MEM_REF)
2323 {
2324 tree type = build_pointer_type (TREE_TYPE (base));
2325 tree astep;
2326
2327 if (TMR_BASE (base)
2328 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2329 {
2330 civ = get_iv (data, TMR_BASE (base));
2331 if (!civ)
2332 goto fail;
2333
2334 TMR_BASE (base) = civ->base;
2335 step = civ->step;
2336 }
2337 if (TMR_INDEX2 (base)
2338 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2339 {
2340 civ = get_iv (data, TMR_INDEX2 (base));
2341 if (!civ)
2342 goto fail;
2343
2344 TMR_INDEX2 (base) = civ->base;
2345 step = civ->step;
2346 }
2347 if (TMR_INDEX (base)
2348 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2349 {
2350 civ = get_iv (data, TMR_INDEX (base));
2351 if (!civ)
2352 goto fail;
2353
2354 TMR_INDEX (base) = civ->base;
2355 astep = civ->step;
2356
2357 if (astep)
2358 {
2359 if (TMR_STEP (base))
2360 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2361
2362 step = fold_build2 (PLUS_EXPR, type, step, astep);
2363 }
2364 }
2365
2366 if (integer_zerop (step))
2367 goto fail;
2368 base = tree_mem_ref_addr (type, base);
2369 }
2370 else
2371 {
2372 ifs_ivopts_data.ivopts_data = data;
2373 ifs_ivopts_data.stmt = stmt;
2374 ifs_ivopts_data.step = size_zero_node;
2375 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2376 || integer_zerop (ifs_ivopts_data.step))
2377 goto fail;
2378 step = ifs_ivopts_data.step;
2379
2380 /* Check that the base expression is addressable. This needs
2381 to be done after substituting bases of IVs into it. */
2382 if (may_be_nonaddressable_p (base))
2383 goto fail;
2384
2385 /* Moreover, on strict alignment platforms, check that it is
2386 sufficiently aligned. */
2387 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2388 goto fail;
2389
2390 base = build_fold_addr_expr (base);
2391
2392 /* Substituting bases of IVs into the base expression might
2393 have caused folding opportunities. */
2394 if (TREE_CODE (base) == ADDR_EXPR)
2395 {
2396 tree *ref = &TREE_OPERAND (base, 0);
2397 while (handled_component_p (*ref))
2398 ref = &TREE_OPERAND (*ref, 0);
2399 if (TREE_CODE (*ref) == MEM_REF)
2400 {
2401 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2402 TREE_OPERAND (*ref, 0),
2403 TREE_OPERAND (*ref, 1));
2404 if (tem)
2405 *ref = tem;
2406 }
2407 }
2408 }
2409
2410 civ = alloc_iv (data, base, step);
2411 /* Fail if base object of this memory reference is unknown. */
2412 if (civ->base_object == NULL_TREE)
2413 goto fail;
2414
2415 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2416 return;
2417
2418 fail:
2419 for_each_index (op_p, idx_record_use, data);
2420 }
2421
2422 /* Finds and records invariants used in STMT. */
2423
2424 static void
2425 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2426 {
2427 ssa_op_iter iter;
2428 use_operand_p use_p;
2429 tree op;
2430
2431 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2432 {
2433 op = USE_FROM_PTR (use_p);
2434 record_invariant (data, op, false);
2435 }
2436 }
2437
2438 /* CALL calls an internal function. If operand *OP_P will become an
2439 address when the call is expanded, return the type of the memory
2440 being addressed, otherwise return null. */
2441
2442 static tree
2443 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2444 {
2445 switch (gimple_call_internal_fn (call))
2446 {
2447 case IFN_MASK_LOAD:
2448 if (op_p == gimple_call_arg_ptr (call, 0))
2449 return TREE_TYPE (gimple_call_lhs (call));
2450 return NULL_TREE;
2451
2452 case IFN_MASK_STORE:
2453 if (op_p == gimple_call_arg_ptr (call, 0))
2454 return TREE_TYPE (gimple_call_arg (call, 3));
2455 return NULL_TREE;
2456
2457 default:
2458 return NULL_TREE;
2459 }
2460 }
2461
2462 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2463 Return true if the operand will become an address when STMT
2464 is expanded and record the associated address use if so. */
2465
2466 static bool
2467 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2468 struct iv *iv)
2469 {
2470 /* Fail if base object of this memory reference is unknown. */
2471 if (iv->base_object == NULL_TREE)
2472 return false;
2473
2474 tree mem_type = NULL_TREE;
2475 if (gcall *call = dyn_cast <gcall *> (stmt))
2476 if (gimple_call_internal_p (call))
2477 mem_type = get_mem_type_for_internal_fn (call, op_p);
2478 if (mem_type)
2479 {
2480 iv = alloc_iv (data, iv->base, iv->step);
2481 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2482 return true;
2483 }
2484 return false;
2485 }
2486
2487 /* Finds interesting uses of induction variables in the statement STMT. */
2488
2489 static void
2490 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2491 {
2492 struct iv *iv;
2493 tree op, *lhs, *rhs;
2494 ssa_op_iter iter;
2495 use_operand_p use_p;
2496 enum tree_code code;
2497
2498 find_invariants_stmt (data, stmt);
2499
2500 if (gimple_code (stmt) == GIMPLE_COND)
2501 {
2502 find_interesting_uses_cond (data, stmt);
2503 return;
2504 }
2505
2506 if (is_gimple_assign (stmt))
2507 {
2508 lhs = gimple_assign_lhs_ptr (stmt);
2509 rhs = gimple_assign_rhs1_ptr (stmt);
2510
2511 if (TREE_CODE (*lhs) == SSA_NAME)
2512 {
2513 /* If the statement defines an induction variable, the uses are not
2514 interesting by themselves. */
2515
2516 iv = get_iv (data, *lhs);
2517
2518 if (iv && !integer_zerop (iv->step))
2519 return;
2520 }
2521
2522 code = gimple_assign_rhs_code (stmt);
2523 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2524 && (REFERENCE_CLASS_P (*rhs)
2525 || is_gimple_val (*rhs)))
2526 {
2527 if (REFERENCE_CLASS_P (*rhs))
2528 find_interesting_uses_address (data, stmt, rhs);
2529 else
2530 find_interesting_uses_op (data, *rhs);
2531
2532 if (REFERENCE_CLASS_P (*lhs))
2533 find_interesting_uses_address (data, stmt, lhs);
2534 return;
2535 }
2536 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2537 {
2538 find_interesting_uses_cond (data, stmt);
2539 return;
2540 }
2541
2542 /* TODO -- we should also handle address uses of type
2543
2544 memory = call (whatever);
2545
2546 and
2547
2548 call (memory). */
2549 }
2550
2551 if (gimple_code (stmt) == GIMPLE_PHI
2552 && gimple_bb (stmt) == data->current_loop->header)
2553 {
2554 iv = get_iv (data, PHI_RESULT (stmt));
2555
2556 if (iv && !integer_zerop (iv->step))
2557 return;
2558 }
2559
2560 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2561 {
2562 op = USE_FROM_PTR (use_p);
2563
2564 if (TREE_CODE (op) != SSA_NAME)
2565 continue;
2566
2567 iv = get_iv (data, op);
2568 if (!iv)
2569 continue;
2570
2571 if (!find_address_like_use (data, stmt, use_p->use, iv))
2572 find_interesting_uses_op (data, op);
2573 }
2574 }
2575
2576 /* Finds interesting uses of induction variables outside of loops
2577 on loop exit edge EXIT. */
2578
2579 static void
2580 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2581 {
2582 gphi *phi;
2583 gphi_iterator psi;
2584 tree def;
2585
2586 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2587 {
2588 phi = psi.phi ();
2589 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2590 if (!virtual_operand_p (def))
2591 find_interesting_uses_op (data, def);
2592 }
2593 }
2594
2595 /* Return TRUE if OFFSET is within the range of the [base + offset] addressing
2596 mode for the memory reference represented by USE. */
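/* For instance (a hypothetical target, not documentation): on a target
   whose [base + offset] form accepts 12-bit signed displacements, an
   OFFSET of 2040 would validate below while an OFFSET of 1 << 20 would
   not.  */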
2597
2598 static GTY (()) vec<rtx, va_gc> *addr_list;
2599
2600 static bool
2601 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2602 {
2603 rtx reg, addr;
2604 unsigned list_index;
2605 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2606 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2607
2608 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2609 if (list_index >= vec_safe_length (addr_list))
2610 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2611
2612 addr = (*addr_list)[list_index];
2613 if (!addr)
2614 {
2615 addr_mode = targetm.addr_space.address_mode (as);
2616 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2617 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2618 (*addr_list)[list_index] = addr;
2619 }
2620 else
2621 addr_mode = GET_MODE (addr);
2622
2623 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2624 return (memory_address_addr_space_p (mem_mode, addr, as));
2625 }
2626
2627 /* Comparison function to sort group in ascending order of addr_offset. */
2628
2629 static int
2630 group_compare_offset (const void *a, const void *b)
2631 {
2632 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2633 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2634
2635 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2636 }
2637
2638 /* Check if small groups should be split. Return true if no group
2639 contains more than two uses with distinct addr_offsets. Return
2640 false otherwise. We want to split such groups because:
2641
2642 1) Small groups don't have much benefit and may interfere with
2643 general candidate selection.
2644 2) The problem size with only small groups is usually small and
2645 the general algorithm can handle it well.
2646
2647 TODO -- The above claim may not hold when we want to merge memory
2648 accesses with consecutive addresses. */
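/* Illustrative example only: a group with addr_offsets {0, 8, 8} has two
   distinct offsets and keeps the return value true; a group with
   addr_offsets {0, 8, 16} has three distinct offsets and forces the
   return value false.  */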
2649
2650 static bool
2651 split_small_address_groups_p (struct ivopts_data *data)
2652 {
2653 unsigned int i, j, distinct = 1;
2654 struct iv_use *pre;
2655 struct iv_group *group;
2656
2657 for (i = 0; i < data->vgroups.length (); i++)
2658 {
2659 group = data->vgroups[i];
2660 if (group->vuses.length () == 1)
2661 continue;
2662
2663 gcc_assert (address_p (group->type));
2664 if (group->vuses.length () == 2)
2665 {
2666 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2667 group->vuses[1]->addr_offset) > 0)
2668 std::swap (group->vuses[0], group->vuses[1]);
2669 }
2670 else
2671 group->vuses.qsort (group_compare_offset);
2672
2673 if (distinct > 2)
2674 continue;
2675
2676 distinct = 1;
2677 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2678 {
2679 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2680 {
2681 pre = group->vuses[j];
2682 distinct++;
2683 }
2684
2685 if (distinct > 2)
2686 break;
2687 }
2688 }
2689
2690 return (distinct <= 2);
2691 }
2692
2693 /* For each group of address type uses, this function further groups
2694 these uses according to the maximum offset supported by the target's
2695 [base + offset] addressing mode. */
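/* Illustrative example only: for accesses like

     p[0] = ...;  p[1] = ...;  p[1 << 20] = ...;

   the first two uses stay in one group, while the last one is moved to a
   new group if the target's [base + offset] form cannot encode the large
   offset relative to the first use.  */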
2696
2697 static void
2698 split_address_groups (struct ivopts_data *data)
2699 {
2700 unsigned int i, j;
2701   /* Whether to split every group unconditionally.  */
2702 bool split_p = split_small_address_groups_p (data);
2703
2704 for (i = 0; i < data->vgroups.length (); i++)
2705 {
2706 struct iv_group *new_group = NULL;
2707 struct iv_group *group = data->vgroups[i];
2708 struct iv_use *use = group->vuses[0];
2709
2710 use->id = 0;
2711 use->group_id = group->id;
2712 if (group->vuses.length () == 1)
2713 continue;
2714
2715 gcc_assert (address_p (use->type));
2716
2717 for (j = 1; j < group->vuses.length ();)
2718 {
2719 struct iv_use *next = group->vuses[j];
2720 poly_int64 offset = next->addr_offset - use->addr_offset;
2721
2722 /* Split the group if asked to, or if the offset against the first
2723 use can't fit in the offset part of the addressing mode. IV uses
2724 having the same offset are still kept in one group. */
2725 if (maybe_ne (offset, 0)
2726 && (split_p || !addr_offset_valid_p (use, offset)))
2727 {
2728 if (!new_group)
2729 new_group = record_group (data, group->type);
2730 group->vuses.ordered_remove (j);
2731 new_group->vuses.safe_push (next);
2732 continue;
2733 }
2734
2735 next->id = j;
2736 next->group_id = group->id;
2737 j++;
2738 }
2739 }
2740 }
2741
2742 /* Finds uses of the induction variables that are interesting. */
2743
2744 static void
2745 find_interesting_uses (struct ivopts_data *data)
2746 {
2747 basic_block bb;
2748 gimple_stmt_iterator bsi;
2749 basic_block *body = get_loop_body (data->current_loop);
2750 unsigned i;
2751 edge e;
2752
2753 for (i = 0; i < data->current_loop->num_nodes; i++)
2754 {
2755 edge_iterator ei;
2756 bb = body[i];
2757
2758 FOR_EACH_EDGE (e, ei, bb->succs)
2759 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2760 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2761 find_interesting_uses_outside (data, e);
2762
2763 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2764 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2765 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2766 if (!is_gimple_debug (gsi_stmt (bsi)))
2767 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2768 }
2769 free (body);
2770
2771 split_address_groups (data);
2772
2773 if (dump_file && (dump_flags & TDF_DETAILS))
2774 {
2775 fprintf (dump_file, "\n<IV Groups>:\n");
2776 dump_groups (dump_file, data);
2777 fprintf (dump_file, "\n");
2778 }
2779 }
2780
2781 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2782 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2783 we are at the top-level of the processed address. */
2784
2785 static tree
2786 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2787 poly_int64 *offset)
2788 {
2789 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2790 enum tree_code code;
2791 tree type, orig_type = TREE_TYPE (expr);
2792 poly_int64 off0, off1;
2793 HOST_WIDE_INT st;
2794 tree orig_expr = expr;
2795
2796 STRIP_NOPS (expr);
2797
2798 type = TREE_TYPE (expr);
2799 code = TREE_CODE (expr);
2800 *offset = 0;
2801
2802 switch (code)
2803 {
2804 case POINTER_PLUS_EXPR:
2805 case PLUS_EXPR:
2806 case MINUS_EXPR:
2807 op0 = TREE_OPERAND (expr, 0);
2808 op1 = TREE_OPERAND (expr, 1);
2809
2810 op0 = strip_offset_1 (op0, false, false, &off0);
2811 op1 = strip_offset_1 (op1, false, false, &off1);
2812
2813 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2814 if (op0 == TREE_OPERAND (expr, 0)
2815 && op1 == TREE_OPERAND (expr, 1))
2816 return orig_expr;
2817
2818 if (integer_zerop (op1))
2819 expr = op0;
2820 else if (integer_zerop (op0))
2821 {
2822 if (code == MINUS_EXPR)
2823 expr = fold_build1 (NEGATE_EXPR, type, op1);
2824 else
2825 expr = op1;
2826 }
2827 else
2828 expr = fold_build2 (code, type, op0, op1);
2829
2830 return fold_convert (orig_type, expr);
2831
2832 case MULT_EXPR:
2833 op1 = TREE_OPERAND (expr, 1);
2834 if (!cst_and_fits_in_hwi (op1))
2835 return orig_expr;
2836
2837 op0 = TREE_OPERAND (expr, 0);
2838 op0 = strip_offset_1 (op0, false, false, &off0);
2839 if (op0 == TREE_OPERAND (expr, 0))
2840 return orig_expr;
2841
2842 *offset = off0 * int_cst_value (op1);
2843 if (integer_zerop (op0))
2844 expr = op0;
2845 else
2846 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2847
2848 return fold_convert (orig_type, expr);
2849
2850 case ARRAY_REF:
2851 case ARRAY_RANGE_REF:
2852 if (!inside_addr)
2853 return orig_expr;
2854
2855 step = array_ref_element_size (expr);
2856 if (!cst_and_fits_in_hwi (step))
2857 break;
2858
2859 st = int_cst_value (step);
2860 op1 = TREE_OPERAND (expr, 1);
2861 op1 = strip_offset_1 (op1, false, false, &off1);
2862 *offset = off1 * st;
2863
2864 if (top_compref
2865 && integer_zerop (op1))
2866 {
2867 /* Strip the component reference completely. */
2868 op0 = TREE_OPERAND (expr, 0);
2869 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2870 *offset += off0;
2871 return op0;
2872 }
2873 break;
2874
2875 case COMPONENT_REF:
2876 {
2877 tree field;
2878
2879 if (!inside_addr)
2880 return orig_expr;
2881
2882 tmp = component_ref_field_offset (expr);
2883 field = TREE_OPERAND (expr, 1);
2884 if (top_compref
2885 && cst_and_fits_in_hwi (tmp)
2886 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2887 {
2888 HOST_WIDE_INT boffset, abs_off;
2889
2890 /* Strip the component reference completely. */
2891 op0 = TREE_OPERAND (expr, 0);
2892 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2893 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2894 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2895 if (boffset < 0)
2896 abs_off = -abs_off;
2897
2898 *offset = off0 + int_cst_value (tmp) + abs_off;
2899 return op0;
2900 }
2901 }
2902 break;
2903
2904 case ADDR_EXPR:
2905 op0 = TREE_OPERAND (expr, 0);
2906 op0 = strip_offset_1 (op0, true, true, &off0);
2907 *offset += off0;
2908
2909 if (op0 == TREE_OPERAND (expr, 0))
2910 return orig_expr;
2911
2912 expr = build_fold_addr_expr (op0);
2913 return fold_convert (orig_type, expr);
2914
2915 case MEM_REF:
2916 /* ??? Offset operand? */
2917 inside_addr = false;
2918 break;
2919
2920 default:
2921 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2922 return build_int_cst (orig_type, 0);
2923 return orig_expr;
2924 }
2925
2926 /* Default handling of expressions for which we want to recurse into
2927 the first operand. */
2928 op0 = TREE_OPERAND (expr, 0);
2929 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2930 *offset += off0;
2931
2932 if (op0 == TREE_OPERAND (expr, 0)
2933 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2934 return orig_expr;
2935
2936 expr = copy_node (expr);
2937 TREE_OPERAND (expr, 0) = op0;
2938 if (op1)
2939 TREE_OPERAND (expr, 1) = op1;
2940
2941 /* Inside address, we might strip the top level component references,
2942 thus changing type of the expression. Handling of ADDR_EXPR
2943 will fix that. */
2944 expr = fold_convert (orig_type, expr);
2945
2946 return expr;
2947 }
2948
2949 /* Strips constant offsets from EXPR and stores them to OFFSET. */
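/* Illustrative examples only: for EXPR '&a[i_1 + 3]' with 4-byte elements
   the returned core is '&a[i_1]' and *OFFSET is 12; for EXPR 'p_2 + 16'
   the core is 'p_2' and *OFFSET is 16.  */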
2950
2951 tree
2952 strip_offset (tree expr, poly_uint64_pod *offset)
2953 {
2954 poly_int64 off;
2955 tree core = strip_offset_1 (expr, false, false, &off);
2956 *offset = off;
2957 return core;
2958 }
2959
2960 /* Returns a variant of TYPE that can be used as a base for different uses.
2961 We return an unsigned type with the same precision, which avoids problems
2962 with overflows. */
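/* For example (illustrative only): 'int' maps to 'unsigned int', and a
   pointer type maps to the unsigned integer type of the same precision
   returned by unsigned_type_for.  */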
2963
2964 static tree
2965 generic_type_for (tree type)
2966 {
2967 if (POINTER_TYPE_P (type))
2968 return unsigned_type_for (type);
2969
2970 if (TYPE_UNSIGNED (type))
2971 return type;
2972
2973 return unsigned_type_for (type);
2974 }
2975
2976 /* Private data for walk_tree. */
2977
2978 struct walk_tree_data
2979 {
2980 bitmap *inv_vars;
2981 struct ivopts_data *idata;
2982 };
2983
2984 /* Callback function for walk_tree, it records invariants and symbol
2985 reference in *EXPR_P. DATA is the structure storing result info. */
2986
2987 static tree
2988 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2989 {
2990 tree op = *expr_p;
2991 struct version_info *info;
2992 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2993
2994 if (TREE_CODE (op) != SSA_NAME)
2995 return NULL_TREE;
2996
2997 info = name_info (wdata->idata, op);
2998   /* Because we expand simple operations when finding IVs, a loop invariant
2999      variable that isn't referred to by the original loop could be used now.
3000      Record such invariant variables here.  */
3001 if (!info->iv)
3002 {
3003 struct ivopts_data *idata = wdata->idata;
3004 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3005
3006 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3007 {
3008 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3009 record_invariant (idata, op, false);
3010 }
3011 }
3012 if (!info->inv_id || info->has_nonlin_use)
3013 return NULL_TREE;
3014
3015 if (!*wdata->inv_vars)
3016 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3017 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3018
3019 return NULL_TREE;
3020 }
3021
3022 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
3023 should store them. */
3024
3025 static inline void
3026 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3027 {
3028 struct walk_tree_data wdata;
3029
3030 if (!inv_vars)
3031 return;
3032
3033 wdata.idata = data;
3034 wdata.inv_vars = inv_vars;
3035 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3036 }
3037
3038 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
3039 entry is recorded if it doesn't exist yet. Given the two exprs below:
3040 inv_expr + cst1, inv_expr + cst2
3041 it's hard to decide whether the constant part should be stripped
3042 or not. We choose not to strip it, based on the facts below:
3043 1) We need to count the ADD cost for the constant part if it's stripped,
3044 which isn't always trivial where this function is called.
3045 2) Stripping the constant away may conflict with the following loop
3046 invariant hoisting pass.
3047 3) Not stripping the constant away results in more invariant exprs,
3048 which usually leads to decisions preferring lower register pressure. */
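/* Illustrative consequence of the choice above: 'inv_1 + 4' and
   'inv_1 + 8' are recorded as two separate invariant expressions rather
   than sharing a single stripped 'inv_1' entry.  */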
3049
3050 static iv_inv_expr_ent *
3051 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3052 {
3053 STRIP_NOPS (inv_expr);
3054
3055 if (poly_int_tree_p (inv_expr)
3056 || TREE_CODE (inv_expr) == SSA_NAME)
3057 return NULL;
3058
3059 /* Don't strip constant part away as we used to. */
3060
3061 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3062 struct iv_inv_expr_ent ent;
3063 ent.expr = inv_expr;
3064 ent.hash = iterative_hash_expr (inv_expr, 0);
3065 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3066
3067 if (!*slot)
3068 {
3069 *slot = XNEW (struct iv_inv_expr_ent);
3070 (*slot)->expr = inv_expr;
3071 (*slot)->hash = ent.hash;
3072 (*slot)->id = ++data->max_inv_expr_id;
3073 }
3074
3075 return *slot;
3076 }
3077
3078 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3079 position to POS. If USE is not NULL, the candidate is set as related to
3080 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3081 replacement of the final value of the iv by a direct computation. */
3082
3083 static struct iv_cand *
3084 add_candidate_1 (struct ivopts_data *data,
3085 tree base, tree step, bool important, enum iv_position pos,
3086 struct iv_use *use, gimple *incremented_at,
3087 struct iv *orig_iv = NULL)
3088 {
3089 unsigned i;
3090 struct iv_cand *cand = NULL;
3091 tree type, orig_type;
3092
3093 gcc_assert (base && step);
3094
3095 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3096 live, but the ivopts code may replace a real pointer with one
3097 pointing before or after the memory block that is then adjusted
3098 into the memory block during the loop. FIXME: It would likely be
3099 better to actually force the pointer live and still use ivopts;
3100 for example, it would be enough to write the pointer into memory
3101 and keep it there until after the loop. */
3102 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3103 return NULL;
3104
3105 /* For non-original variables, make sure their values are computed in a type
3106 that does not invoke undefined behavior on overflows (since in general,
3107 we cannot prove that these induction variables are non-wrapping). */
3108 if (pos != IP_ORIGINAL)
3109 {
3110 orig_type = TREE_TYPE (base);
3111 type = generic_type_for (orig_type);
3112 if (type != orig_type)
3113 {
3114 base = fold_convert (type, base);
3115 step = fold_convert (type, step);
3116 }
3117 }
3118
3119 for (i = 0; i < data->vcands.length (); i++)
3120 {
3121 cand = data->vcands[i];
3122
3123 if (cand->pos != pos)
3124 continue;
3125
3126 if (cand->incremented_at != incremented_at
3127 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3128 && cand->ainc_use != use))
3129 continue;
3130
3131 if (operand_equal_p (base, cand->iv->base, 0)
3132 && operand_equal_p (step, cand->iv->step, 0)
3133 && (TYPE_PRECISION (TREE_TYPE (base))
3134 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3135 break;
3136 }
3137
3138 if (i == data->vcands.length ())
3139 {
3140 cand = XCNEW (struct iv_cand);
3141 cand->id = i;
3142 cand->iv = alloc_iv (data, base, step);
3143 cand->pos = pos;
3144 if (pos != IP_ORIGINAL)
3145 {
3146 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3147 cand->var_after = cand->var_before;
3148 }
3149 cand->important = important;
3150 cand->incremented_at = incremented_at;
3151 data->vcands.safe_push (cand);
3152
3153 if (!poly_int_tree_p (step))
3154 {
3155 find_inv_vars (data, &step, &cand->inv_vars);
3156
3157 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3158 /* Share bitmap between inv_vars and inv_exprs for cand. */
3159 if (inv_expr != NULL)
3160 {
3161 cand->inv_exprs = cand->inv_vars;
3162 cand->inv_vars = NULL;
3163 if (cand->inv_exprs)
3164 bitmap_clear (cand->inv_exprs);
3165 else
3166 cand->inv_exprs = BITMAP_ALLOC (NULL);
3167
3168 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3169 }
3170 }
3171
3172 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3173 cand->ainc_use = use;
3174 else
3175 cand->ainc_use = NULL;
3176
3177 cand->orig_iv = orig_iv;
3178 if (dump_file && (dump_flags & TDF_DETAILS))
3179 dump_cand (dump_file, cand);
3180 }
3181
3182 cand->important |= important;
3183
3184 /* Relate candidate to the group for which it is added. */
3185 if (use)
3186 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187
3188 return cand;
3189 }
3190
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3192 is allowed.
3193
3194 The purpose is to avoid splitting the latch edge with a biv increment,
3195 thus creating a jump, possibly confusing other optimization passes and
3196 leaving less freedom to the scheduler. So we allow IP_END only if
3197 IP_NORMAL is not available (so we do not have a better alternative), or
3198 if the latch edge is already nonempty. */
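/* Illustrative example only: in a do-while style loop whose latch block
   already contains statements, IP_END is allowed because placing the
   increment there does not create a new jump.  */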
3199
3200 static bool
3201 allow_ip_end_pos_p (struct loop *loop)
3202 {
3203 if (!ip_normal_pos (loop))
3204 return true;
3205
3206 if (!empty_block_p (ip_end_pos (loop)))
3207 return true;
3208
3209 return false;
3210 }
3211
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213 Important field is set to IMPORTANT. */
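/* Illustrative example only: for a use like '*p++ = x' where the iv of p
   steps by exactly the access size, a target with post-increment
   addressing gets an IP_AFTER_USE candidate {base, step}, and one with
   pre-increment addressing gets an IP_BEFORE_USE candidate based at
   'base - step'.  */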
3214
3215 static void
3216 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3217 bool important, struct iv_use *use)
3218 {
3219 basic_block use_bb = gimple_bb (use->stmt);
3220 machine_mode mem_mode;
3221 unsigned HOST_WIDE_INT cstepi;
3222
3223 /* If we insert the increment in any position other than the standard
3224 ones, we must ensure that it is incremented once per iteration.
3225 It must not be in an inner nested loop, or one side of an if
3226 statement. */
3227 if (use_bb->loop_father != data->current_loop
3228 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3229 || stmt_can_throw_internal (cfun, use->stmt)
3230 || !cst_and_fits_in_hwi (step))
3231 return;
3232
3233 cstepi = int_cst_value (step);
3234
3235 mem_mode = TYPE_MODE (use->mem_type);
3236 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3237 || USE_STORE_PRE_INCREMENT (mem_mode))
3238 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3239 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3240 || USE_STORE_PRE_DECREMENT (mem_mode))
3241 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3242 {
3243 enum tree_code code = MINUS_EXPR;
3244 tree new_base;
3245 tree new_step = step;
3246
3247 if (POINTER_TYPE_P (TREE_TYPE (base)))
3248 {
3249 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3250 code = POINTER_PLUS_EXPR;
3251 }
3252 else
3253 new_step = fold_convert (TREE_TYPE (base), new_step);
3254 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3255 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3256 use->stmt);
3257 }
3258 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3259 || USE_STORE_POST_INCREMENT (mem_mode))
3260 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3261 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3262 || USE_STORE_POST_DECREMENT (mem_mode))
3263 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3264 {
3265 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3266 use->stmt);
3267 }
3268 }
3269
3270 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3271 position to POS. If USE is not NULL, the candidate is set as related to
3272 it. The candidate computation is scheduled before exit condition and at
3273 the end of loop. */
3274
3275 static void
3276 add_candidate (struct ivopts_data *data,
3277 tree base, tree step, bool important, struct iv_use *use,
3278 struct iv *orig_iv = NULL)
3279 {
3280 if (ip_normal_pos (data->current_loop))
3281 add_candidate_1 (data, base, step, important,
3282 IP_NORMAL, use, NULL, orig_iv);
3283 if (ip_end_pos (data->current_loop)
3284 && allow_ip_end_pos_p (data->current_loop))
3285 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3286 }
3287
3288 /* Adds standard iv candidates. */
3289
3290 static void
3291 add_standard_iv_candidates (struct ivopts_data *data)
3292 {
3293 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3294
3295 /* The same for a double-integer type if it is still fast enough. */
3296 if (TYPE_PRECISION
3297 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3298 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3299 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3300 build_int_cst (long_integer_type_node, 1), true, NULL);
3301
3302 /* The same for a double-integer type if it is still fast enough. */
3303 if (TYPE_PRECISION
3304 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3305 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3306 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3307 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3308 }
3309
3310
3311 /* Adds candidates based on the old induction variable IV. */
3312
3313 static void
3314 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3315 {
3316 gimple *phi;
3317 tree def;
3318 struct iv_cand *cand;
3319
3320   /* Check if this biv is used in an address type use.  */
3321 if (iv->no_overflow && iv->have_address_use
3322 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3323 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3324 {
3325 tree base = fold_convert (sizetype, iv->base);
3326 tree step = fold_convert (sizetype, iv->step);
3327
3328 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3329 add_candidate (data, base, step, true, NULL, iv);
3330 /* Add iv cand of the original type only if it has nonlinear use. */
3331 if (iv->nonlin_use)
3332 add_candidate (data, iv->base, iv->step, true, NULL);
3333 }
3334 else
3335 add_candidate (data, iv->base, iv->step, true, NULL);
3336
3337 /* The same, but with initial value zero. */
3338 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3339 add_candidate (data, size_int (0), iv->step, true, NULL);
3340 else
3341 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3342 iv->step, true, NULL);
3343
3344 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3345 if (gimple_code (phi) == GIMPLE_PHI)
3346 {
3347 /* Additionally record the possibility of leaving the original iv
3348 untouched. */
3349 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3350 /* Don't add candidate if it's from another PHI node because
3351 it's an affine iv appearing in the form of PEELED_CHREC. */
3352 phi = SSA_NAME_DEF_STMT (def);
3353 if (gimple_code (phi) != GIMPLE_PHI)
3354 {
3355 cand = add_candidate_1 (data,
3356 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3357 SSA_NAME_DEF_STMT (def));
3358 if (cand)
3359 {
3360 cand->var_before = iv->ssa_name;
3361 cand->var_after = def;
3362 }
3363 }
3364 else
3365 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3366 }
3367 }
3368
3369 /* Adds candidates based on the old induction variables. */
3370
3371 static void
3372 add_iv_candidate_for_bivs (struct ivopts_data *data)
3373 {
3374 unsigned i;
3375 struct iv *iv;
3376 bitmap_iterator bi;
3377
3378 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3379 {
3380 iv = ver_info (data, i)->iv;
3381 if (iv && iv->biv_p && !integer_zerop (iv->step))
3382 add_iv_candidate_for_biv (data, iv);
3383 }
3384 }
3385
3386 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3387
3388 static void
3389 record_common_cand (struct ivopts_data *data, tree base,
3390 tree step, struct iv_use *use)
3391 {
3392 struct iv_common_cand ent;
3393 struct iv_common_cand **slot;
3394
3395 ent.base = base;
3396 ent.step = step;
3397 ent.hash = iterative_hash_expr (base, 0);
3398 ent.hash = iterative_hash_expr (step, ent.hash);
3399
3400 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3401 if (*slot == NULL)
3402 {
3403 *slot = new iv_common_cand ();
3404 (*slot)->base = base;
3405 (*slot)->step = step;
3406 (*slot)->uses.create (8);
3407 (*slot)->hash = ent.hash;
3408 data->iv_common_cands.safe_push ((*slot));
3409 }
3410
3411 gcc_assert (use != NULL);
3412 (*slot)->uses.safe_push (use);
3413 return;
3414 }
3415
3416 /* Comparison function used to sort common candidates. */
3417
3418 static int
3419 common_cand_cmp (const void *p1, const void *p2)
3420 {
3421 unsigned n1, n2;
3422 const struct iv_common_cand *const *const ccand1
3423 = (const struct iv_common_cand *const *)p1;
3424 const struct iv_common_cand *const *const ccand2
3425 = (const struct iv_common_cand *const *)p2;
3426
3427 n1 = (*ccand1)->uses.length ();
3428 n2 = (*ccand2)->uses.length ();
3429 return n2 - n1;
3430 }
3431
3432 /* Adds IV candidates based on the common candidates recorded. */
3433
3434 static void
3435 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3436 {
3437 unsigned i, j;
3438 struct iv_cand *cand_1, *cand_2;
3439
3440 data->iv_common_cands.qsort (common_cand_cmp);
3441 for (i = 0; i < data->iv_common_cands.length (); i++)
3442 {
3443 struct iv_common_cand *ptr = data->iv_common_cands[i];
3444
3445 /* Only add IV candidate if it's derived from multiple uses. */
3446 if (ptr->uses.length () <= 1)
3447 break;
3448
3449 cand_1 = NULL;
3450 cand_2 = NULL;
3451 if (ip_normal_pos (data->current_loop))
3452 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3453 false, IP_NORMAL, NULL, NULL);
3454
3455 if (ip_end_pos (data->current_loop)
3456 && allow_ip_end_pos_p (data->current_loop))
3457 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3458 false, IP_END, NULL, NULL);
3459
3460 /* Bind deriving uses and the new candidates. */
3461 for (j = 0; j < ptr->uses.length (); j++)
3462 {
3463 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3464 if (cand_1)
3465 bitmap_set_bit (group->related_cands, cand_1->id);
3466 if (cand_2)
3467 bitmap_set_bit (group->related_cands, cand_2->id);
3468 }
3469 }
3470
3471 /* Release data since it is useless from this point. */
3472 data->iv_common_cand_tab->empty ();
3473 data->iv_common_cands.truncate (0);
3474 }
3475
3476 /* Adds candidates based on the value of USE's iv. */
3477
3478 static void
3479 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3480 {
3481 poly_uint64 offset;
3482 tree base;
3483 tree basetype;
3484 struct iv *iv = use->iv;
3485
3486 add_candidate (data, iv->base, iv->step, false, use);
3487
3488 /* Record common candidate for use in case it can be shared by others. */
3489 record_common_cand (data, iv->base, iv->step, use);
3490
3491 /* Record common candidate with initial value zero. */
3492 basetype = TREE_TYPE (iv->base);
3493 if (POINTER_TYPE_P (basetype))
3494 basetype = sizetype;
3495 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3496
3497 /* Record common candidate with constant offset stripped in base.
3498 Like the use itself, we also add candidate directly for it. */
3499 base = strip_offset (iv->base, &offset);
3500 if (maybe_ne (offset, 0U) || base != iv->base)
3501 {
3502 record_common_cand (data, base, iv->step, use);
3503 add_candidate (data, base, iv->step, false, use);
3504 }
3505
3506 /* Record common candidate with base_object removed in base. */
3507 base = iv->base;
3508 STRIP_NOPS (base);
3509 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3510 {
3511 tree step = iv->step;
3512
3513 STRIP_NOPS (step);
3514 base = TREE_OPERAND (base, 1);
3515 step = fold_convert (sizetype, step);
3516 record_common_cand (data, base, step, use);
3517 /* Also record common candidate with offset stripped. */
3518 base = strip_offset (base, &offset);
3519 if (maybe_ne (offset, 0U))
3520 record_common_cand (data, base, step, use);
3521 }
3522
3523   /* Finally, add auto-increment candidates.  Make such variables
3524      important since other iv uses with the same base object may be based
3525      on them.  */
3526 if (use != NULL && address_p (use->type))
3527 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3528 }
3529
3530 /* Adds candidates based on the uses. */
3531
3532 static void
3533 add_iv_candidate_for_groups (struct ivopts_data *data)
3534 {
3535 unsigned i;
3536
3537 /* Only add candidate for the first use in group. */
3538 for (i = 0; i < data->vgroups.length (); i++)
3539 {
3540 struct iv_group *group = data->vgroups[i];
3541
3542 gcc_assert (group->vuses[0] != NULL);
3543 add_iv_candidate_for_use (data, group->vuses[0]);
3544 }
3545 add_iv_candidate_derived_from_uses (data);
3546 }
3547
3548 /* Record important candidates and add them to related_cands bitmaps. */
3549
3550 static void
3551 record_important_candidates (struct ivopts_data *data)
3552 {
3553 unsigned i;
3554 struct iv_group *group;
3555
3556 for (i = 0; i < data->vcands.length (); i++)
3557 {
3558 struct iv_cand *cand = data->vcands[i];
3559
3560 if (cand->important)
3561 bitmap_set_bit (data->important_candidates, i);
3562 }
3563
3564 data->consider_all_candidates = (data->vcands.length ()
3565 <= CONSIDER_ALL_CANDIDATES_BOUND);
3566
3567 /* Add important candidates to groups' related_cands bitmaps. */
3568 for (i = 0; i < data->vgroups.length (); i++)
3569 {
3570 group = data->vgroups[i];
3571 bitmap_ior_into (group->related_cands, data->important_candidates);
3572 }
3573 }
3574
3575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3576 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3577 we allocate a simple list for every group. */
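/* Illustrative example only: a group related to 5 candidates gets a cost
   map of size 8 (the next power of two), so the probing below can use
   'cand->id & 7' instead of a division.  */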
3578
3579 static void
3580 alloc_use_cost_map (struct ivopts_data *data)
3581 {
3582 unsigned i, size, s;
3583
3584 for (i = 0; i < data->vgroups.length (); i++)
3585 {
3586 struct iv_group *group = data->vgroups[i];
3587
3588 if (data->consider_all_candidates)
3589 size = data->vcands.length ();
3590 else
3591 {
3592 s = bitmap_count_bits (group->related_cands);
3593
3594 	  /* Round up to a power of two, so that taking the remainder by it is fast.  */
3595 size = s ? (1 << ceil_log2 (s)) : 1;
3596 }
3597
3598 group->n_map_members = size;
3599 group->cost_map = XCNEWVEC (struct cost_pair, size);
3600 }
3601 }
3602
3603 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it
3604 depends on invariants INV_VARS, that the value used in expressing it is
3605 VALUE, and that in case of iv elimination the comparison operator is COMP. */
3606
3607 static void
3608 set_group_iv_cost (struct ivopts_data *data,
3609 struct iv_group *group, struct iv_cand *cand,
3610 comp_cost cost, bitmap inv_vars, tree value,
3611 enum tree_code comp, bitmap inv_exprs)
3612 {
3613 unsigned i, s;
3614
3615 if (cost.infinite_cost_p ())
3616 {
3617 BITMAP_FREE (inv_vars);
3618 BITMAP_FREE (inv_exprs);
3619 return;
3620 }
3621
3622 if (data->consider_all_candidates)
3623 {
3624 group->cost_map[cand->id].cand = cand;
3625 group->cost_map[cand->id].cost = cost;
3626 group->cost_map[cand->id].inv_vars = inv_vars;
3627 group->cost_map[cand->id].inv_exprs = inv_exprs;
3628 group->cost_map[cand->id].value = value;
3629 group->cost_map[cand->id].comp = comp;
3630 return;
3631 }
3632
3633 /* n_map_members is a power of two, so this computes modulo. */
3634 s = cand->id & (group->n_map_members - 1);
3635 for (i = s; i < group->n_map_members; i++)
3636 if (!group->cost_map[i].cand)
3637 goto found;
3638 for (i = 0; i < s; i++)
3639 if (!group->cost_map[i].cand)
3640 goto found;
3641
3642 gcc_unreachable ();
3643
3644 found:
3645 group->cost_map[i].cand = cand;
3646 group->cost_map[i].cost = cost;
3647 group->cost_map[i].inv_vars = inv_vars;
3648 group->cost_map[i].inv_exprs = inv_exprs;
3649 group->cost_map[i].value = value;
3650 group->cost_map[i].comp = comp;
3651 }
3652
3653 /* Gets cost of (GROUP, CAND) pair. */
3654
3655 static struct cost_pair *
3656 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3657 struct iv_cand *cand)
3658 {
3659 unsigned i, s;
3660 struct cost_pair *ret;
3661
3662 if (!cand)
3663 return NULL;
3664
3665 if (data->consider_all_candidates)
3666 {
3667 ret = group->cost_map + cand->id;
3668 if (!ret->cand)
3669 return NULL;
3670
3671 return ret;
3672 }
3673
3674 /* n_map_members is a power of two, so this computes modulo. */
3675 s = cand->id & (group->n_map_members - 1);
3676 for (i = s; i < group->n_map_members; i++)
3677 if (group->cost_map[i].cand == cand)
3678 return group->cost_map + i;
3679 else if (group->cost_map[i].cand == NULL)
3680 return NULL;
3681 for (i = 0; i < s; i++)
3682 if (group->cost_map[i].cand == cand)
3683 return group->cost_map + i;
3684 else if (group->cost_map[i].cand == NULL)
3685 return NULL;
3686
3687 return NULL;
3688 }
3689
3690 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3691 static rtx
3692 produce_memory_decl_rtl (tree obj, int *regno)
3693 {
3694 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3695 machine_mode address_mode = targetm.addr_space.address_mode (as);
3696 rtx x;
3697
3698 gcc_assert (obj);
3699 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3700 {
3701 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3702 x = gen_rtx_SYMBOL_REF (address_mode, name);
3703 SET_SYMBOL_REF_DECL (x, obj);
3704 x = gen_rtx_MEM (DECL_MODE (obj), x);
3705 set_mem_addr_space (x, as);
3706 targetm.encode_section_info (obj, x, true);
3707 }
3708 else
3709 {
3710 x = gen_raw_REG (address_mode, (*regno)++);
3711 x = gen_rtx_MEM (DECL_MODE (obj), x);
3712 set_mem_addr_space (x, as);
3713 }
3714
3715 return x;
3716 }
3717
3718 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3719 walk_tree. DATA points to the current fake register number. */
3720
3721 static tree
3722 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3723 {
3724 tree obj = NULL_TREE;
3725 rtx x = NULL_RTX;
3726 int *regno = (int *) data;
3727
3728 switch (TREE_CODE (*expr_p))
3729 {
3730 case ADDR_EXPR:
3731 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3732 handled_component_p (*expr_p);
3733 expr_p = &TREE_OPERAND (*expr_p, 0))
3734 continue;
3735 obj = *expr_p;
3736 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3737 x = produce_memory_decl_rtl (obj, regno);
3738 break;
3739
3740 case SSA_NAME:
3741 *ws = 0;
3742 obj = SSA_NAME_VAR (*expr_p);
3743 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3744 if (!obj)
3745 return NULL_TREE;
3746 if (!DECL_RTL_SET_P (obj))
3747 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3748 break;
3749
3750 case VAR_DECL:
3751 case PARM_DECL:
3752 case RESULT_DECL:
3753 *ws = 0;
3754 obj = *expr_p;
3755
3756 if (DECL_RTL_SET_P (obj))
3757 break;
3758
3759 if (DECL_MODE (obj) == BLKmode)
3760 x = produce_memory_decl_rtl (obj, regno);
3761 else
3762 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3763
3764 break;
3765
3766 default:
3767 break;
3768 }
3769
3770 if (x)
3771 {
3772 decl_rtl_to_reset.safe_push (obj);
3773 SET_DECL_RTL (obj, x);
3774 }
3775
3776 return NULL_TREE;
3777 }
3778
3779 /* Determines cost of the computation of EXPR. */
3780
3781 static unsigned
3782 computation_cost (tree expr, bool speed)
3783 {
3784 rtx_insn *seq;
3785 rtx rslt;
3786 tree type = TREE_TYPE (expr);
3787 unsigned cost;
3788 /* Avoid using hard regs in ways which may be unsupported. */
3789 int regno = LAST_VIRTUAL_REGISTER + 1;
3790 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3791 enum node_frequency real_frequency = node->frequency;
3792
3793 node->frequency = NODE_FREQUENCY_NORMAL;
3794 crtl->maybe_hot_insn_p = speed;
3795 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3796 start_sequence ();
3797 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3798 seq = get_insns ();
3799 end_sequence ();
3800 default_rtl_profile ();
3801 node->frequency = real_frequency;
3802
3803 cost = seq_cost (seq, speed);
3804 if (MEM_P (rslt))
3805 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3806 TYPE_ADDR_SPACE (type), speed);
3807 else if (!REG_P (rslt))
3808 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3809
3810 return cost;
3811 }
3812
3813 /* Returns variable containing the value of candidate CAND at statement AT. */
3814
3815 static tree
3816 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3817 {
3818 if (stmt_after_increment (loop, cand, stmt))
3819 return cand->var_after;
3820 else
3821 return cand->var_before;
3822 }
3823
3824 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3825 same precision that is at least as wide as the precision of TYPE, stores
3826 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3827 type of A and B. */
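/* For instance, if A is (int) a0 and B is (int) b0 with a0 and b0 both of
   type long, *A and *B are replaced by a0 and b0 and the function returns
   long; if only one operand is such a conversion, the type of A is returned
   unchanged.  */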
3828
3829 static tree
3830 determine_common_wider_type (tree *a, tree *b)
3831 {
3832 tree wider_type = NULL;
3833 tree suba, subb;
3834 tree atype = TREE_TYPE (*a);
3835
3836 if (CONVERT_EXPR_P (*a))
3837 {
3838 suba = TREE_OPERAND (*a, 0);
3839 wider_type = TREE_TYPE (suba);
3840 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3841 return atype;
3842 }
3843 else
3844 return atype;
3845
3846 if (CONVERT_EXPR_P (*b))
3847 {
3848 subb = TREE_OPERAND (*b, 0);
3849 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3850 return atype;
3851 }
3852 else
3853 return atype;
3854
3855 *a = suba;
3856 *b = subb;
3857 return wider_type;
3858 }
3859
3860 /* Determines the expression by which USE is expressed from induction variable
3861 CAND at statement AT in LOOP. The expression is stored in a decomposed
3862 form in two parts: the invariant part in AFF_INV and the variant part in
3863 AFF_VAR. Store the ratio of USE.step over CAND.step in PRAT if it is
3864 non-null. Returns false if USE cannot be expressed using CAND. */
3865
3866 static bool
3867 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3868 struct iv_cand *cand, struct aff_tree *aff_inv,
3869 struct aff_tree *aff_var, widest_int *prat = NULL)
3870 {
3871 tree ubase = use->iv->base, ustep = use->iv->step;
3872 tree cbase = cand->iv->base, cstep = cand->iv->step;
3873 tree common_type, uutype, var, cstep_common;
3874 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3875 aff_tree aff_cbase;
3876 widest_int rat;
3877
3878 /* We must have a precision to express the values of use. */
3879 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3880 return false;
3881
3882 var = var_at_stmt (loop, cand, at);
3883 uutype = unsigned_type_for (utype);
3884
3885 /* If the conversion is not noop, perform it. */
3886 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3887 {
3888 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3889 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3890 {
3891 tree inner_base, inner_step, inner_type;
3892 inner_base = TREE_OPERAND (cbase, 0);
3893 if (CONVERT_EXPR_P (cstep))
3894 inner_step = TREE_OPERAND (cstep, 0);
3895 else
3896 inner_step = cstep;
3897
3898 inner_type = TREE_TYPE (inner_base);
3899 /* If the candidate is added from a biv whose type is smaller than
3900 ctype, we know both the candidate and the biv won't overflow.
3901 In this case, it's safe to skip the conversion in the candidate.
3902 As an example, (unsigned short)((unsigned long)A) equals
3903 (unsigned short)A, if A has a type no larger than short. */
3904 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3905 {
3906 cbase = inner_base;
3907 cstep = inner_step;
3908 }
3909 }
3910 cbase = fold_convert (uutype, cbase);
3911 cstep = fold_convert (uutype, cstep);
3912 var = fold_convert (uutype, var);
3913 }
3914
3915 /* Ratio is 1 when computing the value of biv cand by itself.
3916 We can't rely on constant_multiple_of in this case because the
3917 use is created after the original biv is selected. The call
3918 could fail because of inconsistent fold behavior. See PR68021
3919 for more information. */
3920 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3921 {
3922 gcc_assert (is_gimple_assign (use->stmt));
3923 gcc_assert (use->iv->ssa_name == cand->var_after);
3924 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3925 rat = 1;
3926 }
3927 else if (!constant_multiple_of (ustep, cstep, &rat))
3928 return false;
3929
3930 if (prat)
3931 *prat = rat;
3932
3933 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3934 type, we achieve better folding by computing their difference in this
3935 wider type, and casting the result to UUTYPE. We do not need to worry
3936 about overflows, as all the arithmetic will in the end be performed in
3937 UUTYPE anyway. */
3938 common_type = determine_common_wider_type (&ubase, &cbase);
3939
3940 /* use = ubase - ratio * cbase + ratio * var. */
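/* This follows from use == ubase + i * ustep and var == cbase + i * cstep
   with ustep == ratio * cstep, so i * ustep == ratio * (var - cbase).  */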
3941 tree_to_aff_combination (ubase, common_type, aff_inv);
3942 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3943 tree_to_aff_combination (var, uutype, aff_var);
3944
3945 /* We need to shift the value if we are after the increment. */
3946 if (stmt_after_increment (loop, cand, at))
3947 {
3948 aff_tree cstep_aff;
3949
3950 if (common_type != uutype)
3951 cstep_common = fold_convert (common_type, cstep);
3952 else
3953 cstep_common = cstep;
3954
3955 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3956 aff_combination_add (&aff_cbase, &cstep_aff);
3957 }
3958
3959 aff_combination_scale (&aff_cbase, -rat);
3960 aff_combination_add (aff_inv, &aff_cbase);
3961 if (common_type != uutype)
3962 aff_combination_convert (aff_inv, uutype);
3963
3964 aff_combination_scale (aff_var, rat);
3965 return true;
3966 }
3967
3968 /* Determines the expression by which USE is expressed from induction variable
3969 CAND at statement AT in LOOP. The expression is stored in a decomposed
3970 form into AFF. Returns false if USE cannot be expressed using CAND. */
3971
3972 static bool
3973 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3974 struct iv_cand *cand, struct aff_tree *aff)
3975 {
3976 aff_tree aff_var;
3977
3978 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3979 return false;
3980
3981 aff_combination_add (aff, &aff_var);
3982 return true;
3983 }
3984
3985 /* Return the type of USE. */
3986
3987 static tree
3988 get_use_type (struct iv_use *use)
3989 {
3990 tree base_type = TREE_TYPE (use->iv->base);
3991 tree type;
3992
3993 if (use->type == USE_REF_ADDRESS)
3994 {
3995 /* The base_type may be a void pointer. Create a pointer type based on
3996 the mem_ref instead. */
3997 type = build_pointer_type (TREE_TYPE (*use->op_p));
3998 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3999 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4000 }
4001 else
4002 type = base_type;
4003
4004 return type;
4005 }
4006
4007 /* Determines the expression by which USE is expressed from induction variable
4008 CAND at statement AT in LOOP. The computation is unshared. */
4009
4010 static tree
4011 get_computation_at (struct loop *loop, gimple *at,
4012 struct iv_use *use, struct iv_cand *cand)
4013 {
4014 aff_tree aff;
4015 tree type = get_use_type (use);
4016
4017 if (!get_computation_aff (loop, at, use, cand, &aff))
4018 return NULL_TREE;
4019 unshare_aff_combination (&aff);
4020 return fold_convert (type, aff_combination_to_tree (&aff));
4021 }
4022
4023 /* Adjust the cost COST for being in loop setup rather than loop body.
4024 If we're optimizing for space, the loop setup overhead is constant;
4025 if we're optimizing for speed, amortize it over the average number of
4026 iterations. If ROUND_UP_P is true, the result is rounded up rather
4027 than truncated toward zero when optimizing for speed. */
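/* For example, with an average of 10 iterations a setup cost of 25 becomes
   2, or 3 with ROUND_UP_P; when optimizing for size it stays 25.  */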
4028 static unsigned
4029 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4030 bool round_up_p = false)
4031 {
4032 if (cost == INFTY)
4033 return cost;
4034 else if (optimize_loop_for_speed_p (data->current_loop))
4035 {
4036 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4037 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4038 }
4039 else
4040 return cost;
4041 }
4042
4043 /* Calculate the cost (for speed or size, according to SPEED) of the shiftadd
4044 EXPR in MODE. MULT is the EXPR operand holding the multiplication by a
4045 power of two. COST0 and COST1 are the costs for calculating the operands
4046 of EXPR. Returns true if successful, and returns the cost in COST. */
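/* For instance, for EXPR == a + b * 8 the multiplication can be done as
   "b << 3", so we compare the cost of a shift followed by an add against
   the target's combined shift-and-add (or shift-and-sub) pattern, take the
   cheaper of the two, and add the cost of the remaining operand.  */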
4047
4048 static bool
4049 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4050 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4051 {
4052 comp_cost res;
4053 tree op1 = TREE_OPERAND (expr, 1);
4054 tree cst = TREE_OPERAND (mult, 1);
4055 tree multop = TREE_OPERAND (mult, 0);
4056 int m = exact_log2 (int_cst_value (cst));
4057 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4058 int as_cost, sa_cost;
4059 bool mult_in_op1;
4060
4061 if (!(m >= 0 && m < maxm))
4062 return false;
4063
4064 STRIP_NOPS (op1);
4065 mult_in_op1 = operand_equal_p (op1, mult, 0);
4066
4067 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4068
4069 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4070 use that in preference to a shift insn followed by an add insn. */
4071 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4072 ? shiftadd_cost (speed, mode, m)
4073 : (mult_in_op1
4074 ? shiftsub1_cost (speed, mode, m)
4075 : shiftsub0_cost (speed, mode, m)));
4076
4077 res = comp_cost (MIN (as_cost, sa_cost), 0);
4078 res += (mult_in_op1 ? cost0 : cost1);
4079
4080 STRIP_NOPS (multop);
4081 if (!is_gimple_val (multop))
4082 res += force_expr_to_var_cost (multop, speed);
4083
4084 *cost = res;
4085 return true;
4086 }
4087
4088 /* Estimates cost of forcing expression EXPR into a variable. */
4089
4090 static comp_cost
4091 force_expr_to_var_cost (tree expr, bool speed)
4092 {
4093 static bool costs_initialized = false;
4094 static unsigned integer_cost [2];
4095 static unsigned symbol_cost [2];
4096 static unsigned address_cost [2];
4097 tree op0, op1;
4098 comp_cost cost0, cost1, cost;
4099 machine_mode mode;
4100 scalar_int_mode int_mode;
4101
4102 if (!costs_initialized)
4103 {
4104 tree type = build_pointer_type (integer_type_node);
4105 tree var, addr;
4106 rtx x;
4107 int i;
4108
4109 var = create_tmp_var_raw (integer_type_node, "test_var");
4110 TREE_STATIC (var) = 1;
4111 x = produce_memory_decl_rtl (var, NULL);
4112 SET_DECL_RTL (var, x);
4113
4114 addr = build1 (ADDR_EXPR, type, var);
4115
4116
4117 for (i = 0; i < 2; i++)
4118 {
4119 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4120 2000), i);
4121
4122 symbol_cost[i] = computation_cost (addr, i) + 1;
4123
4124 address_cost[i]
4125 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4126 if (dump_file && (dump_flags & TDF_DETAILS))
4127 {
4128 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4129 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4130 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4131 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4132 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4133 fprintf (dump_file, "\n");
4134 }
4135 }
4136
4137 costs_initialized = true;
4138 }
4139
4140 STRIP_NOPS (expr);
4141
4142 if (SSA_VAR_P (expr))
4143 return no_cost;
4144
4145 if (is_gimple_min_invariant (expr))
4146 {
4147 if (poly_int_tree_p (expr))
4148 return comp_cost (integer_cost [speed], 0);
4149
4150 if (TREE_CODE (expr) == ADDR_EXPR)
4151 {
4152 tree obj = TREE_OPERAND (expr, 0);
4153
4154 if (VAR_P (obj)
4155 || TREE_CODE (obj) == PARM_DECL
4156 || TREE_CODE (obj) == RESULT_DECL)
4157 return comp_cost (symbol_cost [speed], 0);
4158 }
4159
4160 return comp_cost (address_cost [speed], 0);
4161 }
4162
4163 switch (TREE_CODE (expr))
4164 {
4165 case POINTER_PLUS_EXPR:
4166 case PLUS_EXPR:
4167 case MINUS_EXPR:
4168 case MULT_EXPR:
4169 case TRUNC_DIV_EXPR:
4170 case BIT_AND_EXPR:
4171 case BIT_IOR_EXPR:
4172 case LSHIFT_EXPR:
4173 case RSHIFT_EXPR:
4174 op0 = TREE_OPERAND (expr, 0);
4175 op1 = TREE_OPERAND (expr, 1);
4176 STRIP_NOPS (op0);
4177 STRIP_NOPS (op1);
4178 break;
4179
4180 CASE_CONVERT:
4181 case NEGATE_EXPR:
4182 case BIT_NOT_EXPR:
4183 op0 = TREE_OPERAND (expr, 0);
4184 STRIP_NOPS (op0);
4185 op1 = NULL_TREE;
4186 break;
4187
4188 default:
4189 /* Just an arbitrary value, FIXME. */
4190 return comp_cost (target_spill_cost[speed], 0);
4191 }
4192
4193 if (op0 == NULL_TREE
4194 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4195 cost0 = no_cost;
4196 else
4197 cost0 = force_expr_to_var_cost (op0, speed);
4198
4199 if (op1 == NULL_TREE
4200 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4201 cost1 = no_cost;
4202 else
4203 cost1 = force_expr_to_var_cost (op1, speed);
4204
4205 mode = TYPE_MODE (TREE_TYPE (expr));
4206 switch (TREE_CODE (expr))
4207 {
4208 case POINTER_PLUS_EXPR:
4209 case PLUS_EXPR:
4210 case MINUS_EXPR:
4211 case NEGATE_EXPR:
4212 cost = comp_cost (add_cost (speed, mode), 0);
4213 if (TREE_CODE (expr) != NEGATE_EXPR)
4214 {
4215 tree mult = NULL_TREE;
4216 comp_cost sa_cost;
4217 if (TREE_CODE (op1) == MULT_EXPR)
4218 mult = op1;
4219 else if (TREE_CODE (op0) == MULT_EXPR)
4220 mult = op0;
4221
4222 if (mult != NULL_TREE
4223 && is_a <scalar_int_mode> (mode, &int_mode)
4224 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4225 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4226 speed, &sa_cost))
4227 return sa_cost;
4228 }
4229 break;
4230
4231 CASE_CONVERT:
4232 {
4233 tree inner_mode, outer_mode;
4234 outer_mode = TREE_TYPE (expr);
4235 inner_mode = TREE_TYPE (op0);
4236 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4237 TYPE_MODE (inner_mode), speed), 0);
4238 }
4239 break;
4240
4241 case MULT_EXPR:
4242 if (cst_and_fits_in_hwi (op0))
4243 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4244 mode, speed), 0);
4245 else if (cst_and_fits_in_hwi (op1))
4246 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4247 mode, speed), 0);
4248 else
4249 return comp_cost (target_spill_cost [speed], 0);
4250 break;
4251
4252 case TRUNC_DIV_EXPR:
4253 /* Division by power of two is usually cheap, so we allow it. Forbid
4254 anything else. */
4255 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4256 cost = comp_cost (add_cost (speed, mode), 0);
4257 else
4258 cost = comp_cost (target_spill_cost[speed], 0);
4259 break;
4260
4261 case BIT_AND_EXPR:
4262 case BIT_IOR_EXPR:
4263 case BIT_NOT_EXPR:
4264 case LSHIFT_EXPR:
4265 case RSHIFT_EXPR:
4266 cost = comp_cost (add_cost (speed, mode), 0);
4267 break;
4268
4269 default:
4270 gcc_unreachable ();
4271 }
4272
4273 cost += cost0;
4274 cost += cost1;
4275 return cost;
4276 }
4277
4278 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4279 invariants the computation depends on. */
4280
4281 static comp_cost
4282 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4283 {
4284 if (!expr)
4285 return no_cost;
4286
4287 find_inv_vars (data, &expr, inv_vars);
4288 return force_expr_to_var_cost (expr, data->speed);
4289 }
4290
4291 /* Returns cost of auto-modifying address expression in shape base + offset.
4292 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4293 address expression. The address expression has ADDR_MODE in addr space
4294 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4295 speed or size. */
4296
4297 enum ainc_type
4298 {
4299 AINC_PRE_INC, /* Pre increment. */
4300 AINC_PRE_DEC, /* Pre decrement. */
4301 AINC_POST_INC, /* Post increment. */
4302 AINC_POST_DEC, /* Post decrement. */
4303 AINC_NONE /* Also the number of auto increment types. */
4304 };
4305
4306 struct ainc_cost_data
4307 {
4308 unsigned costs[AINC_NONE];
4309 };
4310
4311 static comp_cost
4312 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4313 machine_mode addr_mode, machine_mode mem_mode,
4314 addr_space_t as, bool speed)
4315 {
4316 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4317 && !USE_STORE_PRE_DECREMENT (mem_mode)
4318 && !USE_LOAD_POST_DECREMENT (mem_mode)
4319 && !USE_STORE_POST_DECREMENT (mem_mode)
4320 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4321 && !USE_STORE_PRE_INCREMENT (mem_mode)
4322 && !USE_LOAD_POST_INCREMENT (mem_mode)
4323 && !USE_STORE_POST_INCREMENT (mem_mode))
4324 return infinite_cost;
4325
4326 static vec<ainc_cost_data *> ainc_cost_data_list;
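/* Costs are cached per (address space, memory mode) pair; the cache is
   grown lazily the first time a given pair is queried.  */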
4327 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4328 if (idx >= ainc_cost_data_list.length ())
4329 {
4330 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4331
4332 gcc_assert (nsize > idx);
4333 ainc_cost_data_list.safe_grow_cleared (nsize);
4334 }
4335
4336 ainc_cost_data *data = ainc_cost_data_list[idx];
4337 if (data == NULL)
4338 {
4339 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4340
4341 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4342 data->costs[AINC_PRE_DEC] = INFTY;
4343 data->costs[AINC_POST_DEC] = INFTY;
4344 data->costs[AINC_PRE_INC] = INFTY;
4345 data->costs[AINC_POST_INC] = INFTY;
4346 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4347 || USE_STORE_PRE_DECREMENT (mem_mode))
4348 {
4349 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4350
4351 if (memory_address_addr_space_p (mem_mode, addr, as))
4352 data->costs[AINC_PRE_DEC]
4353 = address_cost (addr, mem_mode, as, speed);
4354 }
4355 if (USE_LOAD_POST_DECREMENT (mem_mode)
4356 || USE_STORE_POST_DECREMENT (mem_mode))
4357 {
4358 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4359
4360 if (memory_address_addr_space_p (mem_mode, addr, as))
4361 data->costs[AINC_POST_DEC]
4362 = address_cost (addr, mem_mode, as, speed);
4363 }
4364 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4365 || USE_STORE_PRE_INCREMENT (mem_mode))
4366 {
4367 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4368
4369 if (memory_address_addr_space_p (mem_mode, addr, as))
4370 data->costs[AINC_PRE_INC]
4371 = address_cost (addr, mem_mode, as, speed);
4372 }
4373 if (USE_LOAD_POST_INCREMENT (mem_mode)
4374 || USE_STORE_POST_INCREMENT (mem_mode))
4375 {
4376 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4377
4378 if (memory_address_addr_space_p (mem_mode, addr, as))
4379 data->costs[AINC_POST_INC]
4380 = address_cost (addr, mem_mode, as, speed);
4381 }
4382 ainc_cost_data_list[idx] = data;
4383 }
4384
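/* Match the offset and step against the four auto-modification forms: a
   zero offset with a step of +/- the access size corresponds to post-
   increment/decrement, while an offset of +/- the access size with a
   matching step corresponds to pre-increment/decrement.  */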
4385 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4386 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4387 return comp_cost (data->costs[AINC_POST_INC], 0);
4388 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4389 return comp_cost (data->costs[AINC_POST_DEC], 0);
4390 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4391 return comp_cost (data->costs[AINC_PRE_INC], 0);
4392 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4393 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4394
4395 return infinite_cost;
4396 }
4397
4398 /* Return cost of computing USE's address expression by using CAND.
4399 AFF_INV and AFF_VAR represent invariant and variant parts of the
4400 address expression, respectively. If AFF_INV is simple, store the loop
4401 invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
4402 handle it as a new invariant expression and record it in INV_EXPR. RATIO
4403 is the ratio of USE's step to CAND's step. If CAN_AUTOINC is non-NULL,
4404 store in it a boolean value indicating whether this is an auto-increment
4405 address. */
4406
4407 static comp_cost
4408 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4409 struct iv_cand *cand, aff_tree *aff_inv,
4410 aff_tree *aff_var, HOST_WIDE_INT ratio,
4411 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4412 bool *can_autoinc, bool speed)
4413 {
4414 rtx addr;
4415 bool simple_inv = true;
4416 tree comp_inv = NULL_TREE, type = aff_var->type;
4417 comp_cost var_cost = no_cost, cost = no_cost;
4418 struct mem_address parts = {NULL_TREE, integer_one_node,
4419 NULL_TREE, NULL_TREE, NULL_TREE};
4420 machine_mode addr_mode = TYPE_MODE (type);
4421 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4422 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4423 /* Only true if ratio != 1. */
4424 bool ok_with_ratio_p = false;
4425 bool ok_without_ratio_p = false;
4426
4427 if (!aff_combination_const_p (aff_inv))
4428 {
4429 parts.index = integer_one_node;
4430 /* Addressing mode "base + index". */
4431 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4432 if (ratio != 1)
4433 {
4434 parts.step = wide_int_to_tree (type, ratio);
4435 /* Addressing mode "base + index << scale". */
4436 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4437 if (!ok_with_ratio_p)
4438 parts.step = NULL_TREE;
4439 }
4440 if (ok_with_ratio_p || ok_without_ratio_p)
4441 {
4442 if (maybe_ne (aff_inv->offset, 0))
4443 {
4444 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4445 /* Addressing mode "base + index [<< scale] + offset". */
4446 if (!valid_mem_ref_p (mem_mode, as, &parts))
4447 parts.offset = NULL_TREE;
4448 else
4449 aff_inv->offset = 0;
4450 }
4451
4452 move_fixed_address_to_symbol (&parts, aff_inv);
4453 /* Base is fixed address and is moved to symbol part. */
4454 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4455 parts.base = NULL_TREE;
4456
4457 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4458 if (parts.symbol != NULL_TREE
4459 && !valid_mem_ref_p (mem_mode, as, &parts))
4460 {
4461 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4462 parts.symbol = NULL_TREE;
4463 /* Reset SIMPLE_INV since symbol address needs to be computed
4464 outside of address expression in this case. */
4465 simple_inv = false;
4466 /* Symbol part is moved back to base part, it can't be NULL. */
4467 parts.base = integer_one_node;
4468 }
4469 }
4470 else
4471 parts.index = NULL_TREE;
4472 }
4473 else
4474 {
4475 poly_int64 ainc_step;
4476 if (can_autoinc
4477 && ratio == 1
4478 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4479 {
4480 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4481
4482 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4483 ainc_offset += ainc_step;
4484 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4485 addr_mode, mem_mode, as, speed);
4486 if (!cost.infinite_cost_p ())
4487 {
4488 *can_autoinc = true;
4489 return cost;
4490 }
4491 cost = no_cost;
4492 }
4493 if (!aff_combination_zero_p (aff_inv))
4494 {
4495 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4496 /* Addressing mode "base + offset". */
4497 if (!valid_mem_ref_p (mem_mode, as, &parts))
4498 parts.offset = NULL_TREE;
4499 else
4500 aff_inv->offset = 0;
4501 }
4502 }
4503
4504 if (simple_inv)
4505 simple_inv = (aff_inv == NULL
4506 || aff_combination_const_p (aff_inv)
4507 || aff_combination_singleton_var_p (aff_inv));
4508 if (!aff_combination_zero_p (aff_inv))
4509 comp_inv = aff_combination_to_tree (aff_inv);
4510 if (comp_inv != NULL_TREE)
4511 cost = force_var_cost (data, comp_inv, inv_vars);
4512 if (ratio != 1 && parts.step == NULL_TREE)
4513 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4514 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4515 var_cost += add_cost (speed, addr_mode);
4516
4517 if (comp_inv && inv_expr && !simple_inv)
4518 {
4519 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4520 /* Clear depends on. */
4521 if (*inv_expr != NULL && inv_vars && *inv_vars)
4522 bitmap_clear (*inv_vars);
4523
4524 /* The cost of a small invariant expression, adjusted against the loop
4525 niters, is usually zero, which makes it hard to distinguish from a
4526 candidate based on loop invariant variables. Moreover, the generated
4527 invariant expression may not be hoisted out of the loop by a following
4528 pass. We penalize the cost by rounding up in order to neutralize such
4529 effects. */
4530 cost.cost = adjust_setup_cost (data, cost.cost, true);
4531 cost.scratch = cost.cost;
4532 }
4533
4534 cost += var_cost;
4535 addr = addr_for_mem_ref (&parts, as, false);
4536 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4537 cost += address_cost (addr, mem_mode, as, speed);
4538
4539 if (parts.symbol != NULL_TREE)
4540 cost.complexity += 1;
4541 /* Don't increase the complexity of adding a scaled index if it's
4542 the only kind of index that the target allows. */
4543 if (parts.step != NULL_TREE && ok_without_ratio_p)
4544 cost.complexity += 1;
4545 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4546 cost.complexity += 1;
4547 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4548 cost.complexity += 1;
4549
4550 return cost;
4551 }
4552
4553 /* Scale (multiply) the computed COST (except the scratch part, which should
4554 be hoisted out of the loop) by AT->frequency / header->frequency, which
4555 makes the expected cost more accurate. */
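/* For example, a use in a branch taken half the time
   (bb_freq == loop_freq / 2) has the non-scratch part of its cost halved.  */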
4556
4557 static comp_cost
4558 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4559 {
4560 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4561 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4562 if (loop_freq != 0)
4563 {
4564 gcc_assert (cost.scratch <= cost.cost);
4565 int scaled_cost
4566 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4567
4568 if (dump_file && (dump_flags & TDF_DETAILS))
4569 fprintf (dump_file, "Scaling cost based on bb prob "
4570 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4571 1.0f * bb_freq / loop_freq, cost.cost,
4572 cost.scratch, scaled_cost, bb_freq, loop_freq);
4573
4574 cost.cost = scaled_cost;
4575 }
4576
4577 return cost;
4578 }
4579
4580 /* Determines the cost of the computation by which USE is expressed
4581 from induction variable CAND. If ADDRESS_P is true, we just need
4582 to create an address from it, otherwise we want to get it into a
4583 register. A set of invariants we depend on is stored in INV_VARS.
4584 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4585 addressing is likely. If INV_EXPR is nonnull, record invariant
4586 expr entry in it. */
4587
4588 static comp_cost
4589 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4590 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4591 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4592 {
4593 gimple *at = use->stmt;
4594 tree ubase = use->iv->base, cbase = cand->iv->base;
4595 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4596 tree comp_inv = NULL_TREE;
4597 HOST_WIDE_INT ratio, aratio;
4598 comp_cost cost;
4599 widest_int rat;
4600 aff_tree aff_inv, aff_var;
4601 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4602
4603 if (inv_vars)
4604 *inv_vars = NULL;
4605 if (can_autoinc)
4606 *can_autoinc = false;
4607 if (inv_expr)
4608 *inv_expr = NULL;
4609
4610 /* Check if we have enough precision to express the values of use. */
4611 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4612 return infinite_cost;
4613
4614 if (address_p
4615 || (use->iv->base_object
4616 && cand->iv->base_object
4617 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4618 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4619 {
4620 /* Do not try to express address of an object with computation based
4621 on address of a different object. This may cause problems in rtl
4622 level alias analysis (that does not expect this to be happening,
4623 as this is illegal in C), and would be unlikely to be useful
4624 anyway. */
4625 if (use->iv->base_object
4626 && cand->iv->base_object
4627 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4628 return infinite_cost;
4629 }
4630
4631 if (!get_computation_aff_1 (data->current_loop, at, use,
4632 cand, &aff_inv, &aff_var, &rat)
4633 || !wi::fits_shwi_p (rat))
4634 return infinite_cost;
4635
4636 ratio = rat.to_shwi ();
4637 if (address_p)
4638 {
4639 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4640 inv_vars, inv_expr, can_autoinc, speed);
4641 return get_scaled_computation_cost_at (data, at, cost);
4642 }
4643
4644 bool simple_inv = (aff_combination_const_p (&aff_inv)
4645 || aff_combination_singleton_var_p (&aff_inv));
4646 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4647 aff_combination_convert (&aff_inv, signed_type);
4648 if (!aff_combination_zero_p (&aff_inv))
4649 comp_inv = aff_combination_to_tree (&aff_inv);
4650
4651 cost = force_var_cost (data, comp_inv, inv_vars);
4652 if (comp_inv && inv_expr && !simple_inv)
4653 {
4654 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4655 /* Clear depends on. */
4656 if (*inv_expr != NULL && inv_vars && *inv_vars)
4657 bitmap_clear (*inv_vars);
4658
4659 cost.cost = adjust_setup_cost (data, cost.cost);
4660 /* Record setup cost in scratch field. */
4661 cost.scratch = cost.cost;
4662 }
4663 /* Cost of constant integer can be covered when adding invariant part to
4664 variant part. */
4665 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4666 cost = no_cost;
4667
4668 /* Need type narrowing to represent use with cand. */
4669 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4670 {
4671 machine_mode outer_mode = TYPE_MODE (utype);
4672 machine_mode inner_mode = TYPE_MODE (ctype);
4673 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4674 }
4675
4676 /* Turn a + i * (-c) into a - i * c. */
4677 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4678 aratio = -ratio;
4679 else
4680 aratio = ratio;
4681
4682 if (ratio != 1)
4683 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4684
4685 /* TODO: We may also need to check if we can compute a + i * 4 in one
4686 instruction. */
4687 /* Need to add up the invariant and variant parts. */
4688 if (comp_inv && !integer_zerop (comp_inv))
4689 cost += add_cost (speed, TYPE_MODE (utype));
4690
4691 return get_scaled_computation_cost_at (data, at, cost);
4692 }
4693
4694 /* Determines cost of computing the use in GROUP with CAND in a generic
4695 expression. */
4696
4697 static bool
4698 determine_group_iv_cost_generic (struct ivopts_data *data,
4699 struct iv_group *group, struct iv_cand *cand)
4700 {
4701 comp_cost cost;
4702 iv_inv_expr_ent *inv_expr = NULL;
4703 bitmap inv_vars = NULL, inv_exprs = NULL;
4704 struct iv_use *use = group->vuses[0];
4705
4706 /* The simple case first -- if we need to express value of the preserved
4707 original biv, the cost is 0. This also prevents us from counting the
4708 cost of increment twice -- once at this use and once in the cost of
4709 the candidate. */
4710 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4711 cost = no_cost;
4712 else
4713 cost = get_computation_cost (data, use, cand, false,
4714 &inv_vars, NULL, &inv_expr);
4715
4716 if (inv_expr)
4717 {
4718 inv_exprs = BITMAP_ALLOC (NULL);
4719 bitmap_set_bit (inv_exprs, inv_expr->id);
4720 }
4721 set_group_iv_cost (data, group, cand, cost, inv_vars,
4722 NULL_TREE, ERROR_MARK, inv_exprs);
4723 return !cost.infinite_cost_p ();
4724 }
4725
4726 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4727
4728 static bool
4729 determine_group_iv_cost_address (struct ivopts_data *data,
4730 struct iv_group *group, struct iv_cand *cand)
4731 {
4732 unsigned i;
4733 bitmap inv_vars = NULL, inv_exprs = NULL;
4734 bool can_autoinc;
4735 iv_inv_expr_ent *inv_expr = NULL;
4736 struct iv_use *use = group->vuses[0];
4737 comp_cost sum_cost = no_cost, cost;
4738
4739 cost = get_computation_cost (data, use, cand, true,
4740 &inv_vars, &can_autoinc, &inv_expr);
4741
4742 if (inv_expr)
4743 {
4744 inv_exprs = BITMAP_ALLOC (NULL);
4745 bitmap_set_bit (inv_exprs, inv_expr->id);
4746 }
4747 sum_cost = cost;
4748 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4749 {
4750 if (can_autoinc)
4751 sum_cost -= cand->cost_step;
4752 /* If we generated the candidate solely for exploiting autoincrement
4753 opportunities, and it turns out it can't be used, set the cost to
4754 infinity to make sure we ignore it. */
4755 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4756 sum_cost = infinite_cost;
4757 }
4758
4759 /* Uses in a group can share setup code, so only add setup cost once. */
4760 cost -= cost.scratch;
4761 /* Compute and add costs for the rest of the uses in this group. */
4762 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4763 {
4764 struct iv_use *next = group->vuses[i];
4765
4766 /* TODO: We could skip computing cost for sub iv_use when it has the
4767 same cost as the first iv_use, but the cost really depends on the
4768 offset and where the iv_use is. */
4769 cost = get_computation_cost (data, next, cand, true,
4770 NULL, &can_autoinc, &inv_expr);
4771 if (inv_expr)
4772 {
4773 if (!inv_exprs)
4774 inv_exprs = BITMAP_ALLOC (NULL);
4775
4776 bitmap_set_bit (inv_exprs, inv_expr->id);
4777 }
4778 sum_cost += cost;
4779 }
4780 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4781 NULL_TREE, ERROR_MARK, inv_exprs);
4782
4783 return !sum_cost.infinite_cost_p ();
4784 }
4785
4786 /* Computes value of candidate CAND at position AT in iteration NITER, and
4787 stores it to VAL. */
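/* The value is BASE + NITER * STEP, with one more STEP added when AT is
   after the candidate's increment; the arithmetic is carried out in an
   unsigned type (sizetype for pointers) to avoid undefined overflow.  */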
4788
4789 static void
4790 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4791 aff_tree *val)
4792 {
4793 aff_tree step, delta, nit;
4794 struct iv *iv = cand->iv;
4795 tree type = TREE_TYPE (iv->base);
4796 tree steptype;
4797 if (POINTER_TYPE_P (type))
4798 steptype = sizetype;
4799 else
4800 steptype = unsigned_type_for (type);
4801
4802 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4803 aff_combination_convert (&step, steptype);
4804 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4805 aff_combination_convert (&nit, steptype);
4806 aff_combination_mult (&nit, &step, &delta);
4807 if (stmt_after_increment (loop, cand, at))
4808 aff_combination_add (&delta, &step);
4809
4810 tree_to_aff_combination (iv->base, type, val);
4811 if (!POINTER_TYPE_P (type))
4812 aff_combination_convert (val, steptype);
4813 aff_combination_add (val, &delta);
4814 }
4815
4816 /* Returns period of induction variable iv. */
4817
4818 static tree
4819 iv_period (struct iv *iv)
4820 {
4821 tree step = iv->step, period, type;
4822 tree pow2div;
4823
4824 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4825
4826 type = unsigned_type_for (TREE_TYPE (step));
4827 /* Period of the iv is lcm (step, type_range)/step - 1,
4828 i.e., N*type_range/step - 1. Since the type range is a power
4829 of two, N == step >> num_of_ending_zeros_binary (step),
4830 so the final result is
4831
4832 (type_range >> num_of_ending_zeros_binary (step)) - 1
4833
4834 */
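/* E.g. for an 8-bit unsigned type and STEP == 4, pow2div below is 2 and
   the resulting period is (1 << 6) - 1 == 63.  */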
4835 pow2div = num_ending_zeros (step);
4836
4837 period = build_low_bits_mask (type,
4838 (TYPE_PRECISION (type)
4839 - tree_to_uhwi (pow2div)));
4840
4841 return period;
4842 }
4843
4844 /* Returns the comparison operator used when eliminating the iv USE. */
4845
4846 static enum tree_code
4847 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4848 {
4849 struct loop *loop = data->current_loop;
4850 basic_block ex_bb;
4851 edge exit;
4852
4853 ex_bb = gimple_bb (use->stmt);
4854 exit = EDGE_SUCC (ex_bb, 0);
4855 if (flow_bb_inside_loop_p (loop, exit->dest))
4856 exit = EDGE_SUCC (ex_bb, 1);
4857
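/* If the exit edge is taken when the condition is true, the rewritten
   condition must become true exactly when the candidate reaches the bound,
   i.e. use EQ_EXPR; otherwise the loop keeps running while the candidate
   differs from the bound, i.e. use NE_EXPR.  */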
4858 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4859 }
4860
4861 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4862 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4863 calculation is performed in non-wrapping type.
4864
4865 TODO: More generally, we could test for the situation that
4866 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4867 This would require knowing the sign of OFFSET. */
4868
4869 static bool
4870 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4871 {
4872 enum tree_code code;
4873 tree e1, e2;
4874 aff_tree aff_e1, aff_e2, aff_offset;
4875
4876 if (!nowrap_type_p (TREE_TYPE (base)))
4877 return false;
4878
4879 base = expand_simple_operations (base);
4880
4881 if (TREE_CODE (base) == SSA_NAME)
4882 {
4883 gimple *stmt = SSA_NAME_DEF_STMT (base);
4884
4885 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4886 return false;
4887
4888 code = gimple_assign_rhs_code (stmt);
4889 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4890 return false;
4891
4892 e1 = gimple_assign_rhs1 (stmt);
4893 e2 = gimple_assign_rhs2 (stmt);
4894 }
4895 else
4896 {
4897 code = TREE_CODE (base);
4898 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4899 return false;
4900 e1 = TREE_OPERAND (base, 0);
4901 e2 = TREE_OPERAND (base, 1);
4902 }
4903
4904 /* Use affine expansion as deeper inspection to prove the equality. */
4905 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4906 &aff_e2, &data->name_expansion_cache);
4907 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4908 &aff_offset, &data->name_expansion_cache);
4909 aff_combination_scale (&aff_offset, -1);
4910 switch (code)
4911 {
4912 case PLUS_EXPR:
4913 aff_combination_add (&aff_e2, &aff_offset);
4914 if (aff_combination_zero_p (&aff_e2))
4915 return true;
4916
4917 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4918 &aff_e1, &data->name_expansion_cache);
4919 aff_combination_add (&aff_e1, &aff_offset);
4920 return aff_combination_zero_p (&aff_e1);
4921
4922 case POINTER_PLUS_EXPR:
4923 aff_combination_add (&aff_e2, &aff_offset);
4924 return aff_combination_zero_p (&aff_e2);
4925
4926 default:
4927 return false;
4928 }
4929 }
4930
4931 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4932 comparison with CAND. NITER describes the number of iterations of
4933 the loop. If successful, the comparison in COMP_P is altered accordingly.
4934
4935 We aim to handle the following situation:
4936
4937 sometype *base, *p;
4938 int a, b, i;
4939
4940 i = a;
4941 p = p_0 = base + a;
4942
4943 do
4944 {
4945 bla (*p);
4946 p++;
4947 i++;
4948 }
4949 while (i < b);
4950
4951 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4952 We aim to optimize this to
4953
4954 p = p_0 = base + a;
4955 do
4956 {
4957 bla (*p);
4958 p++;
4959 }
4960 while (p < p_0 - a + b);
4961
4962 This preserves correctness, since the pointer arithmetic does not
4963 overflow. More precisely:
4964
4965 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4966 overflow in computing it or the values of p.
4967 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4968 overflow. To prove this, we use the fact that p_0 = base + a. */
4969
4970 static bool
4971 iv_elimination_compare_lt (struct ivopts_data *data,
4972 struct iv_cand *cand, enum tree_code *comp_p,
4973 struct tree_niter_desc *niter)
4974 {
4975 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4976 struct aff_tree nit, tmpa, tmpb;
4977 enum tree_code comp;
4978 HOST_WIDE_INT step;
4979
4980 /* We need to know that the candidate induction variable does not overflow.
4981 While more complex analysis may be used to prove this, for now just
4982 check that the variable appears in the original program and that it
4983 is computed in a type that guarantees no overflows. */
4984 cand_type = TREE_TYPE (cand->iv->base);
4985 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4986 return false;
4987
4988 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4989 the calculation of the BOUND could overflow, making the comparison
4990 invalid. */
4991 if (!data->loop_single_exit_p)
4992 return false;
4993
4994 /* We need to be able to decide whether candidate is increasing or decreasing
4995 in order to choose the right comparison operator. */
4996 if (!cst_and_fits_in_hwi (cand->iv->step))
4997 return false;
4998 step = int_cst_value (cand->iv->step);
4999
5000 /* Check that the number of iterations matches the expected pattern:
5001 a + 1 > b ? 0 : b - a - 1. */
5002 mbz = niter->may_be_zero;
5003 if (TREE_CODE (mbz) == GT_EXPR)
5004 {
5005 /* Handle a + 1 > b. */
5006 tree op0 = TREE_OPERAND (mbz, 0);
5007 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5008 {
5009 a = TREE_OPERAND (op0, 0);
5010 b = TREE_OPERAND (mbz, 1);
5011 }
5012 else
5013 return false;
5014 }
5015 else if (TREE_CODE (mbz) == LT_EXPR)
5016 {
5017 tree op1 = TREE_OPERAND (mbz, 1);
5018
5019 /* Handle b < a + 1. */
5020 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5021 {
5022 a = TREE_OPERAND (op1, 0);
5023 b = TREE_OPERAND (mbz, 0);
5024 }
5025 else
5026 return false;
5027 }
5028 else
5029 return false;
5030
5031 /* Expected number of iterations is B - A - 1. Check that it matches
5032 the actual number, i.e., that B - A - NITER = 1. */
5033 tree_to_aff_combination (niter->niter, nit_type, &nit);
5034 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5035 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5036 aff_combination_scale (&nit, -1);
5037 aff_combination_scale (&tmpa, -1);
5038 aff_combination_add (&tmpb, &tmpa);
5039 aff_combination_add (&tmpb, &nit);
5040 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5041 return false;
5042
5043 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5044 overflow. */
5045 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5046 cand->iv->step,
5047 fold_convert (TREE_TYPE (cand->iv->step), a));
5048 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5049 return false;
5050
5051 /* Determine the new comparison operator. */
5052 comp = step < 0 ? GT_EXPR : LT_EXPR;
5053 if (*comp_p == NE_EXPR)
5054 *comp_p = comp;
5055 else if (*comp_p == EQ_EXPR)
5056 *comp_p = invert_tree_comparison (comp, false);
5057 else
5058 gcc_unreachable ();
5059
5060 return true;
5061 }
5062
5063 /* Check whether it is possible to express the condition in USE by comparison
5064 of candidate CAND. If so, store the value compared with to BOUND, and the
5065 comparison operator to COMP. */
5066
5067 static bool
5068 may_eliminate_iv (struct ivopts_data *data,
5069 struct iv_use *use, struct iv_cand *cand, tree *bound,
5070 enum tree_code *comp)
5071 {
5072 basic_block ex_bb;
5073 edge exit;
5074 tree period;
5075 struct loop *loop = data->current_loop;
5076 aff_tree bnd;
5077 struct tree_niter_desc *desc = NULL;
5078
5079 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5080 return false;
5081
5082 /* For now this works only for exits that dominate the loop latch.
5083 TODO: extend to other conditions inside loop body. */
5084 ex_bb = gimple_bb (use->stmt);
5085 if (use->stmt != last_stmt (ex_bb)
5086 || gimple_code (use->stmt) != GIMPLE_COND
5087 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5088 return false;
5089
5090 exit = EDGE_SUCC (ex_bb, 0);
5091 if (flow_bb_inside_loop_p (loop, exit->dest))
5092 exit = EDGE_SUCC (ex_bb, 1);
5093 if (flow_bb_inside_loop_p (loop, exit->dest))
5094 return false;
5095
5096 desc = niter_for_exit (data, exit);
5097 if (!desc)
5098 return false;
5099
5100 /* Determine whether we can use the variable to test the exit condition.
5101 This is the case iff the period of the induction variable is greater
5102 than the number of iterations for which the exit condition is true. */
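/* For instance, an unsigned char IV with step 1 has period 255, so it can
   replace the exit test only when that test stays true for at most 255
   iterations; otherwise the IV would wrap before reaching the bound.  */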
5103 period = iv_period (cand->iv);
5104
5105 /* If the number of iterations is constant, compare against it directly. */
5106 if (TREE_CODE (desc->niter) == INTEGER_CST)
5107 {
5108 /* See cand_value_at. */
5109 if (stmt_after_increment (loop, cand, use->stmt))
5110 {
5111 if (!tree_int_cst_lt (desc->niter, period))
5112 return false;
5113 }
5114 else
5115 {
5116 if (tree_int_cst_lt (period, desc->niter))
5117 return false;
5118 }
5119 }
5120
5121 /* If not, and if this is the only possible exit of the loop, see whether
5122 we can get a conservative estimate on the number of iterations of the
5123 entire loop and compare against that instead. */
5124 else
5125 {
5126 widest_int period_value, max_niter;
5127
5128 max_niter = desc->max;
5129 if (stmt_after_increment (loop, cand, use->stmt))
5130 max_niter += 1;
5131 period_value = wi::to_widest (period);
5132 if (wi::gtu_p (max_niter, period_value))
5133 {
5134 /* See if we can take advantage of inferred loop bound
5135 information. */
5136 if (data->loop_single_exit_p)
5137 {
5138 if (!max_loop_iterations (loop, &max_niter))
5139 return false;
5140 /* The loop bound is already adjusted by adding 1. */
5141 if (wi::gtu_p (max_niter, period_value))
5142 return false;
5143 }
5144 else
5145 return false;
5146 }
5147 }
5148
5149 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5150
5151 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5152 aff_combination_to_tree (&bnd));
5153 *comp = iv_elimination_compare (data, use);
5154
5155 /* It is unlikely that computing the number of iterations using division
5156 would be more profitable than keeping the original induction variable. */
5157 if (expression_expensive_p (*bound))
5158 return false;
5159
5160 /* Sometimes it is possible to handle the situation that the number of
5161 iterations may be zero (unless additional assumptions hold) by using <
5162 instead of != in the exit condition.
5163
5164 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5165 base the exit condition on it. However, that is often too
5166 expensive. */
5167 if (!integer_zerop (desc->may_be_zero))
5168 return iv_elimination_compare_lt (data, cand, comp, desc);
5169
5170 return true;
5171 }
5172
5173 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5174 be copied, if it is used in the loop body and DATA->body_includes_call. */
5175
5176 static int
5177 parm_decl_cost (struct ivopts_data *data, tree bound)
5178 {
5179 tree sbound = bound;
5180 STRIP_NOPS (sbound);
5181
5182 if (TREE_CODE (sbound) == SSA_NAME
5183 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5184 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5185 && data->body_includes_call)
5186 return COSTS_N_INSNS (1);
5187
5188 return 0;
5189 }
5190
5191 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5192
5193 static bool
5194 determine_group_iv_cost_cond (struct ivopts_data *data,
5195 struct iv_group *group, struct iv_cand *cand)
5196 {
5197 tree bound = NULL_TREE;
5198 struct iv *cmp_iv;
5199 bitmap inv_exprs = NULL;
5200 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5201 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5202 enum comp_iv_rewrite rewrite_type;
5203 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5204 tree *control_var, *bound_cst;
5205 enum tree_code comp = ERROR_MARK;
5206 struct iv_use *use = group->vuses[0];
5207
5208 /* Extract condition operands. */
5209 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5210 &bound_cst, NULL, &cmp_iv);
5211 gcc_assert (rewrite_type != COMP_IV_NA);
5212
5213 /* Try iv elimination. */
5214 if (rewrite_type == COMP_IV_ELIM
5215 && may_eliminate_iv (data, use, cand, &bound, &comp))
5216 {
5217 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5218 if (elim_cost.cost == 0)
5219 elim_cost.cost = parm_decl_cost (data, bound);
5220 else if (TREE_CODE (bound) == INTEGER_CST)
5221 elim_cost.cost = 0;
5222 /* If we replace a loop condition 'i < n' with 'p < base + n',
5223 inv_vars_elim will have 'base' and 'n' set, which implies that both
5224 'base' and 'n' will be live during the loop. More likely,
5225 'base + n' will be loop invariant, resulting in only one live value
5226 during the loop. So in that case we clear inv_vars_elim and set
5227 inv_expr_elim instead. */
5228 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5229 {
5230 inv_expr_elim = get_loop_invariant_expr (data, bound);
5231 bitmap_clear (inv_vars_elim);
5232 }
5233 /* The bound is a loop invariant, so it will be only computed
5234 once. */
5235 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5236 }
5237
5238 /* When the condition is a comparison of the candidate IV against
5239 zero, prefer this IV.
5240
5241 TODO: The constant that we're subtracting from the cost should
5242 be target-dependent. This information should be added to the
5243 target costs for each backend. */
5244 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5245 && integer_zerop (*bound_cst)
5246 && (operand_equal_p (*control_var, cand->var_after, 0)
5247 || operand_equal_p (*control_var, cand->var_before, 0)))
5248 elim_cost -= 1;
5249
5250 express_cost = get_computation_cost (data, use, cand, false,
5251 &inv_vars_express, NULL,
5252 &inv_expr_express);
5253 if (cmp_iv != NULL)
5254 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5255
5256 /* Count the cost of the original bound as well. */
5257 bound_cost = force_var_cost (data, *bound_cst, NULL);
5258 if (bound_cost.cost == 0)
5259 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5260 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5261 bound_cost.cost = 0;
5262 express_cost += bound_cost;
5263
5264 /* Choose the better approach, preferring the eliminated IV. */
5265 if (elim_cost <= express_cost)
5266 {
5267 cost = elim_cost;
5268 inv_vars = inv_vars_elim;
5269 inv_vars_elim = NULL;
5270 inv_expr = inv_expr_elim;
5271 }
5272 else
5273 {
5274 cost = express_cost;
5275 inv_vars = inv_vars_express;
5276 inv_vars_express = NULL;
5277 bound = NULL_TREE;
5278 comp = ERROR_MARK;
5279 inv_expr = inv_expr_express;
5280 }
5281
5282 if (inv_expr)
5283 {
5284 inv_exprs = BITMAP_ALLOC (NULL);
5285 bitmap_set_bit (inv_exprs, inv_expr->id);
5286 }
5287 set_group_iv_cost (data, group, cand, cost,
5288 inv_vars, bound, comp, inv_exprs);
5289
5290 if (inv_vars_elim)
5291 BITMAP_FREE (inv_vars_elim);
5292 if (inv_vars_express)
5293 BITMAP_FREE (inv_vars_express);
5294
5295 return !cost.infinite_cost_p ();
5296 }
5297
5298 /* Determines cost of computing uses in GROUP with CAND. Returns false
5299 if the uses cannot be represented with CAND. */
5300
5301 static bool
5302 determine_group_iv_cost (struct ivopts_data *data,
5303 struct iv_group *group, struct iv_cand *cand)
5304 {
5305 switch (group->type)
5306 {
5307 case USE_NONLINEAR_EXPR:
5308 return determine_group_iv_cost_generic (data, group, cand);
5309
5310 case USE_REF_ADDRESS:
5311 case USE_PTR_ADDRESS:
5312 return determine_group_iv_cost_address (data, group, cand);
5313
5314 case USE_COMPARE:
5315 return determine_group_iv_cost_cond (data, group, cand);
5316
5317 default:
5318 gcc_unreachable ();
5319 }
5320 }
5321
5322 /* Return true if get_computation_cost indicates that autoincrement is
5323 a possibility for the pair of USE and CAND, false otherwise. */
5324
5325 static bool
5326 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5327 struct iv_cand *cand)
5328 {
5329 if (!address_p (use->type))
5330 return false;
5331
5332 bool can_autoinc = false;
5333 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5334 return can_autoinc;
5335 }
5336
5337 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5338 use that allows autoincrement, and set their AINC_USE if possible. */
5339
5340 static void
5341 set_autoinc_for_original_candidates (struct ivopts_data *data)
5342 {
5343 unsigned i, j;
5344
5345 for (i = 0; i < data->vcands.length (); i++)
5346 {
5347 struct iv_cand *cand = data->vcands[i];
5348 struct iv_use *closest_before = NULL;
5349 struct iv_use *closest_after = NULL;
5350 if (cand->pos != IP_ORIGINAL)
5351 continue;
5352
5353 for (j = 0; j < data->vgroups.length (); j++)
5354 {
5355 struct iv_group *group = data->vgroups[j];
5356 struct iv_use *use = group->vuses[0];
5357 unsigned uid = gimple_uid (use->stmt);
5358
5359 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5360 continue;
5361
5362 if (uid < gimple_uid (cand->incremented_at)
5363 && (closest_before == NULL
5364 || uid > gimple_uid (closest_before->stmt)))
5365 closest_before = use;
5366
5367 if (uid > gimple_uid (cand->incremented_at)
5368 && (closest_after == NULL
5369 || uid < gimple_uid (closest_after->stmt)))
5370 closest_after = use;
5371 }
5372
5373 if (closest_before != NULL
5374 && autoinc_possible_for_pair (data, closest_before, cand))
5375 cand->ainc_use = closest_before;
5376 else if (closest_after != NULL
5377 && autoinc_possible_for_pair (data, closest_after, cand))
5378 cand->ainc_use = closest_after;
5379 }
5380 }
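/* As an illustrative sketch (hypothetical, not from any particular
   testcase): for an original biv `p' whose increment directly follows a
   load that uses it in the same basic block, e.g.

     t_1 = *p_2;          <-- closest use before the increment
     p_3 = p_2 + 4;       <-- IP_ORIGINAL increment of the candidate

   autoinc_possible_for_pair may report that a post-increment addressing
   mode can cover both statements, in which case AINC_USE is set to that
   load and the increment can later be folded into the memory access.  */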
5381
5382 /* Relate compare use with all candidates. */
5383
5384 static void
5385 relate_compare_use_with_all_cands (struct ivopts_data *data)
5386 {
5387 unsigned i, count = data->vcands.length ();
5388 for (i = 0; i < data->vgroups.length (); i++)
5389 {
5390 struct iv_group *group = data->vgroups[i];
5391
5392 if (group->type == USE_COMPARE)
5393 bitmap_set_range (group->related_cands, 0, count);
5394 }
5395 }
5396
5397 /* Finds the candidates for the induction variables. */
5398
5399 static void
5400 find_iv_candidates (struct ivopts_data *data)
5401 {
5402 /* Add commonly used ivs. */
5403 add_standard_iv_candidates (data);
5404
5405 /* Add old induction variables. */
5406 add_iv_candidate_for_bivs (data);
5407
5408 /* Add induction variables derived from uses. */
5409 add_iv_candidate_for_groups (data);
5410
5411 set_autoinc_for_original_candidates (data);
5412
5413 /* Record the important candidates. */
5414 record_important_candidates (data);
5415
5416 /* Relate compare iv_use with all candidates. */
5417 if (!data->consider_all_candidates)
5418 relate_compare_use_with_all_cands (data);
5419
5420 if (dump_file && (dump_flags & TDF_DETAILS))
5421 {
5422 unsigned i;
5423
5424 fprintf (dump_file, "\n<Important Candidates>:\t");
5425 for (i = 0; i < data->vcands.length (); i++)
5426 if (data->vcands[i]->important)
5427 fprintf (dump_file, " %d,", data->vcands[i]->id);
5428 fprintf (dump_file, "\n");
5429
5430 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5431 for (i = 0; i < data->vgroups.length (); i++)
5432 {
5433 struct iv_group *group = data->vgroups[i];
5434
5435 if (group->related_cands)
5436 {
5437 fprintf (dump_file, " Group %d:\t", group->id);
5438 dump_bitmap (dump_file, group->related_cands);
5439 }
5440 }
5441 fprintf (dump_file, "\n");
5442 }
5443 }
5444
5445 /* Determines costs of computing use of iv with an iv candidate. */
5446
5447 static void
5448 determine_group_iv_costs (struct ivopts_data *data)
5449 {
5450 unsigned i, j;
5451 struct iv_cand *cand;
5452 struct iv_group *group;
5453 bitmap to_clear = BITMAP_ALLOC (NULL);
5454
5455 alloc_use_cost_map (data);
5456
5457 for (i = 0; i < data->vgroups.length (); i++)
5458 {
5459 group = data->vgroups[i];
5460
5461 if (data->consider_all_candidates)
5462 {
5463 for (j = 0; j < data->vcands.length (); j++)
5464 {
5465 cand = data->vcands[j];
5466 determine_group_iv_cost (data, group, cand);
5467 }
5468 }
5469 else
5470 {
5471 bitmap_iterator bi;
5472
5473 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5474 {
5475 cand = data->vcands[j];
5476 if (!determine_group_iv_cost (data, group, cand))
5477 bitmap_set_bit (to_clear, j);
5478 }
5479
5480 /* Remove the candidates for which the cost is infinite from
5481 the list of related candidates. */
5482 bitmap_and_compl_into (group->related_cands, to_clear);
5483 bitmap_clear (to_clear);
5484 }
5485 }
5486
5487 BITMAP_FREE (to_clear);
5488
5489 if (dump_file && (dump_flags & TDF_DETAILS))
5490 {
5491 bitmap_iterator bi;
5492
5493 /* Dump invariant variables. */
5494 fprintf (dump_file, "\n<Invariant Vars>:\n");
5495 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5496 {
5497 struct version_info *info = ver_info (data, i);
5498 if (info->inv_id)
5499 {
5500 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5501 print_generic_expr (dump_file, info->name, TDF_SLIM);
5502 fprintf (dump_file, "%s\n",
5503 info->has_nonlin_use ? "" : "\t(eliminable)");
5504 }
5505 }
5506
5507 /* Dump invariant expressions. */
5508 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5509 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5510
5511 for (hash_table<iv_inv_expr_hasher>::iterator it
5512 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5513 ++it)
5514 list.safe_push (*it);
5515
5516 list.qsort (sort_iv_inv_expr_ent);
5517
5518 for (i = 0; i < list.length (); ++i)
5519 {
5520 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5521 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5522 fprintf (dump_file, "\n");
5523 }
5524
5525 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5526
5527 for (i = 0; i < data->vgroups.length (); i++)
5528 {
5529 group = data->vgroups[i];
5530
5531 fprintf (dump_file, "Group %d:\n", i);
5532 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5533 for (j = 0; j < group->n_map_members; j++)
5534 {
5535 if (!group->cost_map[j].cand
5536 || group->cost_map[j].cost.infinite_cost_p ())
5537 continue;
5538
5539 fprintf (dump_file, " %d\t%d\t%d\t",
5540 group->cost_map[j].cand->id,
5541 group->cost_map[j].cost.cost,
5542 group->cost_map[j].cost.complexity);
5543 if (!group->cost_map[j].inv_exprs
5544 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5545 fprintf (dump_file, "NIL;\t");
5546 else
5547 bitmap_print (dump_file,
5548 group->cost_map[j].inv_exprs, "", ";\t");
5549 if (!group->cost_map[j].inv_vars
5550 || bitmap_empty_p (group->cost_map[j].inv_vars))
5551 fprintf (dump_file, "NIL;\n");
5552 else
5553 bitmap_print (dump_file,
5554 group->cost_map[j].inv_vars, "", "\n");
5555 }
5556
5557 fprintf (dump_file, "\n");
5558 }
5559 fprintf (dump_file, "\n");
5560 }
5561 }
5562
5563 /* Determines cost of the candidate CAND. */
5564
5565 static void
5566 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5567 {
5568 comp_cost cost_base;
5569 unsigned cost, cost_step;
5570 tree base;
5571
5572 gcc_assert (cand->iv != NULL);
5573
5574 /* There are two costs associated with the candidate -- its increment
5575 and its initialization. The second is almost negligible for any loop
5576 that rolls enough, so we account for it only slightly. */
5577
5578 base = cand->iv->base;
5579 cost_base = force_var_cost (data, base, NULL);
5580 /* It is rare for the iv register to happen to be initialized with the
5581 proper value at no cost. In general, there will at least be a regcopy
5582 or a const set. */
5583 if (cost_base.cost == 0)
5584 cost_base.cost = COSTS_N_INSNS (1);
5585 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5586
5587 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5588
5589 /* Prefer the original ivs unless we may gain something by replacing them.
5590 The reason is to make debugging simpler; so this is not relevant for
5591 artificial ivs created by other optimization passes. */
5592 if (cand->pos != IP_ORIGINAL
5593 || !SSA_NAME_VAR (cand->var_before)
5594 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5595 cost++;
5596
5597 /* Prefer not to insert statements into latch unless there are some
5598 already (so that we do not create unnecessary jumps). */
5599 if (cand->pos == IP_END
5600 && empty_block_p (ip_end_pos (data->current_loop)))
5601 cost++;
5602
5603 cand->cost = cost;
5604 cand->cost_step = cost_step;
5605 }
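/* A small sketch of the two cost components above, with made-up target
   costs: for a candidate with base 0 and step 1,

     i = 0;           <-- setup: at least a const set, COSTS_N_INSNS (1),
                          scaled down by adjust_setup_cost for loops that
                          roll enough
     ...
     i = i + 1;       <-- cost_step: add_cost in the iv's mode, paid on
                          every iteration

   so cand->cost is dominated by the per-iteration increment, with only a
   small contribution from the initialization.  */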
5606
5607 /* Determines costs of computation of the candidates. */
5608
5609 static void
5610 determine_iv_costs (struct ivopts_data *data)
5611 {
5612 unsigned i;
5613
5614 if (dump_file && (dump_flags & TDF_DETAILS))
5615 {
5616 fprintf (dump_file, "<Candidate Costs>:\n");
5617 fprintf (dump_file, " cand\tcost\n");
5618 }
5619
5620 for (i = 0; i < data->vcands.length (); i++)
5621 {
5622 struct iv_cand *cand = data->vcands[i];
5623
5624 determine_iv_cost (data, cand);
5625
5626 if (dump_file && (dump_flags & TDF_DETAILS))
5627 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5628 }
5629
5630 if (dump_file && (dump_flags & TDF_DETAILS))
5631 fprintf (dump_file, "\n");
5632 }
5633
5634 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5635 induction variables. Note N_INVS includes both invariant variables and
5636 invariant expressions. */
5637
5638 static unsigned
5639 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5640 unsigned n_cands)
5641 {
5642 unsigned cost;
5643 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5644 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5645 bool speed = data->speed;
5646
5647 /* If there is a call in the loop body, the call-clobbered registers
5648 are not available for loop invariants. */
5649 if (data->body_includes_call)
5650 available_regs = available_regs - target_clobbered_regs;
5651
5652 /* If we have enough registers. */
5653 if (regs_needed + target_res_regs < available_regs)
5654 cost = n_new;
5655 /* If close to running out of registers, try to preserve them. */
5656 else if (regs_needed <= available_regs)
5657 cost = target_reg_cost [speed] * regs_needed;
5658 /* If the registers needed exceed the available ones but the number of
5659 candidates does not, we penalize the extra registers using target_spill_cost. */
5660 else if (n_cands <= available_regs)
5661 cost = target_reg_cost [speed] * available_regs
5662 + target_spill_cost [speed] * (regs_needed - available_regs);
5663 /* If even the number of candidates exceeds the available registers, we
5664 penalize the extra candidate registers using target_spill_cost * 2,
5665 because it is more expensive to spill an induction variable than an invariant. */
5666 else
5667 cost = target_reg_cost [speed] * available_regs
5668 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5669 + target_spill_cost [speed] * (regs_needed - n_cands);
5670
5671 /* Finally, add the number of candidates, so that we prefer eliminating
5672 induction variables if possible. */
5673 return cost + n_cands;
5674 }
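/* A worked example with hypothetical target parameters (available_regs
   = 16, target_res_regs = 3, regs_used = 4, no call in the loop body):

     n_invs = 2, n_cands = 3  =>  n_new = 5, regs_needed = 9;
     9 + 3 < 16, so cost = n_new = 5 and the result is 5 + 3 = 8.

     n_invs = 10, n_cands = 8  =>  regs_needed = 22 > 16, but
     n_cands <= 16, so the middle spill case applies:
       cost = target_reg_cost * 16 + target_spill_cost * (22 - 16),
     again plus n_cands.  */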
5675
5676 /* For each size of the induction variable set determine the penalty. */
5677
5678 static void
5679 determine_set_costs (struct ivopts_data *data)
5680 {
5681 unsigned j, n;
5682 gphi *phi;
5683 gphi_iterator psi;
5684 tree op;
5685 struct loop *loop = data->current_loop;
5686 bitmap_iterator bi;
5687
5688 if (dump_file && (dump_flags & TDF_DETAILS))
5689 {
5690 fprintf (dump_file, "<Global Costs>:\n");
5691 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5692 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5693 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5694 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5695 }
5696
5697 n = 0;
5698 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5699 {
5700 phi = psi.phi ();
5701 op = PHI_RESULT (phi);
5702
5703 if (virtual_operand_p (op))
5704 continue;
5705
5706 if (get_iv (data, op))
5707 continue;
5708
5709 if (!POINTER_TYPE_P (TREE_TYPE (op))
5710 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5711 continue;
5712
5713 n++;
5714 }
5715
5716 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5717 {
5718 struct version_info *info = ver_info (data, j);
5719
5720 if (info->inv_id && info->has_nonlin_use)
5721 n++;
5722 }
5723
5724 data->regs_used = n;
5725 if (dump_file && (dump_flags & TDF_DETAILS))
5726 fprintf (dump_file, " regs_used %d\n", n);
5727
5728 if (dump_file && (dump_flags & TDF_DETAILS))
5729 {
5730 fprintf (dump_file, " cost for size:\n");
5731 fprintf (dump_file, " ivs\tcost\n");
5732 for (j = 0; j <= 2 * target_avail_regs; j++)
5733 fprintf (dump_file, " %d\t%d\n", j,
5734 ivopts_estimate_reg_pressure (data, 0, j));
5735 fprintf (dump_file, "\n");
5736 }
5737 }
5738
5739 /* Returns true if A is a cheaper cost pair than B. */
5740
5741 static bool
5742 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5743 {
5744 if (!a)
5745 return false;
5746
5747 if (!b)
5748 return true;
5749
5750 if (a->cost < b->cost)
5751 return true;
5752
5753 if (b->cost < a->cost)
5754 return false;
5755
5756 /* In case the costs are the same, prefer the cheaper candidate. */
5757 if (a->cand->cost < b->cand->cost)
5758 return true;
5759
5760 return false;
5761 }
5762
5763 /* Compare cost pairs A and B. Return 1, 0 or -1 if A is more expensive
5764 than, equal to, or cheaper than B, respectively. */
5765
5766 static int
5767 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5768 {
5769 if (cheaper_cost_pair (a, b))
5770 return -1;
5771 if (cheaper_cost_pair (b, a))
5772 return 1;
5773
5774 return 0;
5775 }
5776
5777 /* Returns the cost pair of the candidate by which GROUP is expressed in IVS. */
5778
5779 static struct cost_pair *
5780 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5781 {
5782 return ivs->cand_for_group[group->id];
5783 }
5784
5785 /* Computes the cost field of IVS structure. */
5786
5787 static void
5788 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5789 {
5790 comp_cost cost = ivs->cand_use_cost;
5791
5792 cost += ivs->cand_cost;
5793 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5794 ivs->cost = cost;
5795 }
5796
5797 /* Remove uses of the invariants in set INVS by decreasing the counters in
5798 N_INV_USES and in IVS. */
5799
5800 static void
5801 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5802 {
5803 bitmap_iterator bi;
5804 unsigned iid;
5805
5806 if (!invs)
5807 return;
5808
5809 gcc_assert (n_inv_uses != NULL);
5810 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5811 {
5812 n_inv_uses[iid]--;
5813 if (n_inv_uses[iid] == 0)
5814 ivs->n_invs--;
5815 }
5816 }
5817
5818 /* Set GROUP not to be expressed by any candidate in IVS. */
5819
5820 static void
5821 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5822 struct iv_group *group)
5823 {
5824 unsigned gid = group->id, cid;
5825 struct cost_pair *cp;
5826
5827 cp = ivs->cand_for_group[gid];
5828 if (!cp)
5829 return;
5830 cid = cp->cand->id;
5831
5832 ivs->bad_groups++;
5833 ivs->cand_for_group[gid] = NULL;
5834 ivs->n_cand_uses[cid]--;
5835
5836 if (ivs->n_cand_uses[cid] == 0)
5837 {
5838 bitmap_clear_bit (ivs->cands, cid);
5839 ivs->n_cands--;
5840 ivs->cand_cost -= cp->cand->cost;
5841 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5842 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5843 }
5844
5845 ivs->cand_use_cost -= cp->cost;
5846 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5847 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5848 iv_ca_recount_cost (data, ivs);
5849 }
5850
5851 /* Add uses of the invariants in set INVS by increasing the counters in
5852 N_INV_USES and in IVS. */
5853
5854 static void
5855 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5856 {
5857 bitmap_iterator bi;
5858 unsigned iid;
5859
5860 if (!invs)
5861 return;
5862
5863 gcc_assert (n_inv_uses != NULL);
5864 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5865 {
5866 n_inv_uses[iid]++;
5867 if (n_inv_uses[iid] == 1)
5868 ivs->n_invs++;
5869 }
5870 }
5871
5872 /* Set cost pair for GROUP in set IVS to CP. */
5873
5874 static void
5875 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5876 struct iv_group *group, struct cost_pair *cp)
5877 {
5878 unsigned gid = group->id, cid;
5879
5880 if (ivs->cand_for_group[gid] == cp)
5881 return;
5882
5883 if (ivs->cand_for_group[gid])
5884 iv_ca_set_no_cp (data, ivs, group);
5885
5886 if (cp)
5887 {
5888 cid = cp->cand->id;
5889
5890 ivs->bad_groups--;
5891 ivs->cand_for_group[gid] = cp;
5892 ivs->n_cand_uses[cid]++;
5893 if (ivs->n_cand_uses[cid] == 1)
5894 {
5895 bitmap_set_bit (ivs->cands, cid);
5896 ivs->n_cands++;
5897 ivs->cand_cost += cp->cand->cost;
5898 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5899 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5900 }
5901
5902 ivs->cand_use_cost += cp->cost;
5903 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5904 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5905 iv_ca_recount_cost (data, ivs);
5906 }
5907 }
5908
5909 /* Extend set IVS by expressing GROUP by some of the candidates in it
5910 if possible. Consider all important candidates if the candidates in
5911 set IVS don't give any result. */
5912
5913 static void
5914 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5915 struct iv_group *group)
5916 {
5917 struct cost_pair *best_cp = NULL, *cp;
5918 bitmap_iterator bi;
5919 unsigned i;
5920 struct iv_cand *cand;
5921
5922 gcc_assert (ivs->upto >= group->id);
5923 ivs->upto++;
5924 ivs->bad_groups++;
5925
5926 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5927 {
5928 cand = data->vcands[i];
5929 cp = get_group_iv_cost (data, group, cand);
5930 if (cheaper_cost_pair (cp, best_cp))
5931 best_cp = cp;
5932 }
5933
5934 if (best_cp == NULL)
5935 {
5936 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5937 {
5938 cand = data->vcands[i];
5939 cp = get_group_iv_cost (data, group, cand);
5940 if (cheaper_cost_pair (cp, best_cp))
5941 best_cp = cp;
5942 }
5943 }
5944
5945 iv_ca_set_cp (data, ivs, group, best_cp);
5946 }
5947
5948 /* Get cost for assignment IVS. */
5949
5950 static comp_cost
5951 iv_ca_cost (struct iv_ca *ivs)
5952 {
5953 /* This was a conditional expression but it triggered a bug in
5954 Sun C 5.5. */
5955 if (ivs->bad_groups)
5956 return infinite_cost;
5957 else
5958 return ivs->cost;
5959 }
5960
5961 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5962 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5963 respectively. */
5964
5965 static int
5966 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5967 struct iv_group *group, struct cost_pair *old_cp,
5968 struct cost_pair *new_cp)
5969 {
5970 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5971 unsigned old_n_invs = ivs->n_invs;
5972 iv_ca_set_cp (data, ivs, group, new_cp);
5973 unsigned new_n_invs = ivs->n_invs;
5974 iv_ca_set_cp (data, ivs, group, old_cp);
5975
5976 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5977 }
5978
5979 /* Creates a change record expressing GROUP by NEW_CP instead of OLD_CP and
5980 chains it before NEXT. */
5981
5982 static struct iv_ca_delta *
5983 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5984 struct cost_pair *new_cp, struct iv_ca_delta *next)
5985 {
5986 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5987
5988 change->group = group;
5989 change->old_cp = old_cp;
5990 change->new_cp = new_cp;
5991 change->next = next;
5992
5993 return change;
5994 }
5995
5996 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5997 are rewritten. */
5998
5999 static struct iv_ca_delta *
6000 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6001 {
6002 struct iv_ca_delta *last;
6003
6004 if (!l2)
6005 return l1;
6006
6007 if (!l1)
6008 return l2;
6009
6010 for (last = l1; last->next; last = last->next)
6011 continue;
6012 last->next = l2;
6013
6014 return l1;
6015 }
6016
6017 /* Reverse the list of changes DELTA, forming the inverse to it. */
6018
6019 static struct iv_ca_delta *
6020 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6021 {
6022 struct iv_ca_delta *act, *next, *prev = NULL;
6023
6024 for (act = delta; act; act = next)
6025 {
6026 next = act->next;
6027 act->next = prev;
6028 prev = act;
6029
6030 std::swap (act->old_cp, act->new_cp);
6031 }
6032
6033 return prev;
6034 }
6035
6036 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6037 reverted instead. */
6038
6039 static void
6040 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6041 struct iv_ca_delta *delta, bool forward)
6042 {
6043 struct cost_pair *from, *to;
6044 struct iv_ca_delta *act;
6045
6046 if (!forward)
6047 delta = iv_ca_delta_reverse (delta);
6048
6049 for (act = delta; act; act = act->next)
6050 {
6051 from = act->old_cp;
6052 to = act->new_cp;
6053 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6054 iv_ca_set_cp (data, ivs, act->group, to);
6055 }
6056
6057 if (!forward)
6058 iv_ca_delta_reverse (delta);
6059 }
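/* The typical usage pattern for deltas (see e.g. iv_ca_extend below) is
   to apply a tentative change, measure it, and then undo it:

     iv_ca_delta_commit (data, ivs, delta, true);    (apply)
     cost = iv_ca_cost (ivs);                        (evaluate)
     iv_ca_delta_commit (data, ivs, delta, false);   (revert)

   Only the cheapest delta found this way is committed for good and then
   released with iv_ca_delta_free.  */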
6060
6061 /* Returns true if CAND is used in IVS. */
6062
6063 static bool
6064 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6065 {
6066 return ivs->n_cand_uses[cand->id] > 0;
6067 }
6068
6069 /* Returns number of induction variable candidates in the set IVS. */
6070
6071 static unsigned
6072 iv_ca_n_cands (struct iv_ca *ivs)
6073 {
6074 return ivs->n_cands;
6075 }
6076
6077 /* Free the list of changes DELTA. */
6078
6079 static void
6080 iv_ca_delta_free (struct iv_ca_delta **delta)
6081 {
6082 struct iv_ca_delta *act, *next;
6083
6084 for (act = *delta; act; act = next)
6085 {
6086 next = act->next;
6087 free (act);
6088 }
6089
6090 *delta = NULL;
6091 }
6092
6093 /* Allocates new iv candidates assignment. */
6094
6095 static struct iv_ca *
6096 iv_ca_new (struct ivopts_data *data)
6097 {
6098 struct iv_ca *nw = XNEW (struct iv_ca);
6099
6100 nw->upto = 0;
6101 nw->bad_groups = 0;
6102 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6103 data->vgroups.length ());
6104 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6105 nw->cands = BITMAP_ALLOC (NULL);
6106 nw->n_cands = 0;
6107 nw->n_invs = 0;
6108 nw->cand_use_cost = no_cost;
6109 nw->cand_cost = 0;
6110 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6111 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6112 nw->cost = no_cost;
6113
6114 return nw;
6115 }
6116
6117 /* Free memory occupied by the set IVS. */
6118
6119 static void
6120 iv_ca_free (struct iv_ca **ivs)
6121 {
6122 free ((*ivs)->cand_for_group);
6123 free ((*ivs)->n_cand_uses);
6124 BITMAP_FREE ((*ivs)->cands);
6125 free ((*ivs)->n_inv_var_uses);
6126 free ((*ivs)->n_inv_expr_uses);
6127 free (*ivs);
6128 *ivs = NULL;
6129 }
6130
6131 /* Dumps IVS to FILE. */
6132
6133 static void
6134 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6135 {
6136 unsigned i;
6137 comp_cost cost = iv_ca_cost (ivs);
6138
6139 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6140 cost.complexity);
6141 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6142 ivs->cand_cost, ivs->cand_use_cost.cost,
6143 ivs->cand_use_cost.complexity);
6144 bitmap_print (file, ivs->cands, " candidates: ","\n");
6145
6146 for (i = 0; i < ivs->upto; i++)
6147 {
6148 struct iv_group *group = data->vgroups[i];
6149 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6150 if (cp)
6151 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6152 group->id, cp->cand->id, cp->cost.cost,
6153 cp->cost.complexity);
6154 else
6155 fprintf (file, " group:%d --> ??\n", group->id);
6156 }
6157
6158 const char *pref = "";
6159 fprintf (file, " invariant variables: ");
6160 for (i = 1; i <= data->max_inv_var_id; i++)
6161 if (ivs->n_inv_var_uses[i])
6162 {
6163 fprintf (file, "%s%d", pref, i);
6164 pref = ", ";
6165 }
6166
6167 pref = "";
6168 fprintf (file, "\n invariant expressions: ");
6169 for (i = 1; i <= data->max_inv_expr_id; i++)
6170 if (ivs->n_inv_expr_uses[i])
6171 {
6172 fprintf (file, "%s%d", pref, i);
6173 pref = ", ";
6174 }
6175
6176 fprintf (file, "\n\n");
6177 }
6178
6179 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6180 new set, and store differences in DELTA. Number of induction variables
6181 in the new set is stored to N_IVS. When MIN_NCAND is true, the function
6182 tries to find a solution with a minimal number of iv candidates. */
6183
6184 static comp_cost
6185 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6186 struct iv_cand *cand, struct iv_ca_delta **delta,
6187 unsigned *n_ivs, bool min_ncand)
6188 {
6189 unsigned i;
6190 comp_cost cost;
6191 struct iv_group *group;
6192 struct cost_pair *old_cp, *new_cp;
6193
6194 *delta = NULL;
6195 for (i = 0; i < ivs->upto; i++)
6196 {
6197 group = data->vgroups[i];
6198 old_cp = iv_ca_cand_for_group (ivs, group);
6199
6200 if (old_cp
6201 && old_cp->cand == cand)
6202 continue;
6203
6204 new_cp = get_group_iv_cost (data, group, cand);
6205 if (!new_cp)
6206 continue;
6207
6208 if (!min_ncand)
6209 {
6210 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6211 /* Skip if new_cp depends on more invariants. */
6212 if (cmp_invs > 0)
6213 continue;
6214
6215 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6216 /* Skip if new_cp is not cheaper. */
6217 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6218 continue;
6219 }
6220
6221 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6222 }
6223
6224 iv_ca_delta_commit (data, ivs, *delta, true);
6225 cost = iv_ca_cost (ivs);
6226 if (n_ivs)
6227 *n_ivs = iv_ca_n_cands (ivs);
6228 iv_ca_delta_commit (data, ivs, *delta, false);
6229
6230 return cost;
6231 }
6232
6233 /* Try narrowing set IVS by removing CAND. Return the cost of
6234 the new set and store the differences in DELTA. START is
6235 the candidate with which we start narrowing. */
6236
6237 static comp_cost
6238 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6239 struct iv_cand *cand, struct iv_cand *start,
6240 struct iv_ca_delta **delta)
6241 {
6242 unsigned i, ci;
6243 struct iv_group *group;
6244 struct cost_pair *old_cp, *new_cp, *cp;
6245 bitmap_iterator bi;
6246 struct iv_cand *cnd;
6247 comp_cost cost, best_cost, acost;
6248
6249 *delta = NULL;
6250 for (i = 0; i < data->vgroups.length (); i++)
6251 {
6252 group = data->vgroups[i];
6253
6254 old_cp = iv_ca_cand_for_group (ivs, group);
6255 if (old_cp->cand != cand)
6256 continue;
6257
6258 best_cost = iv_ca_cost (ivs);
6259 /* Start narrowing with START. */
6260 new_cp = get_group_iv_cost (data, group, start);
6261
6262 if (data->consider_all_candidates)
6263 {
6264 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6265 {
6266 if (ci == cand->id || (start && ci == start->id))
6267 continue;
6268
6269 cnd = data->vcands[ci];
6270
6271 cp = get_group_iv_cost (data, group, cnd);
6272 if (!cp)
6273 continue;
6274
6275 iv_ca_set_cp (data, ivs, group, cp);
6276 acost = iv_ca_cost (ivs);
6277
6278 if (acost < best_cost)
6279 {
6280 best_cost = acost;
6281 new_cp = cp;
6282 }
6283 }
6284 }
6285 else
6286 {
6287 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6288 {
6289 if (ci == cand->id || (start && ci == start->id))
6290 continue;
6291
6292 cnd = data->vcands[ci];
6293
6294 cp = get_group_iv_cost (data, group, cnd);
6295 if (!cp)
6296 continue;
6297
6298 iv_ca_set_cp (data, ivs, group, cp);
6299 acost = iv_ca_cost (ivs);
6300
6301 if (acost < best_cost)
6302 {
6303 best_cost = acost;
6304 new_cp = cp;
6305 }
6306 }
6307 }
6308 /* Restore to old cp for use. */
6309 iv_ca_set_cp (data, ivs, group, old_cp);
6310
6311 if (!new_cp)
6312 {
6313 iv_ca_delta_free (delta);
6314 return infinite_cost;
6315 }
6316
6317 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6318 }
6319
6320 iv_ca_delta_commit (data, ivs, *delta, true);
6321 cost = iv_ca_cost (ivs);
6322 iv_ca_delta_commit (data, ivs, *delta, false);
6323
6324 return cost;
6325 }
6326
6327 /* Try optimizing the set of candidates IVS by removing candidates other
6328 than EXCEPT_CAND from it. Return cost of the new set, and store
6329 differences in DELTA. */
6330
6331 static comp_cost
6332 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6333 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6334 {
6335 bitmap_iterator bi;
6336 struct iv_ca_delta *act_delta, *best_delta;
6337 unsigned i;
6338 comp_cost best_cost, acost;
6339 struct iv_cand *cand;
6340
6341 best_delta = NULL;
6342 best_cost = iv_ca_cost (ivs);
6343
6344 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6345 {
6346 cand = data->vcands[i];
6347
6348 if (cand == except_cand)
6349 continue;
6350
6351 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6352
6353 if (acost < best_cost)
6354 {
6355 best_cost = acost;
6356 iv_ca_delta_free (&best_delta);
6357 best_delta = act_delta;
6358 }
6359 else
6360 iv_ca_delta_free (&act_delta);
6361 }
6362
6363 if (!best_delta)
6364 {
6365 *delta = NULL;
6366 return best_cost;
6367 }
6368
6369 /* Recurse to possibly remove other unnecessary ivs. */
6370 iv_ca_delta_commit (data, ivs, best_delta, true);
6371 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6372 iv_ca_delta_commit (data, ivs, best_delta, false);
6373 *delta = iv_ca_delta_join (best_delta, *delta);
6374 return best_cost;
6375 }
6376
6377 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6378 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6379 the corresponding cost_pair; otherwise just return BEST_CP. */
6380
6381 static struct cost_pair*
6382 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6383 unsigned int cand_idx, struct iv_cand *old_cand,
6384 struct cost_pair *best_cp)
6385 {
6386 struct iv_cand *cand;
6387 struct cost_pair *cp;
6388
6389 gcc_assert (old_cand != NULL && best_cp != NULL);
6390 if (cand_idx == old_cand->id)
6391 return best_cp;
6392
6393 cand = data->vcands[cand_idx];
6394 cp = get_group_iv_cost (data, group, cand);
6395 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6396 return cp;
6397
6398 return best_cp;
6399 }
6400
6401 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6402 which are used by more than one iv use. For each of those candidates,
6403 this function tries to represent the iv uses under that candidate using
6404 other candidates with lower local cost, then tries to prune the new set.
6405 If the new set has lower cost, it returns the new cost after recording
6406 the candidate replacement in list DELTA. */
6407
6408 static comp_cost
6409 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6410 struct iv_ca_delta **delta)
6411 {
6412 bitmap_iterator bi, bj;
6413 unsigned int i, j, k;
6414 struct iv_cand *cand;
6415 comp_cost orig_cost, acost;
6416 struct iv_ca_delta *act_delta, *tmp_delta;
6417 struct cost_pair *old_cp, *best_cp = NULL;
6418
6419 *delta = NULL;
6420 orig_cost = iv_ca_cost (ivs);
6421
6422 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6423 {
6424 if (ivs->n_cand_uses[i] == 1
6425 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6426 continue;
6427
6428 cand = data->vcands[i];
6429
6430 act_delta = NULL;
6431 /* Represent uses under current candidate using other ones with
6432 lower local cost. */
6433 for (j = 0; j < ivs->upto; j++)
6434 {
6435 struct iv_group *group = data->vgroups[j];
6436 old_cp = iv_ca_cand_for_group (ivs, group);
6437
6438 if (old_cp->cand != cand)
6439 continue;
6440
6441 best_cp = old_cp;
6442 if (data->consider_all_candidates)
6443 for (k = 0; k < data->vcands.length (); k++)
6444 best_cp = cheaper_cost_with_cand (data, group, k,
6445 old_cp->cand, best_cp);
6446 else
6447 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6448 best_cp = cheaper_cost_with_cand (data, group, k,
6449 old_cp->cand, best_cp);
6450
6451 if (best_cp == old_cp)
6452 continue;
6453
6454 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6455 }
6456 /* No need for further pruning. */
6457 if (!act_delta)
6458 continue;
6459
6460 /* Prune the new candidate set. */
6461 iv_ca_delta_commit (data, ivs, act_delta, true);
6462 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6463 iv_ca_delta_commit (data, ivs, act_delta, false);
6464 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6465
6466 if (acost < orig_cost)
6467 {
6468 *delta = act_delta;
6469 return acost;
6470 }
6471 else
6472 iv_ca_delta_free (&act_delta);
6473 }
6474
6475 return orig_cost;
6476 }
6477
6478 /* Tries to extend the set IVS in the best possible way in order to
6479 express the GROUP. If ORIGINALP is true, prefer candidates from
6480 the original set of IVs, otherwise favor important candidates not
6481 based on any memory object. */
6482
6483 static bool
6484 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6485 struct iv_group *group, bool originalp)
6486 {
6487 comp_cost best_cost, act_cost;
6488 unsigned i;
6489 bitmap_iterator bi;
6490 struct iv_cand *cand;
6491 struct iv_ca_delta *best_delta = NULL, *act_delta;
6492 struct cost_pair *cp;
6493
6494 iv_ca_add_group (data, ivs, group);
6495 best_cost = iv_ca_cost (ivs);
6496 cp = iv_ca_cand_for_group (ivs, group);
6497 if (cp)
6498 {
6499 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6500 iv_ca_set_no_cp (data, ivs, group);
6501 }
6502
6503 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6504 first try important candidates not based on any memory object. Only if
6505 this fails, try the specific ones. Rationale -- in loops with many
6506 variables the best choice often is to use just one generic biv. If we
6507 added here many ivs specific to the uses, the optimization algorithm later
6508 would be likely to get stuck in a local minimum, thus causing us to create
6509 too many ivs. The approach from few ivs to more seems more likely to be
6510 successful -- starting from few ivs, replacing an expensive use by a
6511 specific iv should always be a win. */
6512 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6513 {
6514 cand = data->vcands[i];
6515
6516 if (originalp && cand->pos != IP_ORIGINAL)
6517 continue;
6518
6519 if (!originalp && cand->iv->base_object != NULL_TREE)
6520 continue;
6521
6522 if (iv_ca_cand_used_p (ivs, cand))
6523 continue;
6524
6525 cp = get_group_iv_cost (data, group, cand);
6526 if (!cp)
6527 continue;
6528
6529 iv_ca_set_cp (data, ivs, group, cp);
6530 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6531 true);
6532 iv_ca_set_no_cp (data, ivs, group);
6533 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6534
6535 if (act_cost < best_cost)
6536 {
6537 best_cost = act_cost;
6538
6539 iv_ca_delta_free (&best_delta);
6540 best_delta = act_delta;
6541 }
6542 else
6543 iv_ca_delta_free (&act_delta);
6544 }
6545
6546 if (best_cost.infinite_cost_p ())
6547 {
6548 for (i = 0; i < group->n_map_members; i++)
6549 {
6550 cp = group->cost_map + i;
6551 cand = cp->cand;
6552 if (!cand)
6553 continue;
6554
6555 /* Already tried this. */
6556 if (cand->important)
6557 {
6558 if (originalp && cand->pos == IP_ORIGINAL)
6559 continue;
6560 if (!originalp && cand->iv->base_object == NULL_TREE)
6561 continue;
6562 }
6563
6564 if (iv_ca_cand_used_p (ivs, cand))
6565 continue;
6566
6567 act_delta = NULL;
6568 iv_ca_set_cp (data, ivs, group, cp);
6569 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6570 iv_ca_set_no_cp (data, ivs, group);
6571 act_delta = iv_ca_delta_add (group,
6572 iv_ca_cand_for_group (ivs, group),
6573 cp, act_delta);
6574
6575 if (act_cost < best_cost)
6576 {
6577 best_cost = act_cost;
6578
6579 if (best_delta)
6580 iv_ca_delta_free (&best_delta);
6581 best_delta = act_delta;
6582 }
6583 else
6584 iv_ca_delta_free (&act_delta);
6585 }
6586 }
6587
6588 iv_ca_delta_commit (data, ivs, best_delta, true);
6589 iv_ca_delta_free (&best_delta);
6590
6591 return !best_cost.infinite_cost_p ();
6592 }
6593
6594 /* Finds an initial assignment of candidates to uses. */
6595
6596 static struct iv_ca *
6597 get_initial_solution (struct ivopts_data *data, bool originalp)
6598 {
6599 unsigned i;
6600 struct iv_ca *ivs = iv_ca_new (data);
6601
6602 for (i = 0; i < data->vgroups.length (); i++)
6603 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6604 {
6605 iv_ca_free (&ivs);
6606 return NULL;
6607 }
6608
6609 return ivs;
6610 }
6611
6612 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6613 points to a bool variable; if it is true, this function tries to break
6614 a locally optimal fixed-point by replacing candidates in IVS. */
6615
6616 static bool
6617 try_improve_iv_set (struct ivopts_data *data,
6618 struct iv_ca *ivs, bool *try_replace_p)
6619 {
6620 unsigned i, n_ivs;
6621 comp_cost acost, best_cost = iv_ca_cost (ivs);
6622 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6623 struct iv_cand *cand;
6624
6625 /* Try extending the set of induction variables by one. */
6626 for (i = 0; i < data->vcands.length (); i++)
6627 {
6628 cand = data->vcands[i];
6629
6630 if (iv_ca_cand_used_p (ivs, cand))
6631 continue;
6632
6633 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6634 if (!act_delta)
6635 continue;
6636
6637 /* If we successfully added the candidate and the set is small enough,
6638 try optimizing it by removing other candidates. */
6639 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6640 {
6641 iv_ca_delta_commit (data, ivs, act_delta, true);
6642 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6643 iv_ca_delta_commit (data, ivs, act_delta, false);
6644 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6645 }
6646
6647 if (acost < best_cost)
6648 {
6649 best_cost = acost;
6650 iv_ca_delta_free (&best_delta);
6651 best_delta = act_delta;
6652 }
6653 else
6654 iv_ca_delta_free (&act_delta);
6655 }
6656
6657 if (!best_delta)
6658 {
6659 /* Try removing the candidates from the set instead. */
6660 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6661
6662 if (!best_delta && *try_replace_p)
6663 {
6664 *try_replace_p = false;
6665 /* So far the candidate selection algorithm tends to choose fewer
6666 IVs, so that it can handle loops with many variables where the
6667 best choice is often to use only one general biv. One weakness
6668 is that it can't handle the opposite case, in which a different
6669 candidate should be chosen for each use. To solve this, we
6670 replace candidates in the manner described in the comments of
6671 iv_ca_replace, thus giving the general algorithm a chance to
6672 break the locally optimal fixed-point in these cases. */
6673 best_cost = iv_ca_replace (data, ivs, &best_delta);
6674 }
6675
6676 if (!best_delta)
6677 return false;
6678 }
6679
6680 iv_ca_delta_commit (data, ivs, best_delta, true);
6681 gcc_assert (best_cost == iv_ca_cost (ivs));
6682 iv_ca_delta_free (&best_delta);
6683 return true;
6684 }
6685
6686 /* Attempts to find the optimal set of induction variables. We use a simple
6687 greedy heuristic -- we try to replace at most one candidate in the selected
6688 solution and remove the unused ivs while this improves the cost. */
6689
6690 static struct iv_ca *
6691 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6692 {
6693 struct iv_ca *set;
6694 bool try_replace_p = true;
6695
6696 /* Get the initial solution. */
6697 set = get_initial_solution (data, originalp);
6698 if (!set)
6699 {
6700 if (dump_file && (dump_flags & TDF_DETAILS))
6701 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6702 return NULL;
6703 }
6704
6705 if (dump_file && (dump_flags & TDF_DETAILS))
6706 {
6707 fprintf (dump_file, "Initial set of candidates:\n");
6708 iv_ca_dump (data, dump_file, set);
6709 }
6710
6711 while (try_improve_iv_set (data, set, &try_replace_p))
6712 {
6713 if (dump_file && (dump_flags & TDF_DETAILS))
6714 {
6715 fprintf (dump_file, "Improved to:\n");
6716 iv_ca_dump (data, dump_file, set);
6717 }
6718 }
6719
6720 return set;
6721 }
6722
6723 static struct iv_ca *
6724 find_optimal_iv_set (struct ivopts_data *data)
6725 {
6726 unsigned i;
6727 comp_cost cost, origcost;
6728 struct iv_ca *set, *origset;
6729
6730 /* Determine the cost based on a strategy that prefers the original IVs,
6731 and try again using a strategy that prefers candidates not based
6732 on any memory object. */
6733 origset = find_optimal_iv_set_1 (data, true);
6734 set = find_optimal_iv_set_1 (data, false);
6735
6736 if (!origset && !set)
6737 return NULL;
6738
6739 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6740 cost = set ? iv_ca_cost (set) : infinite_cost;
6741
6742 if (dump_file && (dump_flags & TDF_DETAILS))
6743 {
6744 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6745 origcost.cost, origcost.complexity);
6746 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6747 cost.cost, cost.complexity);
6748 }
6749
6750 /* Choose the one with the best cost. */
6751 if (origcost <= cost)
6752 {
6753 if (set)
6754 iv_ca_free (&set);
6755 set = origset;
6756 }
6757 else if (origset)
6758 iv_ca_free (&origset);
6759
6760 for (i = 0; i < data->vgroups.length (); i++)
6761 {
6762 struct iv_group *group = data->vgroups[i];
6763 group->selected = iv_ca_cand_for_group (set, group)->cand;
6764 }
6765
6766 return set;
6767 }
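/* In outline, the selection performed by the two functions above is:

     set = initial solution (greedily pick a candidate for each group);
     while (try_improve_iv_set (set))
       ;   (extend by one candidate, prune, or replace candidates)

   run once preferring the original IVs and once preferring candidates not
   based on any memory object; the cheaper of the two results wins.  */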
6768
6769 /* Creates a new induction variable corresponding to CAND. */
6770
6771 static void
6772 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6773 {
6774 gimple_stmt_iterator incr_pos;
6775 tree base;
6776 struct iv_use *use;
6777 struct iv_group *group;
6778 bool after = false;
6779
6780 gcc_assert (cand->iv != NULL);
6781
6782 switch (cand->pos)
6783 {
6784 case IP_NORMAL:
6785 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6786 break;
6787
6788 case IP_END:
6789 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6790 after = true;
6791 break;
6792
6793 case IP_AFTER_USE:
6794 after = true;
6795 /* fall through */
6796 case IP_BEFORE_USE:
6797 incr_pos = gsi_for_stmt (cand->incremented_at);
6798 break;
6799
6800 case IP_ORIGINAL:
6801 /* Mark that the iv is preserved. */
6802 name_info (data, cand->var_before)->preserve_biv = true;
6803 name_info (data, cand->var_after)->preserve_biv = true;
6804
6805 /* Rewrite the increment so that it uses var_before directly. */
6806 use = find_interesting_uses_op (data, cand->var_after);
6807 group = data->vgroups[use->group_id];
6808 group->selected = cand;
6809 return;
6810 }
6811
6812 gimple_add_tmp_var (cand->var_before);
6813
6814 base = unshare_expr (cand->iv->base);
6815
6816 create_iv (base, unshare_expr (cand->iv->step),
6817 cand->var_before, data->current_loop,
6818 &incr_pos, after, &cand->var_before, &cand->var_after);
6819 }
6820
6821 /* Creates new induction variables described in SET. */
6822
6823 static void
6824 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6825 {
6826 unsigned i;
6827 struct iv_cand *cand;
6828 bitmap_iterator bi;
6829
6830 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6831 {
6832 cand = data->vcands[i];
6833 create_new_iv (data, cand);
6834 }
6835
6836 if (dump_file && (dump_flags & TDF_DETAILS))
6837 {
6838 fprintf (dump_file, "Selected IV set for loop %d",
6839 data->current_loop->num);
6840 if (data->loop_loc != UNKNOWN_LOCATION)
6841 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6842 LOCATION_LINE (data->loop_loc));
6843 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6844 avg_loop_niter (data->current_loop));
6845 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6846 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6847 {
6848 cand = data->vcands[i];
6849 dump_cand (dump_file, cand);
6850 }
6851 fprintf (dump_file, "\n");
6852 }
6853 }
6854
6855 /* Rewrites USE (definition of iv used in a nonlinear expression)
6856 using candidate CAND. */
6857
6858 static void
6859 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6860 struct iv_use *use, struct iv_cand *cand)
6861 {
6862 gassign *ass;
6863 gimple_stmt_iterator bsi;
6864 tree comp, type = get_use_type (use), tgt;
6865
6866 /* An important special case -- if we are asked to express value of
6867 the original iv by itself, just exit; there is no need to
6868 introduce a new computation (that might also need casting the
6869 variable to unsigned and back). */
6870 if (cand->pos == IP_ORIGINAL
6871 && cand->incremented_at == use->stmt)
6872 {
6873 tree op = NULL_TREE;
6874 enum tree_code stmt_code;
6875
6876 gcc_assert (is_gimple_assign (use->stmt));
6877 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6878
6879 /* Check whether we may leave the computation unchanged.
6880 This is the case only if it does not rely on other
6881 computations in the loop -- otherwise, the computation
6882 we rely upon may be removed in remove_unused_ivs,
6883 thus leading to ICE. */
6884 stmt_code = gimple_assign_rhs_code (use->stmt);
6885 if (stmt_code == PLUS_EXPR
6886 || stmt_code == MINUS_EXPR
6887 || stmt_code == POINTER_PLUS_EXPR)
6888 {
6889 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6890 op = gimple_assign_rhs2 (use->stmt);
6891 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6892 op = gimple_assign_rhs1 (use->stmt);
6893 }
6894
6895 if (op != NULL_TREE)
6896 {
6897 if (expr_invariant_in_loop_p (data->current_loop, op))
6898 return;
6899 if (TREE_CODE (op) == SSA_NAME)
6900 {
6901 struct iv *iv = get_iv (data, op);
6902 if (iv != NULL && integer_zerop (iv->step))
6903 return;
6904 }
6905 }
6906 }
6907
6908 switch (gimple_code (use->stmt))
6909 {
6910 case GIMPLE_PHI:
6911 tgt = PHI_RESULT (use->stmt);
6912
6913 /* If we should keep the biv, do not replace it. */
6914 if (name_info (data, tgt)->preserve_biv)
6915 return;
6916
6917 bsi = gsi_after_labels (gimple_bb (use->stmt));
6918 break;
6919
6920 case GIMPLE_ASSIGN:
6921 tgt = gimple_assign_lhs (use->stmt);
6922 bsi = gsi_for_stmt (use->stmt);
6923 break;
6924
6925 default:
6926 gcc_unreachable ();
6927 }
6928
6929 aff_tree aff_inv, aff_var;
6930 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6931 use, cand, &aff_inv, &aff_var))
6932 gcc_unreachable ();
6933
6934 unshare_aff_combination (&aff_inv);
6935 unshare_aff_combination (&aff_var);
6936 /* Prefer CSE opportunities over hoisting loop invariants by adding the
6937 offset last, so that iv_uses with different offsets can be CSEed. */
6938 poly_widest_int offset = aff_inv.offset;
6939 aff_inv.offset = 0;
6940
6941 gimple_seq stmt_list = NULL, seq = NULL;
6942 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6943 tree comp_op2 = aff_combination_to_tree (&aff_var);
6944 gcc_assert (comp_op1 && comp_op2);
6945
6946 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6947 gimple_seq_add_seq (&stmt_list, seq);
6948 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6949 gimple_seq_add_seq (&stmt_list, seq);
6950
6951 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6952 std::swap (comp_op1, comp_op2);
6953
6954 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6955 {
6956 comp = fold_build_pointer_plus (comp_op1,
6957 fold_convert (sizetype, comp_op2));
6958 comp = fold_build_pointer_plus (comp,
6959 wide_int_to_tree (sizetype, offset));
6960 }
6961 else
6962 {
6963 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6964 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6965 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6966 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6967 }
6968
6969 comp = fold_convert (type, comp);
6970 if (!valid_gimple_rhs_p (comp)
6971 || (gimple_code (use->stmt) != GIMPLE_PHI
6972 /* We can't allow re-allocating the stmt as it might be pointed
6973 to still. */
6974 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6975 >= gimple_num_ops (gsi_stmt (bsi)))))
6976 {
6977 comp = force_gimple_operand (comp, &seq, true, NULL);
6978 gimple_seq_add_seq (&stmt_list, seq);
6979 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6980 {
6981 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6982 /* As this isn't a plain copy we have to reset alignment
6983 information. */
6984 if (SSA_NAME_PTR_INFO (comp))
6985 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6986 }
6987 }
6988
6989 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6990 if (gimple_code (use->stmt) == GIMPLE_PHI)
6991 {
6992 ass = gimple_build_assign (tgt, comp);
6993 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6994
6995 bsi = gsi_for_stmt (use->stmt);
6996 remove_phi_node (&bsi, false);
6997 }
6998 else
6999 {
7000 gimple_assign_set_rhs_from_tree (&bsi, comp);
7001 use->stmt = gsi_stmt (bsi);
7002 }
7003 }
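/* A sketch of the effect (hypothetical GIMPLE, not from a real dump):
   suppose the loop had two bivs, `i' (base 0, step 1) and `j' (base 10,
   step 1), and only a candidate based on `i' was selected.  The nonlinear
   use defining `j's value is then rewritten to recompute it from the
   surviving candidate, schematically

     j_7 = j_3 + 1;     becomes     j_7 = i_5 + 10;

   (the exact constant depends on where the candidate is incremented),
   with the invariant offset added last and the dead increment of `j'
   removed later by remove_unused_ivs.  */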
7004
7005 /* Performs a peephole optimization to reorder the iv update statement with
7006 a mem ref to enable instruction combining in later phases. The mem ref uses
7007 the iv value before the update, so the reordering transformation requires
7008 adjustment of the offset. CAND is the selected IV_CAND.
7009
7010 Example:
7011
7012 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7013 iv2 = iv1 + 1;
7014
7015 if (t < val) (1)
7016 goto L;
7017 goto Head;
7018
7019
7020 Directly propagating t over to (1) will introduce an overlapping live range
7021 and thus increase register pressure. This peephole transforms it into:
7022
7023
7024 iv2 = iv1 + 1;
7025 t = MEM_REF (base, iv2, 8, 8);
7026 if (t < val)
7027 goto L;
7028 goto Head;
7029 */
7030
7031 static void
7032 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7033 {
7034 tree var_after;
7035 gimple *iv_update, *stmt;
7036 basic_block bb;
7037 gimple_stmt_iterator gsi, gsi_iv;
7038
7039 if (cand->pos != IP_NORMAL)
7040 return;
7041
7042 var_after = cand->var_after;
7043 iv_update = SSA_NAME_DEF_STMT (var_after);
7044
7045 bb = gimple_bb (iv_update);
7046 gsi = gsi_last_nondebug_bb (bb);
7047 stmt = gsi_stmt (gsi);
7048
7049 /* Only handle conditional statements for now. */
7050 if (gimple_code (stmt) != GIMPLE_COND)
7051 return;
7052
7053 gsi_prev_nondebug (&gsi);
7054 stmt = gsi_stmt (gsi);
7055 if (stmt != iv_update)
7056 return;
7057
7058 gsi_prev_nondebug (&gsi);
7059 if (gsi_end_p (gsi))
7060 return;
7061
7062 stmt = gsi_stmt (gsi);
7063 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7064 return;
7065
7066 if (stmt != use->stmt)
7067 return;
7068
7069 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7070 return;
7071
7072 if (dump_file && (dump_flags & TDF_DETAILS))
7073 {
7074 fprintf (dump_file, "Reordering \n");
7075 print_gimple_stmt (dump_file, iv_update, 0);
7076 print_gimple_stmt (dump_file, use->stmt, 0);
7077 fprintf (dump_file, "\n");
7078 }
7079
7080 gsi = gsi_for_stmt (use->stmt);
7081 gsi_iv = gsi_for_stmt (iv_update);
7082 gsi_move_before (&gsi_iv, &gsi);
7083
7084 cand->pos = IP_BEFORE_USE;
7085 cand->incremented_at = use->stmt;
7086 }
7087
7088 /* Return the alias pointer type that should be used for a MEM_REF
7089 associated with USE, which has type USE_PTR_ADDRESS. */
7090
7091 static tree
7092 get_alias_ptr_type_for_ptr_address (iv_use *use)
7093 {
7094 gcall *call = as_a <gcall *> (use->stmt);
7095 switch (gimple_call_internal_fn (call))
7096 {
7097 case IFN_MASK_LOAD:
7098 case IFN_MASK_STORE:
7099 /* The second argument contains the correct alias type. */
7100 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7101 return TREE_TYPE (gimple_call_arg (call, 1));
7102
7103 default:
7104 gcc_unreachable ();
7105 }
7106 }
7107
7108
7109 /* Rewrites USE (address that is an iv) using candidate CAND. */
7110
7111 static void
7112 rewrite_use_address (struct ivopts_data *data,
7113 struct iv_use *use, struct iv_cand *cand)
7114 {
7115 aff_tree aff;
7116 bool ok;
7117
7118 adjust_iv_update_pos (cand, use);
7119 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7120 gcc_assert (ok);
7121 unshare_aff_combination (&aff);
7122
7123 /* To avoid undefined overflow problems, all IV candidates use unsigned
7124 integer types. The drawback is that this makes it impossible for
7125 create_mem_ref to distinguish an IV that is based on a memory object
7126 from one that represents simply an offset.
7127
7128 To work around this problem, we pass a hint to create_mem_ref that
7129 indicates which variable (if any) in aff is an IV based on a memory
7130 object. Note that we only consider the candidate. If this is not
7131 based on an object, the base of the reference is in some subexpression
7132 of the use -- but these will use pointer types, so they are recognized
7133 by the create_mem_ref heuristics anyway. */
7134 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7135 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7136 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7137 tree type = use->mem_type;
7138 tree alias_ptr_type;
7139 if (use->type == USE_PTR_ADDRESS)
7140 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7141 else
7142 {
7143 gcc_assert (type == TREE_TYPE (*use->op_p));
7144 unsigned int align = get_object_alignment (*use->op_p);
7145 if (align != TYPE_ALIGN (type))
7146 type = build_aligned_type (type, align);
7147 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7148 }
7149 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7150 iv, base_hint, data->speed);
7151
7152 if (use->type == USE_PTR_ADDRESS)
7153 {
7154 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7155 ref = fold_convert (get_use_type (use), ref);
7156 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7157 true, GSI_SAME_STMT);
7158 }
7159 else
7160 copy_ref_info (ref, *use->op_p);
7161
7162 *use->op_p = ref;
7163 }
7164
7165 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7166 candidate CAND. */
7167
7168 static void
7169 rewrite_use_compare (struct ivopts_data *data,
7170 struct iv_use *use, struct iv_cand *cand)
7171 {
7172 tree comp, op, bound;
7173 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7174 enum tree_code compare;
7175 struct iv_group *group = data->vgroups[use->group_id];
7176 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7177
7178 bound = cp->value;
7179 if (bound)
7180 {
7181 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7182 tree var_type = TREE_TYPE (var);
7183 gimple_seq stmts;
7184
7185 if (dump_file && (dump_flags & TDF_DETAILS))
7186 {
7187 fprintf (dump_file, "Replacing exit test: ");
7188 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7189 }
7190 compare = cp->comp;
7191 bound = unshare_expr (fold_convert (var_type, bound));
7192 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7193 if (stmts)
7194 gsi_insert_seq_on_edge_immediate (
7195 loop_preheader_edge (data->current_loop),
7196 stmts);
7197
7198 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7199 gimple_cond_set_lhs (cond_stmt, var);
7200 gimple_cond_set_code (cond_stmt, compare);
7201 gimple_cond_set_rhs (cond_stmt, op);
7202 return;
7203 }
7204
7205 /* The induction variable elimination failed; just express the original
7206 giv. */
7207 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7208 gcc_assert (comp != NULL_TREE);
7209 gcc_assert (use->op_p != NULL);
7210 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7211 SSA_NAME_VAR (*use->op_p),
7212 true, GSI_SAME_STMT);
7213 }
7214
7215 /* Rewrite the groups using the selected induction variables. */
7216
7217 static void
7218 rewrite_groups (struct ivopts_data *data)
7219 {
7220 unsigned i, j;
7221
7222 for (i = 0; i < data->vgroups.length (); i++)
7223 {
7224 struct iv_group *group = data->vgroups[i];
7225 struct iv_cand *cand = group->selected;
7226
7227 gcc_assert (cand);
7228
7229 if (group->type == USE_NONLINEAR_EXPR)
7230 {
7231 for (j = 0; j < group->vuses.length (); j++)
7232 {
7233 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7234 update_stmt (group->vuses[j]->stmt);
7235 }
7236 }
7237 else if (address_p (group->type))
7238 {
7239 for (j = 0; j < group->vuses.length (); j++)
7240 {
7241 rewrite_use_address (data, group->vuses[j], cand);
7242 update_stmt (group->vuses[j]->stmt);
7243 }
7244 }
7245 else
7246 {
7247 gcc_assert (group->type == USE_COMPARE);
7248
7249 for (j = 0; j < group->vuses.length (); j++)
7250 {
7251 rewrite_use_compare (data, group->vuses[j], cand);
7252 update_stmt (group->vuses[j]->stmt);
7253 }
7254 }
7255 }
7256 }
7257
7258 /* Removes the ivs that are not used after rewriting. */
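/* When debug info is generated, an iv that is removed but still referenced
   from debug bind statements is not simply dropped: its value is
   re-expressed in terms of a surviving candidate, either directly (a
   single simple debug use) or via a DEBUG_EXPR_DECL temporary when it is
   used several times or inside a larger expression.  As a hypothetical
   example, "# DEBUG i => i_8" may become "# DEBUG i => ivtmp_12 - 4"
   once i_8 is gone.  */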
7259
7260 static void
7261 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7262 {
7263 unsigned j;
7264 bitmap_iterator bi;
7265
7266 /* Figure out an order in which to release SSA DEFs so that we don't
7267 release something that we'd have to propagate into a debug stmt
7268 afterwards. */
7269 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7270 {
7271 struct version_info *info;
7272
7273 info = ver_info (data, j);
7274 if (info->iv
7275 && !integer_zerop (info->iv->step)
7276 && !info->inv_id
7277 && !info->iv->nonlin_use
7278 && !info->preserve_biv)
7279 {
7280 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7281
7282 tree def = info->iv->ssa_name;
7283
7284 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7285 {
7286 imm_use_iterator imm_iter;
7287 use_operand_p use_p;
7288 gimple *stmt;
7289 int count = 0;
7290
7291 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7292 {
7293 if (!gimple_debug_bind_p (stmt))
7294 continue;
7295
7296 /* We just want to determine whether to do nothing
7297 (count == 0), to substitute the computed
7298 expression into a single use of the SSA DEF by
7299 itself (count == 1), or to use a debug temp
7300 because the SSA DEF is used multiple times or as
7301 part of a larger expression (count > 1). */
7302 count++;
7303 if (gimple_debug_bind_get_value (stmt) != def)
7304 count++;
7305
7306 if (count > 1)
7307 BREAK_FROM_IMM_USE_STMT (imm_iter);
7308 }
7309
7310 if (!count)
7311 continue;
7312
7313 struct iv_use dummy_use;
7314 struct iv_cand *best_cand = NULL, *cand;
7315 unsigned i, best_pref = 0, cand_pref;
7316
7317 memset (&dummy_use, 0, sizeof (dummy_use));
7318 dummy_use.iv = info->iv;
7319 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7320 {
7321 cand = data->vgroups[i]->selected;
7322 if (cand == best_cand)
7323 continue;
7324 cand_pref = operand_equal_p (cand->iv->step,
7325 info->iv->step, 0)
7326 ? 4 : 0;
7327 cand_pref
7328 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7329 == TYPE_MODE (TREE_TYPE (info->iv->base))
7330 ? 2 : 0;
7331 cand_pref
7332 += TREE_CODE (cand->iv->base) == INTEGER_CST
7333 ? 1 : 0;
7334 if (best_cand == NULL || best_pref < cand_pref)
7335 {
7336 best_cand = cand;
7337 best_pref = cand_pref;
7338 }
7339 }
7340
7341 if (!best_cand)
7342 continue;
7343
7344 tree comp = get_computation_at (data->current_loop,
7345 SSA_NAME_DEF_STMT (def),
7346 &dummy_use, best_cand);
7347 if (!comp)
7348 continue;
7349
7350 if (count > 1)
7351 {
7352 tree vexpr = make_node (DEBUG_EXPR_DECL);
7353 DECL_ARTIFICIAL (vexpr) = 1;
7354 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7355 if (SSA_NAME_VAR (def))
7356 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7357 else
7358 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7359 gdebug *def_temp
7360 = gimple_build_debug_bind (vexpr, comp, NULL);
7361 gimple_stmt_iterator gsi;
7362
7363 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7364 gsi = gsi_after_labels (gimple_bb
7365 (SSA_NAME_DEF_STMT (def)));
7366 else
7367 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7368
7369 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7370 comp = vexpr;
7371 }
7372
7373 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7374 {
7375 if (!gimple_debug_bind_p (stmt))
7376 continue;
7377
7378 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7379 SET_USE (use_p, comp);
7380
7381 update_stmt (stmt);
7382 }
7383 }
7384 }
7385 }
7386 }
7387
7388 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7389 for hash_map::traverse. */
7390
7391 bool
7392 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7393 {
7394 free (value);
7395 return true;
7396 }
7397
7398 /* Frees data allocated by the optimization of a single loop. */
7399
7400 static void
7401 free_loop_data (struct ivopts_data *data)
7402 {
7403 unsigned i, j;
7404 bitmap_iterator bi;
7405 tree obj;
7406
7407 if (data->niters)
7408 {
7409 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7410 delete data->niters;
7411 data->niters = NULL;
7412 }
7413
7414 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7415 {
7416 struct version_info *info;
7417
7418 info = ver_info (data, i);
7419 info->iv = NULL;
7420 info->has_nonlin_use = false;
7421 info->preserve_biv = false;
7422 info->inv_id = 0;
7423 }
7424 bitmap_clear (data->relevant);
7425 bitmap_clear (data->important_candidates);
7426
7427 for (i = 0; i < data->vgroups.length (); i++)
7428 {
7429 struct iv_group *group = data->vgroups[i];
7430
7431 for (j = 0; j < group->vuses.length (); j++)
7432 free (group->vuses[j]);
7433 group->vuses.release ();
7434
7435 BITMAP_FREE (group->related_cands);
7436 for (j = 0; j < group->n_map_members; j++)
7437 {
7438 if (group->cost_map[j].inv_vars)
7439 BITMAP_FREE (group->cost_map[j].inv_vars);
7440 if (group->cost_map[j].inv_exprs)
7441 BITMAP_FREE (group->cost_map[j].inv_exprs);
7442 }
7443
7444 free (group->cost_map);
7445 free (group);
7446 }
7447 data->vgroups.truncate (0);
7448
7449 for (i = 0; i < data->vcands.length (); i++)
7450 {
7451 struct iv_cand *cand = data->vcands[i];
7452
7453 if (cand->inv_vars)
7454 BITMAP_FREE (cand->inv_vars);
7455 if (cand->inv_exprs)
7456 BITMAP_FREE (cand->inv_exprs);
7457 free (cand);
7458 }
7459 data->vcands.truncate (0);
7460
7461 if (data->version_info_size < num_ssa_names)
7462 {
7463 data->version_info_size = 2 * num_ssa_names;
7464 free (data->version_info);
7465 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7466 }
7467
7468 data->max_inv_var_id = 0;
7469 data->max_inv_expr_id = 0;
7470
7471 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7472 SET_DECL_RTL (obj, NULL_RTX);
7473
7474 decl_rtl_to_reset.truncate (0);
7475
7476 data->inv_expr_tab->empty ();
7477
7478 data->iv_common_cand_tab->empty ();
7479 data->iv_common_cands.truncate (0);
7480 }
7481
7482 /* Finalizes data structures used by the iv optimization pass. */
7484
7485 static void
7486 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7487 {
7488 free_loop_data (data);
7489 free (data->version_info);
7490 BITMAP_FREE (data->relevant);
7491 BITMAP_FREE (data->important_candidates);
7492
7493 decl_rtl_to_reset.release ();
7494 data->vgroups.release ();
7495 data->vcands.release ();
7496 delete data->inv_expr_tab;
7497 data->inv_expr_tab = NULL;
7498 free_affine_expand_cache (&data->name_expansion_cache);
7499 delete data->iv_common_cand_tab;
7500 data->iv_common_cand_tab = NULL;
7501 data->iv_common_cands.release ();
7502 obstack_free (&data->iv_obstack, NULL);
7503 }
7504
7505 /* Returns true if the loop body BODY (NUM_NODES blocks) contains any function calls other than internal calls and inexpensive builtins. */
7506
7507 static bool
7508 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7509 {
7510 gimple_stmt_iterator gsi;
7511 unsigned i;
7512
7513 for (i = 0; i < num_nodes; i++)
7514 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7515 {
7516 gimple *stmt = gsi_stmt (gsi);
7517 if (is_gimple_call (stmt)
7518 && !gimple_call_internal_p (stmt)
7519 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7520 return true;
7521 }
7522 return false;
7523 }
7524
7525 /* Optimizes the LOOP. Returns true if anything changed. */
7526
7527 static bool
7528 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7529 bitmap toremove)
7530 {
7531 bool changed = false;
7532 struct iv_ca *iv_ca;
7533 edge exit = single_dom_exit (loop);
7534 basic_block *body;
7535
7536 gcc_assert (!data->niters);
7537 data->current_loop = loop;
7538 data->loop_loc = find_loop_location (loop).get_location_t ();
7539 data->speed = optimize_loop_for_speed_p (loop);
7540
7541 if (dump_file && (dump_flags & TDF_DETAILS))
7542 {
7543 fprintf (dump_file, "Processing loop %d", loop->num);
7544 if (data->loop_loc != UNKNOWN_LOCATION)
7545 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7546 LOCATION_LINE (data->loop_loc));
7547 fprintf (dump_file, "\n");
7548
7549 if (exit)
7550 {
7551 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7552 exit->src->index, exit->dest->index);
7553 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7554 fprintf (dump_file, "\n");
7555 }
7556
7557 fprintf (dump_file, "\n");
7558 }
7559
7560 body = get_loop_body (loop);
7561 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7562 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7563 free (body);
7564
7565 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7566
7567 /* For each ssa name determines whether it behaves as an induction variable
7568 in some loop. */
7569 if (!find_induction_variables (data))
7570 goto finish;
7571
7572 /* Finds interesting uses (item 1). */
7573 find_interesting_uses (data);
7574 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7575 goto finish;
7576
7577 /* Finds candidates for the induction variables (item 2). */
7578 find_iv_candidates (data);
7579
7580 /* Calculates the costs (item 3, part 1). */
7581 determine_iv_costs (data);
7582 determine_group_iv_costs (data);
7583 determine_set_costs (data);
7584
7585 /* Find the optimal set of induction variables (item 3, part 2). */
7586 iv_ca = find_optimal_iv_set (data);
7587 if (!iv_ca)
7588 goto finish;
7589 changed = true;
7590
7591 /* Create the new induction variables (item 4, part 1). */
7592 create_new_ivs (data, iv_ca);
7593 iv_ca_free (&iv_ca);
7594
7595 /* Rewrite the uses (item 4, part 2). */
7596 rewrite_groups (data);
7597
7598 /* Remove the ivs that are unused after rewriting. */
7599 remove_unused_ivs (data, toremove);
7600
7601 finish:
7602 free_loop_data (data);
7603
7604 return changed;
7605 }
7606
7607 /* Main entry point. Optimizes induction variables in loops. */
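/* As a purely illustrative sketch, a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   typically ends up using a single pointer induction variable,
   conceptually

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   i.e. the index is strength-reduced into the address computation and the
   exit test is rewritten against a bound computed before the loop.  */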
7608
7609 void
7610 tree_ssa_iv_optimize (void)
7611 {
7612 struct loop *loop;
7613 struct ivopts_data data;
7614 auto_bitmap toremove;
7615
7616 tree_ssa_iv_optimize_init (&data);
7617
7618 /* Optimize the loops starting with the innermost ones. */
7619 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7620 {
7621 if (dump_file && (dump_flags & TDF_DETAILS))
7622 flow_loop_dump (loop, dump_file, NULL, 1);
7623
7624 tree_ssa_iv_optimize_loop (&data, loop, toremove);
7625 }
7626
7627 /* Remove eliminated IV defs. */
7628 release_defs_bitset (toremove);
7629
7630 /* We have changed the structure of induction variables; it might happen
7631 that definitions in the scev database refer to some of them that were
7632 eliminated. */
7633 scev_reset_htab ();
7634 /* Likewise niter and control-IV information. */
7635 free_numbers_of_iterations_estimates (cfun);
7636
7637 tree_ssa_iv_optimize_finalize (&data);
7638 }
7639
7640 #include "gt-tree-ssa-loop-ivopts.h"
7641