1 /* Induction variable optimizations.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only in constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and enable us to decide
66 costs more precisely, but getting all the interactions right
67 would be complicated. */
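/* Purely as an illustrative sketch (not code from this pass), the effect on a
   simple loop is roughly the following.  A loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   computes the address "a + i * sizeof (*a)" in every iteration; by choosing
   a pointer candidate the multiplication is strength-reduced, conceptually
   giving

     for (p = a; p != a + n; p++)
       sum += *p;

   with the original counter eliminated if it has no other uses and the exit
   test rewritten in terms of the chosen candidate.  */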
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
115
116 /* The infinite cost. */
117 #define INFTY 10000000
118
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
122
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
128 {
129 niter = likely_max_stmt_executions_int (loop);
130
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133 }
134
135 return niter;
136 }
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to which the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The nonlinear use this iv appears in, if any. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS, /* Use is an address for an explicit memory
170 reference. */
171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE /* Use is a compare. */
175 };
176
177 /* Cost of a computation. */
178 struct comp_cost
179 {
180 comp_cost (): cost (0), complexity (0), scratch (0)
181 {}
182
183 comp_cost (int cost, unsigned complexity, int scratch = 0)
184 : cost (cost), complexity (complexity), scratch (scratch)
185 {}
186
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
189
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192
193 /* Adds COST to the comp_cost. */
194 comp_cost operator+= (comp_cost cost);
195
196 /* Adds constant C to this comp_cost. */
197 comp_cost operator+= (HOST_WIDE_INT c);
198
199 /* Subtracts constant C from this comp_cost. */
200 comp_cost operator-= (HOST_WIDE_INT c);
201
202 /* Divide the comp_cost by constant C. */
203 comp_cost operator/= (HOST_WIDE_INT c);
204
205 /* Multiply the comp_cost by constant C. */
206 comp_cost operator*= (HOST_WIDE_INT c);
207
208 /* Subtracts cost COST2 from COST1. */
209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210
211 /* Subtracts COST from this comp_cost. */
212 comp_cost operator-= (comp_cost cost);
213
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1, comp_cost cost2);
216
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1, comp_cost cost2);
219
220 /* Returns true if COST1 is smaller than or equal to COST2. */
221 friend bool operator<= (comp_cost cost1, comp_cost cost2);
222
223 int cost; /* The runtime cost. */
224 unsigned complexity; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int scratch; /* Scratch used during cost computation. */
229 };
230
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237 return cost == INFTY;
238 }
239
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244 return infinite_cost;
245
246 cost1.cost += cost2.cost;
247 cost1.complexity += cost2.complexity;
248
249 return cost1;
250 }
251
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
254 {
255 if (cost1.infinite_cost_p ())
256 return infinite_cost;
257
258 gcc_assert (!cost2.infinite_cost_p ());
259
260 cost1.cost -= cost2.cost;
261 cost1.complexity -= cost2.complexity;
262
263 return cost1;
264 }
265
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
268 {
269 *this = *this + cost;
270 return *this;
271 }
272
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
275 {
276 if (infinite_cost_p ())
277 return *this;
278
279 this->cost += c;
280
281 return *this;
282 }
283
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
286 {
287 if (infinite_cost_p ())
288 return *this;
289
290 this->cost -= c;
291
292 return *this;
293 }
294
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
297 {
298 if (infinite_cost_p ())
299 return *this;
300
301 this->cost /= c;
302
303 return *this;
304 }
305
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
308 {
309 if (infinite_cost_p ())
310 return *this;
311
312 this->cost *= c;
313
314 return *this;
315 }
316
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
319 {
320 *this = *this - cost;
321 return *this;
322 }
323
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
326 {
327 if (cost1.cost == cost2.cost)
328 return cost1.complexity < cost2.complexity;
329
330 return cost1.cost < cost2.cost;
331 }
332
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
335 {
336 return cost1.cost == cost2.cost
337 && cost1.complexity == cost2.complexity;
338 }
339
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
342 {
343 return cost1 < cost2 || cost1 == cost2;
344 }
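/* Illustrative only: with the comparison operators above, costs are ordered
   primarily by the runtime COST field and COMPLEXITY merely breaks ties,
   e.g. {cost = 4, complexity = 2} < {cost = 5, complexity = 0} and
   {cost = 4, complexity = 1} < {cost = 4, complexity = 2}.  */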
345
346 struct iv_inv_expr_ent;
347
348 /* The candidate - cost pair. */
349 struct cost_pair
350 {
351 struct iv_cand *cand; /* The candidate. */
352 comp_cost cost; /* The cost. */
353 enum tree_code comp; /* For iv elimination, the comparison. */
354 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
355 preserved when representing iv_use with iv_cand. */
356 bitmap inv_exprs; /* The list of newly created invariant expressions
357 when representing iv_use with iv_cand. */
358 tree value; /* For final value elimination, the expression for
359 the final value of the iv. For iv elimination,
360 the new bound to compare with. */
361 };
362
363 /* Use. */
364 struct iv_use
365 {
366 unsigned id; /* The id of the use. */
367 unsigned group_id; /* The group id the use belongs to. */
368 enum use_type type; /* Type of the use. */
369 tree mem_type; /* The memory type to use when testing whether an
370 address is legitimate, and what the address's
371 cost is. */
372 struct iv *iv; /* The induction variable it is based on. */
373 gimple *stmt; /* Statement in that it occurs. */
374 tree *op_p; /* The place where it occurs. */
375
376 tree addr_base; /* Base address with const offset stripped. */
377 poly_uint64_pod addr_offset;
378 /* Const offset stripped from base address. */
379 };
380
381 /* Group of uses. */
382 struct iv_group
383 {
384 /* The id of the group. */
385 unsigned id;
386 /* Uses of the group are of the same type. */
387 enum use_type type;
388 /* The set of "related" IV candidates, plus the important ones. */
389 bitmap related_cands;
390 /* Number of IV candidates in the cost_map. */
391 unsigned n_map_members;
392 /* The costs w.r.t. the iv candidates. */
393 struct cost_pair *cost_map;
394 /* The selected candidate for the group. */
395 struct iv_cand *selected;
396 /* Uses in the group. */
397 vec<struct iv_use *> vuses;
398 };
399
400 /* The position where the iv is computed. */
401 enum iv_position
402 {
403 IP_NORMAL, /* At the end, just before the exit condition. */
404 IP_END, /* At the end of the latch block. */
405 IP_BEFORE_USE, /* Immediately before a specific use. */
406 IP_AFTER_USE, /* Immediately after a specific use. */
407 IP_ORIGINAL /* The original biv. */
408 };
409
410 /* The induction variable candidate. */
411 struct iv_cand
412 {
413 unsigned id; /* The number of the candidate. */
414 bool important; /* Whether this is an "important" candidate, i.e. such
415 that it should be considered by all uses. */
416 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
417 gimple *incremented_at;/* For original biv, the statement where it is
418 incremented. */
419 tree var_before; /* The variable used for it before increment. */
420 tree var_after; /* The variable used for it after increment. */
421 struct iv *iv; /* The value of the candidate. NULL for
422 "pseudocandidate" used to indicate the possibility
423 to replace the final value of an iv by direct
424 computation of the value. */
425 unsigned cost; /* Cost of the candidate. */
426 unsigned cost_step; /* Cost of the candidate's increment operation. */
427 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 where it is incremented. */
429 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
430 iv_cand. */
431 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
432 handle it as a new invariant expression which will
433 be hoisted out of the loop. */
434 struct iv *orig_iv; /* The original iv if this cand is added from biv with
435 smaller type. */
436 };
437
438 /* Hashtable entry for common candidate derived from iv uses. */
439 struct iv_common_cand
440 {
441 tree base;
442 tree step;
443 /* IV uses from which this common candidate is derived. */
444 auto_vec<struct iv_use *> uses;
445 hashval_t hash;
446 };
447
448 /* Hashtable helpers. */
449
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
451 {
452 static inline hashval_t hash (const iv_common_cand *);
453 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
454 };
455
456 /* Hash function for possible common candidates. */
457
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
460 {
461 return ccand->hash;
462 }
463
464 /* Hash table equality function for common candidates. */
465
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 const iv_common_cand *ccand2)
469 {
470 return (ccand1->hash == ccand2->hash
471 && operand_equal_p (ccand1->base, ccand2->base, 0)
472 && operand_equal_p (ccand1->step, ccand2->step, 0)
473 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475 }
476
477 /* Loop invariant expression hashtable entry. */
478
479 struct iv_inv_expr_ent
480 {
481 /* Tree expression of the entry. */
482 tree expr;
483 /* Unique identifier. */
484 int id;
485 /* Hash value. */
486 hashval_t hash;
487 };
488
489 /* Sort iv_inv_expr_ent pair A and B by id field. */
490
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
493 {
494 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496
497 unsigned id1 = (*e1)->id;
498 unsigned id2 = (*e2)->id;
499
500 if (id1 < id2)
501 return -1;
502 else if (id1 > id2)
503 return 1;
504 else
505 return 0;
506 }
507
508 /* Hashtable helpers. */
509
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
511 {
512 static inline hashval_t hash (const iv_inv_expr_ent *);
513 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
514 };
515
516 /* Return true if uses of type TYPE represent some form of address. */
517
518 inline bool
519 address_p (use_type type)
520 {
521 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522 }
523
524 /* Hash function for loop invariant expressions. */
525
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
528 {
529 return expr->hash;
530 }
531
532 /* Hash table equality function for expressions. */
533
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 const iv_inv_expr_ent *expr2)
537 {
538 return expr1->hash == expr2->hash
539 && operand_equal_p (expr1->expr, expr2->expr, 0);
540 }
541
542 struct ivopts_data
543 {
544 /* The currently optimized loop. */
545 struct loop *current_loop;
546 source_location loop_loc;
547
548 /* Numbers of iterations for all exits of the current loop. */
549 hash_map<edge, tree_niter_desc *> *niters;
550
551 /* Number of registers used in it. */
552 unsigned regs_used;
553
554 /* The size of version_info array allocated. */
555 unsigned version_info_size;
556
557 /* The array of information for the ssa names. */
558 struct version_info *version_info;
559
560 /* The hashtable of loop invariant expressions created
561 by ivopt. */
562 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
563
564 /* The bitmap of indices in version_info whose value was changed. */
565 bitmap relevant;
566
567 /* The uses of induction variables. */
568 vec<iv_group *> vgroups;
569
570 /* The candidates. */
571 vec<iv_cand *> vcands;
572
573 /* A bitmap of important candidates. */
574 bitmap important_candidates;
575
576 /* Cache used by tree_to_aff_combination_expand. */
577 hash_map<tree, name_expansion *> *name_expansion_cache;
578
579 /* The hashtable of common candidates derived from iv uses. */
580 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
581
582 /* The common candidates. */
583 vec<iv_common_cand *> iv_common_cands;
584
585 /* Hash map recording base object information of tree exp. */
586 hash_map<tree, tree> *base_object_map;
587
588 /* The maximum invariant variable id. */
589 unsigned max_inv_var_id;
590
591 /* The maximum invariant expression id. */
592 unsigned max_inv_expr_id;
593
594 /* Number of no_overflow BIVs which are not used in memory address. */
595 unsigned bivs_not_used_in_addr;
596
597 /* Obstack for iv structure. */
598 struct obstack iv_obstack;
599
600 /* Whether to consider just related and important candidates when replacing a
601 use. */
602 bool consider_all_candidates;
603
604 /* Are we optimizing for speed? */
605 bool speed;
606
607 /* Whether the loop body includes any function calls. */
608 bool body_includes_call;
609
610 /* Whether the loop body can only be exited via single exit. */
611 bool loop_single_exit_p;
612 };
613
614 /* An assignment of iv candidates to uses. */
615
616 struct iv_ca
617 {
618 /* The number of uses covered by the assignment. */
619 unsigned upto;
620
621 /* Number of uses that cannot be expressed by the candidates in the set. */
622 unsigned bad_groups;
623
624 /* Candidate assigned to a use, together with the related costs. */
625 struct cost_pair **cand_for_group;
626
627 /* Number of times each candidate is used. */
628 unsigned *n_cand_uses;
629
630 /* The candidates used. */
631 bitmap cands;
632
633 /* The number of candidates in the set. */
634 unsigned n_cands;
635
636 /* The number of invariants needed, including both invariant variants and
637 invariant expressions. */
638 unsigned n_invs;
639
640 /* Total cost of expressing uses. */
641 comp_cost cand_use_cost;
642
643 /* Total cost of candidates. */
644 unsigned cand_cost;
645
646 /* Number of times each invariant variable is used. */
647 unsigned *n_inv_var_uses;
648
649 /* Number of times each invariant expression is used. */
650 unsigned *n_inv_expr_uses;
651
652 /* Total cost of the assignment. */
653 comp_cost cost;
654 };
655
656 /* Difference of two iv candidate assignments. */
657
658 struct iv_ca_delta
659 {
660 /* Changed group. */
661 struct iv_group *group;
662
663 /* An old assignment (for rollback purposes). */
664 struct cost_pair *old_cp;
665
666 /* A new assignment. */
667 struct cost_pair *new_cp;
668
669 /* Next change in the list. */
670 struct iv_ca_delta *next;
671 };
672
673 /* Bound on the number of candidates below which all candidates are considered. */
674
675 #define CONSIDER_ALL_CANDIDATES_BOUND \
676 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
677
678 /* If there are more iv occurrences, we just give up (it is quite unlikely that
679 optimizing such a loop would help, and it would take ages). */
680
681 #define MAX_CONSIDERED_GROUPS \
682 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
683
684 /* If there are at most this many ivs in the set, always try removing
685 unnecessary ivs from the set. */
686
687 #define ALWAYS_PRUNE_CAND_SET_BOUND \
688 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
689
690 /* The list of trees for which the decl_rtl field must be reset is stored
691 here. */
692
693 static vec<tree> decl_rtl_to_reset;
694
695 static comp_cost force_expr_to_var_cost (tree, bool);
696
697 /* The single loop exit if it dominates the latch, NULL otherwise. */
698
699 edge
700 single_dom_exit (struct loop *loop)
701 {
702 edge exit = single_exit (loop);
703
704 if (!exit)
705 return NULL;
706
707 if (!just_once_each_iteration_p (loop, exit->src))
708 return NULL;
709
710 return exit;
711 }
712
713 /* Dumps information about the induction variable IV to FILE. Don't dump
714 variable's name if DUMP_NAME is FALSE. The information is dumped with
715 preceding spaces indicated by INDENT_LEVEL. */
716
717 void
718 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
719 {
720 const char *p;
721 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
722
723 if (indent_level > 4)
724 indent_level = 4;
725 p = spaces + 8 - (indent_level << 1);
726
727 fprintf (file, "%sIV struct:\n", p);
728 if (iv->ssa_name && dump_name)
729 {
730 fprintf (file, "%s SSA_NAME:\t", p);
731 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
732 fprintf (file, "\n");
733 }
734
735 fprintf (file, "%s Type:\t", p);
736 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
737 fprintf (file, "\n");
738
739 fprintf (file, "%s Base:\t", p);
740 print_generic_expr (file, iv->base, TDF_SLIM);
741 fprintf (file, "\n");
742
743 fprintf (file, "%s Step:\t", p);
744 print_generic_expr (file, iv->step, TDF_SLIM);
745 fprintf (file, "\n");
746
747 if (iv->base_object)
748 {
749 fprintf (file, "%s Object:\t", p);
750 print_generic_expr (file, iv->base_object, TDF_SLIM);
751 fprintf (file, "\n");
752 }
753
754 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
755
756 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
757 p, iv->no_overflow ? "No-overflow" : "Overflow");
758 }
759
760 /* Dumps information about the USE to FILE. */
761
762 void
763 dump_use (FILE *file, struct iv_use *use)
764 {
765 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
766 fprintf (file, " At stmt:\t");
767 print_gimple_stmt (file, use->stmt, 0);
768 fprintf (file, " At pos:\t");
769 if (use->op_p)
770 print_generic_expr (file, *use->op_p, TDF_SLIM);
771 fprintf (file, "\n");
772 dump_iv (file, use->iv, false, 2);
773 }
774
775 /* Dumps information about the uses to FILE. */
776
777 void
778 dump_groups (FILE *file, struct ivopts_data *data)
779 {
780 unsigned i, j;
781 struct iv_group *group;
782
783 for (i = 0; i < data->vgroups.length (); i++)
784 {
785 group = data->vgroups[i];
786 fprintf (file, "Group %d:\n", group->id);
787 if (group->type == USE_NONLINEAR_EXPR)
788 fprintf (file, " Type:\tGENERIC\n");
789 else if (group->type == USE_REF_ADDRESS)
790 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
791 else if (group->type == USE_PTR_ADDRESS)
792 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
793 else
794 {
795 gcc_assert (group->type == USE_COMPARE);
796 fprintf (file, " Type:\tCOMPARE\n");
797 }
798 for (j = 0; j < group->vuses.length (); j++)
799 dump_use (file, group->vuses[j]);
800 }
801 }
802
803 /* Dumps information about induction variable candidate CAND to FILE. */
804
805 void
806 dump_cand (FILE *file, struct iv_cand *cand)
807 {
808 struct iv *iv = cand->iv;
809
810 fprintf (file, "Candidate %d:\n", cand->id);
811 if (cand->inv_vars)
812 {
813 fprintf (file, " Depend on inv.vars: ");
814 dump_bitmap (file, cand->inv_vars);
815 }
816 if (cand->inv_exprs)
817 {
818 fprintf (file, " Depend on inv.exprs: ");
819 dump_bitmap (file, cand->inv_exprs);
820 }
821
822 if (cand->var_before)
823 {
824 fprintf (file, " Var before: ");
825 print_generic_expr (file, cand->var_before, TDF_SLIM);
826 fprintf (file, "\n");
827 }
828 if (cand->var_after)
829 {
830 fprintf (file, " Var after: ");
831 print_generic_expr (file, cand->var_after, TDF_SLIM);
832 fprintf (file, "\n");
833 }
834
835 switch (cand->pos)
836 {
837 case IP_NORMAL:
838 fprintf (file, " Incr POS: before exit test\n");
839 break;
840
841 case IP_BEFORE_USE:
842 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
843 break;
844
845 case IP_AFTER_USE:
846 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
847 break;
848
849 case IP_END:
850 fprintf (file, " Incr POS: at end\n");
851 break;
852
853 case IP_ORIGINAL:
854 fprintf (file, " Incr POS: orig biv\n");
855 break;
856 }
857
858 dump_iv (file, iv, false, 1);
859 }
860
861 /* Returns the info for ssa version VER. */
862
863 static inline struct version_info *
864 ver_info (struct ivopts_data *data, unsigned ver)
865 {
866 return data->version_info + ver;
867 }
868
869 /* Returns the info for ssa name NAME. */
870
871 static inline struct version_info *
872 name_info (struct ivopts_data *data, tree name)
873 {
874 return ver_info (data, SSA_NAME_VERSION (name));
875 }
876
877 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
878 emitted in LOOP. */
879
880 static bool
881 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
882 {
883 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
884
885 gcc_assert (bb);
886
887 if (sbb == loop->latch)
888 return true;
889
890 if (sbb != bb)
891 return false;
892
893 return stmt == last_stmt (bb);
894 }
895
896 /* Returns true if STMT is after the place where the original induction
897 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
898 if the positions are identical. */
899
900 static bool
901 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
902 {
903 basic_block cand_bb = gimple_bb (cand->incremented_at);
904 basic_block stmt_bb = gimple_bb (stmt);
905
906 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
907 return false;
908
909 if (stmt_bb != cand_bb)
910 return true;
911
912 if (true_if_equal
913 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
914 return true;
915 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
916 }
917
918 /* Returns true if STMT is after the place where the induction variable
919 CAND is incremented in LOOP. */
920
921 static bool
922 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
923 {
924 switch (cand->pos)
925 {
926 case IP_END:
927 return false;
928
929 case IP_NORMAL:
930 return stmt_after_ip_normal_pos (loop, stmt);
931
932 case IP_ORIGINAL:
933 case IP_AFTER_USE:
934 return stmt_after_inc_pos (cand, stmt, false);
935
936 case IP_BEFORE_USE:
937 return stmt_after_inc_pos (cand, stmt, true);
938
939 default:
940 gcc_unreachable ();
941 }
942 }
943
944 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
945
946 static bool
947 abnormal_ssa_name_p (tree exp)
948 {
949 if (!exp)
950 return false;
951
952 if (TREE_CODE (exp) != SSA_NAME)
953 return false;
954
955 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
956 }
957
958 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
959 abnormal phi node. Callback for for_each_index. */
960
961 static bool
962 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
963 void *data ATTRIBUTE_UNUSED)
964 {
965 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
966 {
967 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
968 return false;
969 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
970 return false;
971 }
972
973 return !abnormal_ssa_name_p (*index);
974 }
975
976 /* Returns true if EXPR contains a ssa name that occurs in an
977 abnormal phi node. */
978
979 bool
980 contains_abnormal_ssa_name_p (tree expr)
981 {
982 enum tree_code code;
983 enum tree_code_class codeclass;
984
985 if (!expr)
986 return false;
987
988 code = TREE_CODE (expr);
989 codeclass = TREE_CODE_CLASS (code);
990
991 if (code == SSA_NAME)
992 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
993
994 if (code == INTEGER_CST
995 || is_gimple_min_invariant (expr))
996 return false;
997
998 if (code == ADDR_EXPR)
999 return !for_each_index (&TREE_OPERAND (expr, 0),
1000 idx_contains_abnormal_ssa_name_p,
1001 NULL);
1002
1003 if (code == COND_EXPR)
1004 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1005 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1006 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1007
1008 switch (codeclass)
1009 {
1010 case tcc_binary:
1011 case tcc_comparison:
1012 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1013 return true;
1014
1015 /* Fallthru. */
1016 case tcc_unary:
1017 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1018 return true;
1019
1020 break;
1021
1022 default:
1023 gcc_unreachable ();
1024 }
1025
1026 return false;
1027 }
1028
1029 /* Returns the structure describing number of iterations determined from
1030 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1031
1032 static struct tree_niter_desc *
1033 niter_for_exit (struct ivopts_data *data, edge exit)
1034 {
1035 struct tree_niter_desc *desc;
1036 tree_niter_desc **slot;
1037
1038 if (!data->niters)
1039 {
1040 data->niters = new hash_map<edge, tree_niter_desc *>;
1041 slot = NULL;
1042 }
1043 else
1044 slot = data->niters->get (exit);
1045
1046 if (!slot)
1047 {
1048 /* Try to determine number of iterations. We cannot safely work with ssa
1049 names that appear in phi nodes on abnormal edges, so that we do not
1050 create overlapping life ranges for them (PR 27283). */
1051 desc = XNEW (struct tree_niter_desc);
1052 if (!number_of_iterations_exit (data->current_loop,
1053 exit, desc, true)
1054 || contains_abnormal_ssa_name_p (desc->niter))
1055 {
1056 XDELETE (desc);
1057 desc = NULL;
1058 }
1059 data->niters->put (exit, desc);
1060 }
1061 else
1062 desc = *slot;
1063
1064 return desc;
1065 }
1066
1067 /* Returns the structure describing number of iterations determined from
1068 single dominating exit of DATA->current_loop, or NULL if something
1069 goes wrong. */
1070
1071 static struct tree_niter_desc *
1072 niter_for_single_dom_exit (struct ivopts_data *data)
1073 {
1074 edge exit = single_dom_exit (data->current_loop);
1075
1076 if (!exit)
1077 return NULL;
1078
1079 return niter_for_exit (data, exit);
1080 }
1081
1082 /* Initializes data structures used by the iv optimization pass, stored
1083 in DATA. */
1084
1085 static void
1086 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1087 {
1088 data->version_info_size = 2 * num_ssa_names;
1089 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1090 data->relevant = BITMAP_ALLOC (NULL);
1091 data->important_candidates = BITMAP_ALLOC (NULL);
1092 data->max_inv_var_id = 0;
1093 data->max_inv_expr_id = 0;
1094 data->niters = NULL;
1095 data->vgroups.create (20);
1096 data->vcands.create (20);
1097 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1098 data->name_expansion_cache = NULL;
1099 data->base_object_map = NULL;
1100 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1101 data->iv_common_cands.create (20);
1102 decl_rtl_to_reset.create (20);
1103 gcc_obstack_init (&data->iv_obstack);
1104 }
1105
1106 /* walk_tree callback for determine_base_object. */
1107
1108 static tree
1109 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1110 {
1111 tree_code code = TREE_CODE (*tp);
1112 tree obj = NULL_TREE;
1113 if (code == ADDR_EXPR)
1114 {
1115 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1116 if (!base)
1117 obj = *tp;
1118 else if (TREE_CODE (base) != MEM_REF)
1119 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1120 }
1121 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1122 obj = fold_convert (ptr_type_node, *tp);
1123
1124 if (!obj)
1125 {
1126 if (!EXPR_P (*tp))
1127 *walk_subtrees = 0;
1128
1129 return NULL_TREE;
1130 }
1131 /* Record special node for multiple base objects and stop. */
1132 if (*static_cast<tree *> (wdata))
1133 {
1134 *static_cast<tree *> (wdata) = integer_zero_node;
1135 return integer_zero_node;
1136 }
1137 /* Record the base object and continue looking. */
1138 *static_cast<tree *> (wdata) = obj;
1139 return NULL_TREE;
1140 }
1141
1142 /* Returns a memory object to which EXPR points, with caching. Returns NULL
1143 if we are able to determine that it does not point to any such object;
1144 specially returns integer_zero_node if EXPR contains multiple base objects. */
1145
1146 static tree
1147 determine_base_object (struct ivopts_data *data, tree expr)
1148 {
1149 tree *slot, obj = NULL_TREE;
1150 if (data->base_object_map)
1151 {
1152 if ((slot = data->base_object_map->get(expr)) != NULL)
1153 return *slot;
1154 }
1155 else
1156 data->base_object_map = new hash_map<tree, tree>;
1157
1158 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1159 data->base_object_map->put (expr, obj);
1160 return obj;
1161 }
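/* Illustrative only (hypothetical SSA names): for a base like "&a[i_1]" the
   recorded base object is "&a" converted to ptr_type_node; for a pointer SSA
   name "p_2" it is "p_2" converted to ptr_type_node; and when two different
   base objects are mixed in one expression, the special value
   integer_zero_node is recorded.  */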
1162
1163 /* Return true if address expression with non-DECL_P operand appears
1164 in EXPR. */
1165
1166 static bool
1167 contain_complex_addr_expr (tree expr)
1168 {
1169 bool res = false;
1170
1171 STRIP_NOPS (expr);
1172 switch (TREE_CODE (expr))
1173 {
1174 case POINTER_PLUS_EXPR:
1175 case PLUS_EXPR:
1176 case MINUS_EXPR:
1177 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1178 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1179 break;
1180
1181 case ADDR_EXPR:
1182 return (!DECL_P (TREE_OPERAND (expr, 0)));
1183
1184 default:
1185 return false;
1186 }
1187
1188 return res;
1189 }
1190
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1193
1194 static struct iv *
1195 alloc_iv (struct ivopts_data *data, tree base, tree step,
1196 bool no_overflow = false)
1197 {
1198 tree expr = base;
1199 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1200 sizeof (struct iv));
1201 gcc_assert (step != NULL_TREE);
1202
1203 /* Lower address expression in base except ones with DECL_P as operand.
1204 By doing this:
1205 1) More accurate cost can be computed for address expressions;
1206 2) Duplicate candidates won't be created for bases in different
1207 forms, like &a[0] and &a. */
1208 STRIP_NOPS (expr);
1209 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1210 || contain_complex_addr_expr (expr))
1211 {
1212 aff_tree comb;
1213 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1214 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1215 }
1216
1217 iv->base = base;
1218 iv->base_object = determine_base_object (data, base);
1219 iv->step = step;
1220 iv->biv_p = false;
1221 iv->nonlin_use = NULL;
1222 iv->ssa_name = NULL_TREE;
1223 if (!no_overflow
1224 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1225 base, step))
1226 no_overflow = true;
1227 iv->no_overflow = no_overflow;
1228 iv->have_address_use = false;
1229
1230 return iv;
1231 }
1232
1233 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1234 doesn't overflow. */
1235
1236 static void
1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1238 bool no_overflow)
1239 {
1240 struct version_info *info = name_info (data, iv);
1241
1242 gcc_assert (!info->iv);
1243
1244 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1245 info->iv = alloc_iv (data, base, step, no_overflow);
1246 info->iv->ssa_name = iv;
1247 }
1248
1249 /* Finds induction variable declaration for VAR. */
1250
1251 static struct iv *
1252 get_iv (struct ivopts_data *data, tree var)
1253 {
1254 basic_block bb;
1255 tree type = TREE_TYPE (var);
1256
1257 if (!POINTER_TYPE_P (type)
1258 && !INTEGRAL_TYPE_P (type))
1259 return NULL;
1260
1261 if (!name_info (data, var)->iv)
1262 {
1263 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1264
1265 if (!bb
1266 || !flow_bb_inside_loop_p (data->current_loop, bb))
1267 set_iv (data, var, var, build_int_cst (type, 0), true);
1268 }
1269
1270 return name_info (data, var)->iv;
1271 }
1272
1273 /* Return the first non-invariant ssa var found in EXPR. */
1274
1275 static tree
1276 extract_single_var_from_expr (tree expr)
1277 {
1278 int i, n;
1279 tree tmp;
1280 enum tree_code code;
1281
1282 if (!expr || is_gimple_min_invariant (expr))
1283 return NULL;
1284
1285 code = TREE_CODE (expr);
1286 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1287 {
1288 n = TREE_OPERAND_LENGTH (expr);
1289 for (i = 0; i < n; i++)
1290 {
1291 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1292
1293 if (tmp)
1294 return tmp;
1295 }
1296 }
1297 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1298 }
1299
1300 /* Finds basic ivs. */
1301
1302 static bool
1303 find_bivs (struct ivopts_data *data)
1304 {
1305 gphi *phi;
1306 affine_iv iv;
1307 tree step, type, base, stop;
1308 bool found = false;
1309 struct loop *loop = data->current_loop;
1310 gphi_iterator psi;
1311
1312 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1313 {
1314 phi = psi.phi ();
1315
1316 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1317 continue;
1318
1319 if (virtual_operand_p (PHI_RESULT (phi)))
1320 continue;
1321
1322 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1323 continue;
1324
1325 if (integer_zerop (iv.step))
1326 continue;
1327
1328 step = iv.step;
1329 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1330 /* Stop expanding the iv base at the first ssa var referred to by the iv
1331 step. Ideally we would stop at any ssa var, but since that is expensive
1332 and such cases are unusual, we just do it for the first one.
1333
1334 See PR64705 for the rationale. */
1335 stop = extract_single_var_from_expr (step);
1336 base = expand_simple_operations (base, stop);
1337 if (contains_abnormal_ssa_name_p (base)
1338 || contains_abnormal_ssa_name_p (step))
1339 continue;
1340
1341 type = TREE_TYPE (PHI_RESULT (phi));
1342 base = fold_convert (type, base);
1343 if (step)
1344 {
1345 if (POINTER_TYPE_P (type))
1346 step = convert_to_ptrofftype (step);
1347 else
1348 step = fold_convert (type, step);
1349 }
1350
1351 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1352 found = true;
1353 }
1354
1355 return found;
1356 }
1357
1358 /* Marks basic ivs. */
1359
1360 static void
1361 mark_bivs (struct ivopts_data *data)
1362 {
1363 gphi *phi;
1364 gimple *def;
1365 tree var;
1366 struct iv *iv, *incr_iv;
1367 struct loop *loop = data->current_loop;
1368 basic_block incr_bb;
1369 gphi_iterator psi;
1370
1371 data->bivs_not_used_in_addr = 0;
1372 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1373 {
1374 phi = psi.phi ();
1375
1376 iv = get_iv (data, PHI_RESULT (phi));
1377 if (!iv)
1378 continue;
1379
1380 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1381 def = SSA_NAME_DEF_STMT (var);
1382 /* Don't mark iv peeled from other one as biv. */
1383 if (def
1384 && gimple_code (def) == GIMPLE_PHI
1385 && gimple_bb (def) == loop->header)
1386 continue;
1387
1388 incr_iv = get_iv (data, var);
1389 if (!incr_iv)
1390 continue;
1391
1392 /* If the increment is in the subloop, ignore it. */
1393 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1394 if (incr_bb->loop_father != data->current_loop
1395 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1396 continue;
1397
1398 iv->biv_p = true;
1399 incr_iv->biv_p = true;
1400 if (iv->no_overflow)
1401 data->bivs_not_used_in_addr++;
1402 if (incr_iv->no_overflow)
1403 data->bivs_not_used_in_addr++;
1404 }
1405 }
1406
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408 parameters to IV. */
1409
1410 static bool
1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1412 {
1413 tree lhs, stop;
1414 struct loop *loop = data->current_loop;
1415
1416 iv->base = NULL_TREE;
1417 iv->step = NULL_TREE;
1418
1419 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1420 return false;
1421
1422 lhs = gimple_assign_lhs (stmt);
1423 if (TREE_CODE (lhs) != SSA_NAME)
1424 return false;
1425
1426 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1427 return false;
1428
1429 /* Stop expanding the iv base at the first ssa var referred to by the iv
1430 step. Ideally we would stop at any ssa var, but since that is expensive
1431 and such cases are unusual, we just do it for the first one.
1432
1433 See PR64705 for the rationale. */
1434 stop = extract_single_var_from_expr (iv->step);
1435 iv->base = expand_simple_operations (iv->base, stop);
1436 if (contains_abnormal_ssa_name_p (iv->base)
1437 || contains_abnormal_ssa_name_p (iv->step))
1438 return false;
1439
1440 /* If STMT could throw, then do not consider STMT as defining a GIV.
1441 While this will suppress optimizations, we cannot safely delete this
1442 GIV and associated statements, even if it appears it is not used. */
1443 if (stmt_could_throw_p (stmt))
1444 return false;
1445
1446 return true;
1447 }
1448
1449 /* Finds general ivs in statement STMT. */
1450
1451 static void
1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1453 {
1454 affine_iv iv;
1455
1456 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1457 return;
1458
1459 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1460 }
1461
1462 /* Finds general ivs in basic block BB. */
1463
1464 static void
1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1466 {
1467 gimple_stmt_iterator bsi;
1468
1469 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1470 find_givs_in_stmt (data, gsi_stmt (bsi));
1471 }
1472
1473 /* Finds general ivs. */
1474
1475 static void
1476 find_givs (struct ivopts_data *data)
1477 {
1478 struct loop *loop = data->current_loop;
1479 basic_block *body = get_loop_body_in_dom_order (loop);
1480 unsigned i;
1481
1482 for (i = 0; i < loop->num_nodes; i++)
1483 find_givs_in_bb (data, body[i]);
1484 free (body);
1485 }
1486
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488 variable and if so, its initial value and step. */
1489
1490 static bool
1491 find_induction_variables (struct ivopts_data *data)
1492 {
1493 unsigned i;
1494 bitmap_iterator bi;
1495
1496 if (!find_bivs (data))
1497 return false;
1498
1499 find_givs (data);
1500 mark_bivs (data);
1501
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1503 {
1504 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1505
1506 if (niter)
1507 {
1508 fprintf (dump_file, " number of iterations ");
1509 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1510 if (!integer_zerop (niter->may_be_zero))
1511 {
1512 fprintf (dump_file, "; zero if ");
1513 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1514 }
1515 fprintf (dump_file, "\n");
1516 };
1517
1518 fprintf (dump_file, "\n<Induction Vars>:\n");
1519 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1520 {
1521 struct version_info *info = ver_info (data, i);
1522 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1523 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1524 }
1525 }
1526
1527 return true;
1528 }
1529
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532 is the const offset stripped from IV base and MEM_TYPE is the type
1533 of the memory being addressed. For uses of other types, ADDR_BASE
1534 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1535
1536 static struct iv_use *
1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1538 gimple *stmt, enum use_type type, tree mem_type,
1539 tree addr_base, poly_uint64 addr_offset)
1540 {
1541 struct iv_use *use = XCNEW (struct iv_use);
1542
1543 use->id = group->vuses.length ();
1544 use->group_id = group->id;
1545 use->type = type;
1546 use->mem_type = mem_type;
1547 use->iv = iv;
1548 use->stmt = stmt;
1549 use->op_p = use_p;
1550 use->addr_base = addr_base;
1551 use->addr_offset = addr_offset;
1552
1553 group->vuses.safe_push (use);
1554 return use;
1555 }
1556
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558 NONLINEAR_USE is true if the invariant is used in a way we do not
1559 handle specially. */
1560
1561 static void
1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1563 {
1564 basic_block bb;
1565 struct version_info *info;
1566
1567 if (TREE_CODE (op) != SSA_NAME
1568 || virtual_operand_p (op))
1569 return;
1570
1571 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1572 if (bb
1573 && flow_bb_inside_loop_p (data->current_loop, bb))
1574 return;
1575
1576 info = name_info (data, op);
1577 info->name = op;
1578 info->has_nonlin_use |= nonlinear_use;
1579 if (!info->inv_id)
1580 info->inv_id = ++data->max_inv_var_id;
1581 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1582 }
1583
1584 /* Record a group of TYPE. */
1585
1586 static struct iv_group *
1587 record_group (struct ivopts_data *data, enum use_type type)
1588 {
1589 struct iv_group *group = XCNEW (struct iv_group);
1590
1591 group->id = data->vgroups.length ();
1592 group->type = type;
1593 group->related_cands = BITMAP_ALLOC (NULL);
1594 group->vuses.create (1);
1595
1596 data->vgroups.safe_push (group);
1597 return group;
1598 }
1599
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601 New group will be created if there is no existing group for the use.
1602 MEM_TYPE is the type of memory being addressed, or NULL if this
1603 isn't an address reference. */
1604
1605 static struct iv_use *
1606 record_group_use (struct ivopts_data *data, tree *use_p,
1607 struct iv *iv, gimple *stmt, enum use_type type,
1608 tree mem_type)
1609 {
1610 tree addr_base = NULL;
1611 struct iv_group *group = NULL;
1612 poly_uint64 addr_offset = 0;
1613
1614 /* A non address type use is always recorded in a new group; for an address type use, try to reuse an existing group below. */
1615 if (address_p (type))
1616 {
1617 unsigned int i;
1618
1619 addr_base = strip_offset (iv->base, &addr_offset);
1620 for (i = 0; i < data->vgroups.length (); i++)
1621 {
1622 struct iv_use *use;
1623
1624 group = data->vgroups[i];
1625 use = group->vuses[0];
1626 if (!address_p (use->type))
1627 continue;
1628
1629 /* Check if it has the same stripped base and step. */
1630 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1631 && operand_equal_p (iv->step, use->iv->step, 0)
1632 && operand_equal_p (addr_base, use->addr_base, 0))
1633 break;
1634 }
1635 if (i == data->vgroups.length ())
1636 group = NULL;
1637 }
1638
1639 if (!group)
1640 group = record_group (data, type);
1641
1642 return record_use (group, use_p, iv, stmt, type, mem_type,
1643 addr_base, addr_offset);
1644 }
1645
1646 /* Checks whether the use OP is interesting and if so, records it. */
1647
1648 static struct iv_use *
1649 find_interesting_uses_op (struct ivopts_data *data, tree op)
1650 {
1651 struct iv *iv;
1652 gimple *stmt;
1653 struct iv_use *use;
1654
1655 if (TREE_CODE (op) != SSA_NAME)
1656 return NULL;
1657
1658 iv = get_iv (data, op);
1659 if (!iv)
1660 return NULL;
1661
1662 if (iv->nonlin_use)
1663 {
1664 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1665 return iv->nonlin_use;
1666 }
1667
1668 if (integer_zerop (iv->step))
1669 {
1670 record_invariant (data, op, true);
1671 return NULL;
1672 }
1673
1674 stmt = SSA_NAME_DEF_STMT (op);
1675 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1676
1677 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1678 iv->nonlin_use = use;
1679 return use;
1680 }
1681
1682 /* Indicate how compare type iv_use can be handled. */
1683 enum comp_iv_rewrite
1684 {
1685 COMP_IV_NA,
1686 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1687 COMP_IV_EXPR,
1688 /* We may rewrite compare type iv_uses on both sides of comparison by
1689 expressing value of each iv_use. */
1690 COMP_IV_EXPR_2,
1691 /* We may rewrite compare type iv_use by expressing value of the iv_use
1692 or by eliminating it with other iv_cand. */
1693 COMP_IV_ELIM
1694 };
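/* Illustrative only (hypothetical SSA names): for an exit test "i_1 < n_2"
   where i_1 is an IV and n_2 is loop invariant, the use is COMP_IV_ELIM;
   if n_2 were defined inside the loop without being an IV, it would be
   COMP_IV_EXPR; and if both operands were IVs with nonzero step,
   COMP_IV_EXPR_2.  */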
1695
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697 of an induction variable and an invariant. If this is the case,
1698 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1699 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700 induction variable descriptions, and the way the condition can be
1701 rewritten is returned. If this is not the case, CONTROL_VAR and BOUND
1702 are set to the arguments of the condition and COMP_IV_NA is returned. */
1703
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1706 tree **control_var, tree **bound,
1707 struct iv **iv_var, struct iv **iv_bound)
1708 {
1709 /* The objects returned when COND has constant operands. */
1710 static struct iv const_iv;
1711 static tree zero;
1712 tree *op0 = &zero, *op1 = &zero;
1713 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1714 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1715
1716 if (gimple_code (stmt) == GIMPLE_COND)
1717 {
1718 gcond *cond_stmt = as_a <gcond *> (stmt);
1719 op0 = gimple_cond_lhs_ptr (cond_stmt);
1720 op1 = gimple_cond_rhs_ptr (cond_stmt);
1721 }
1722 else
1723 {
1724 op0 = gimple_assign_rhs1_ptr (stmt);
1725 op1 = gimple_assign_rhs2_ptr (stmt);
1726 }
1727
1728 zero = integer_zero_node;
1729 const_iv.step = integer_zero_node;
1730
1731 if (TREE_CODE (*op0) == SSA_NAME)
1732 iv0 = get_iv (data, *op0);
1733 if (TREE_CODE (*op1) == SSA_NAME)
1734 iv1 = get_iv (data, *op1);
1735
1736 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1737 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1738 {
1739 rewrite_type = COMP_IV_EXPR_2;
1740 goto end;
1741 }
1742
1743 /* If neither side of the comparison is an IV. */
1744 if ((!iv0 || integer_zerop (iv0->step))
1745 && (!iv1 || integer_zerop (iv1->step)))
1746 goto end;
1747
1748 /* Control variable may be on the other side. */
1749 if (!iv0 || integer_zerop (iv0->step))
1750 {
1751 std::swap (op0, op1);
1752 std::swap (iv0, iv1);
1753 }
1754 /* If one side is an IV and the other side isn't loop invariant. */
1755 if (!iv1)
1756 rewrite_type = COMP_IV_EXPR;
1757 /* If one side is an IV and the other side is loop invariant. */
1758 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1759 rewrite_type = COMP_IV_ELIM;
1760
1761 end:
1762 if (control_var)
1763 *control_var = op0;
1764 if (iv_var)
1765 *iv_var = iv0;
1766 if (bound)
1767 *bound = op1;
1768 if (iv_bound)
1769 *iv_bound = iv1;
1770
1771 return rewrite_type;
1772 }
1773
1774 /* Checks whether the condition in STMT is interesting and if so,
1775 records it. */
1776
1777 static void
1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1779 {
1780 tree *var_p, *bound_p;
1781 struct iv *var_iv, *bound_iv;
1782 enum comp_iv_rewrite ret;
1783
1784 ret = extract_cond_operands (data, stmt,
1785 &var_p, &bound_p, &var_iv, &bound_iv);
1786 if (ret == COMP_IV_NA)
1787 {
1788 find_interesting_uses_op (data, *var_p);
1789 find_interesting_uses_op (data, *bound_p);
1790 return;
1791 }
1792
1793 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1794 /* Record compare type iv_use for iv on the other side of comparison. */
1795 if (ret == COMP_IV_EXPR_2)
1796 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1797 }
1798
1799 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1800 obviously invariant, i.e. such that all its operands are defined
1801 outside of the returned loop. Returns NULL if EXPR is not
1802 even obviously invariant in LOOP. */
1803
1804 struct loop *
1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1806 {
1807 basic_block def_bb;
1808 unsigned i, len;
1809
1810 if (is_gimple_min_invariant (expr))
1811 return current_loops->tree_root;
1812
1813 if (TREE_CODE (expr) == SSA_NAME)
1814 {
1815 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1816 if (def_bb)
1817 {
1818 if (flow_bb_inside_loop_p (loop, def_bb))
1819 return NULL;
1820 return superloop_at_depth (loop,
1821 loop_depth (def_bb->loop_father) + 1);
1822 }
1823
1824 return current_loops->tree_root;
1825 }
1826
1827 if (!EXPR_P (expr))
1828 return NULL;
1829
1830 unsigned maxdepth = 0;
1831 len = TREE_OPERAND_LENGTH (expr);
1832 for (i = 0; i < len; i++)
1833 {
1834 struct loop *ivloop;
1835 if (!TREE_OPERAND (expr, i))
1836 continue;
1837
1838 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1839 if (!ivloop)
1840 return NULL;
1841 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1842 }
1843
1844 return superloop_at_depth (loop, maxdepth);
1845 }
1846
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848 i.e. if all its operands are defined outside of the LOOP. LOOP
1849 should not be the function body. */
1850
1851 bool
1852 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1853 {
1854 basic_block def_bb;
1855 unsigned i, len;
1856
1857 gcc_assert (loop_depth (loop) > 0);
1858
1859 if (is_gimple_min_invariant (expr))
1860 return true;
1861
1862 if (TREE_CODE (expr) == SSA_NAME)
1863 {
1864 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1865 if (def_bb
1866 && flow_bb_inside_loop_p (loop, def_bb))
1867 return false;
1868
1869 return true;
1870 }
1871
1872 if (!EXPR_P (expr))
1873 return false;
1874
1875 len = TREE_OPERAND_LENGTH (expr);
1876 for (i = 0; i < len; i++)
1877 if (TREE_OPERAND (expr, i)
1878 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1879 return false;
1880
1881 return true;
1882 }
1883
1884 /* Given expression EXPR which computes inductive values with respect
1885 to loop recorded in DATA, this function returns biv from which EXPR
1886 is derived by tracing definition chains of ssa variables in EXPR. */
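/* For instance (a sketch assuming i_1 is the only iv appearing in EXPR):
   if EXPR is (unsigned int) (i_1 * 4) and i_1 is defined by a loop header
   PHI recorded as a biv, the recursion walks through the multiplication and
   the cast and returns the iv recorded for i_1. */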
1887
1888 static struct iv*
1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1890 {
1891 struct iv *iv;
1892 unsigned i, n;
1893 tree e2, e1;
1894 enum tree_code code;
1895 gimple *stmt;
1896
1897 if (expr == NULL_TREE)
1898 return NULL;
1899
1900 if (is_gimple_min_invariant (expr))
1901 return NULL;
1902
1903 code = TREE_CODE (expr);
1904 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1905 {
1906 n = TREE_OPERAND_LENGTH (expr);
1907 for (i = 0; i < n; i++)
1908 {
1909 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1910 if (iv)
1911 return iv;
1912 }
1913 }
1914
1915 /* Stop if it's not an SSA name. */
1916 if (code != SSA_NAME)
1917 return NULL;
1918
1919 iv = get_iv (data, expr);
1920 if (!iv || integer_zerop (iv->step))
1921 return NULL;
1922 else if (iv->biv_p)
1923 return iv;
1924
1925 stmt = SSA_NAME_DEF_STMT (expr);
1926 if (gphi *phi = dyn_cast <gphi *> (stmt))
1927 {
1928 ssa_op_iter iter;
1929 use_operand_p use_p;
1930 basic_block phi_bb = gimple_bb (phi);
1931
1932 /* Skip loop header PHI that doesn't define biv. */
1933 if (phi_bb->loop_father == data->current_loop)
1934 return NULL;
1935
1936 if (virtual_operand_p (gimple_phi_result (phi)))
1937 return NULL;
1938
1939 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1940 {
1941 tree use = USE_FROM_PTR (use_p);
1942 iv = find_deriving_biv_for_expr (data, use);
1943 if (iv)
1944 return iv;
1945 }
1946 return NULL;
1947 }
1948 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1949 return NULL;
1950
1951 e1 = gimple_assign_rhs1 (stmt);
1952 code = gimple_assign_rhs_code (stmt);
1953 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1954 return find_deriving_biv_for_expr (data, e1);
1955
1956 switch (code)
1957 {
1958 case MULT_EXPR:
1959 case PLUS_EXPR:
1960 case MINUS_EXPR:
1961 case POINTER_PLUS_EXPR:
1962 /* Increments, decrements and multiplications by a constant
1963 are simple. */
1964 e2 = gimple_assign_rhs2 (stmt);
1965 iv = find_deriving_biv_for_expr (data, e2);
1966 if (iv)
1967 return iv;
1968 gcc_fallthrough ();
1969
1970 CASE_CONVERT:
1971 /* Casts are simple. */
1972 return find_deriving_biv_for_expr (data, e1);
1973
1974 default:
1975 break;
1976 }
1977
1978 return NULL;
1979 }
1980
1981 /* Record that BIV is used in address type uses, and do the same for any
1982 biv that precedes or follows it by exactly one step. */
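/* For instance, two bivs i and j with the same step, where j's initial value
   equals i's plus one step, are treated as neighbours here: marking i as used
   in an address also marks j, since j is simply i one iteration ahead (an
   illustrative reading of the base_1/base_2 checks below). */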
1983
1984 static void
1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1986 {
1987 unsigned i;
1988 tree type, base_1, base_2;
1989 bitmap_iterator bi;
1990
1991 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1992 || biv->have_address_use || !biv->no_overflow)
1993 return;
1994
1995 type = TREE_TYPE (biv->base);
1996 if (!INTEGRAL_TYPE_P (type))
1997 return;
1998
1999 biv->have_address_use = true;
2000 data->bivs_not_used_in_addr--;
2001 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2002 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2003 {
2004 struct iv *iv = ver_info (data, i)->iv;
2005
2006 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2007 || iv->have_address_use || !iv->no_overflow)
2008 continue;
2009
2010 if (type != TREE_TYPE (iv->base)
2011 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2012 continue;
2013
2014 if (!operand_equal_p (biv->step, iv->step, 0))
2015 continue;
2016
2017 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2018 if (operand_equal_p (base_1, iv->base, 0)
2019 || operand_equal_p (base_2, biv->base, 0))
2020 {
2021 iv->have_address_use = true;
2022 data->bivs_not_used_in_addr--;
2023 }
2024 }
2025 }
2026
2027 /* Cumulates the steps of indices into DATA and replaces their values with the
2028 initial ones. Returns false when the value of the index cannot be determined.
2029 Callback for for_each_index. */
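/* For instance, for a reference a[i_1] where i_1 is an iv with step 1 and
   the elements of a are 4 bytes wide, the callback below replaces the index
   by the iv's base and accumulates 1 * 4 = 4 bytes into the step field of
   DATA (a sketch assuming the element size is a compile-time constant). */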
2030
2031 struct ifs_ivopts_data
2032 {
2033 struct ivopts_data *ivopts_data;
2034 gimple *stmt;
2035 tree step;
2036 };
2037
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2040 {
2041 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042 struct iv *iv;
2043 bool use_overflow_semantics = false;
2044 tree step, iv_base, iv_step, lbound, off;
2045 struct loop *loop = dta->ivopts_data->current_loop;
2046
2047 /* If base is a component ref, require that the offset of the reference
2048 be invariant. */
2049 if (TREE_CODE (base) == COMPONENT_REF)
2050 {
2051 off = component_ref_field_offset (base);
2052 return expr_invariant_in_loop_p (loop, off);
2053 }
2054
2055 /* If base is array, first check whether we will be able to move the
2056 reference out of the loop (in order to take its address in strength
2057 reduction). In order for this to work we need both lower bound
2058 and step to be loop invariants. */
2059 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2060 {
2061 /* Moreover, for a range, the size needs to be invariant as well. */
2062 if (TREE_CODE (base) == ARRAY_RANGE_REF
2063 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064 return false;
2065
2066 step = array_ref_element_size (base);
2067 lbound = array_ref_low_bound (base);
2068
2069 if (!expr_invariant_in_loop_p (loop, step)
2070 || !expr_invariant_in_loop_p (loop, lbound))
2071 return false;
2072 }
2073
2074 if (TREE_CODE (*idx) != SSA_NAME)
2075 return true;
2076
2077 iv = get_iv (dta->ivopts_data, *idx);
2078 if (!iv)
2079 return false;
2080
2081 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2082 *&x[0], which is not folded and does not trigger the
2083 ARRAY_REF path below. */
2084 *idx = iv->base;
2085
2086 if (integer_zerop (iv->step))
2087 return true;
2088
2089 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2090 {
2091 step = array_ref_element_size (base);
2092
2093 /* We only handle addresses whose step is an integer constant. */
2094 if (TREE_CODE (step) != INTEGER_CST)
2095 return false;
2096 }
2097 else
2098 /* The step for pointer arithmetic is already 1 byte. */
2099 step = size_one_node;
2100
2101 iv_base = iv->base;
2102 iv_step = iv->step;
2103 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104 use_overflow_semantics = true;
2105
2106 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107 sizetype, &iv_base, &iv_step, dta->stmt,
2108 use_overflow_semantics))
2109 {
2110 /* The index might wrap. */
2111 return false;
2112 }
2113
2114 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2116
2117 if (dta->ivopts_data->bivs_not_used_in_addr)
2118 {
2119 if (!iv->biv_p)
2120 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2121
2122 record_biv_for_address_use (dta->ivopts_data, iv);
2123 }
2124 return true;
2125 }
2126
2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2128 object is passed to it in DATA. */
2129
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132 void *vdata)
2133 {
2134 struct ivopts_data *data = (struct ivopts_data *) vdata;
2135 find_interesting_uses_op (data, *idx);
2136 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2137 {
2138 find_interesting_uses_op (data, array_ref_element_size (base));
2139 find_interesting_uses_op (data, array_ref_low_bound (base));
2140 }
2141 return true;
2142 }
2143
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145 store cst to MUL and return true. Otherwise return false.
2146 The returned value is always sign-extended, regardless of the
2147 signedness of TOP and BOT. */
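/* For instance, constant_multiple_of (x * 6, x, &m) sets m to 6 and
   constant_multiple_of (x * 6 - x, x, &m) sets m to 5; for two INTEGER_CSTs
   the division must be exact, so TOP = 12, BOT = 4 gives 3, while TOP = 13,
   BOT = 4 fails. */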
2148
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2151 {
2152 tree mby;
2153 enum tree_code code;
2154 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155 widest_int res, p0, p1;
2156
2157 STRIP_NOPS (top);
2158 STRIP_NOPS (bot);
2159
2160 if (operand_equal_p (top, bot, 0))
2161 {
2162 *mul = 1;
2163 return true;
2164 }
2165
2166 code = TREE_CODE (top);
2167 switch (code)
2168 {
2169 case MULT_EXPR:
2170 mby = TREE_OPERAND (top, 1);
2171 if (TREE_CODE (mby) != INTEGER_CST)
2172 return false;
2173
2174 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175 return false;
2176
2177 *mul = wi::sext (res * wi::to_widest (mby), precision);
2178 return true;
2179
2180 case PLUS_EXPR:
2181 case MINUS_EXPR:
2182 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184 return false;
2185
2186 if (code == MINUS_EXPR)
2187 p1 = -p1;
2188 *mul = wi::sext (p0 + p1, precision);
2189 return true;
2190
2191 case INTEGER_CST:
2192 if (TREE_CODE (bot) != INTEGER_CST)
2193 return false;
2194
2195 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197 if (p1 == 0)
2198 return false;
2199 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200 return res == 0;
2201
2202 default:
2203 if (POLY_INT_CST_P (top)
2204 && POLY_INT_CST_P (bot)
2205 && constant_multiple_p (wi::to_poly_widest (top),
2206 wi::to_poly_widest (bot), mul))
2207 return true;
2208
2209 return false;
2210 }
2211 }
2212
2213 /* Return true if memory reference REF with step STEP may be unaligned. */
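/* For instance, a 4-byte access whose object alignment is only known to be
   2 bytes, or one whose address advances by a 2-byte STEP each iteration,
   is conservatively reported as possibly unaligned. */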
2214
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2217 {
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219 thus they are not misaligned. */
2220 if (TREE_CODE (ref) == TARGET_MEM_REF)
2221 return false;
2222
2223 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2226
2227 unsigned HOST_WIDE_INT bitpos;
2228 unsigned int ref_align;
2229 get_object_alignment_1 (ref, &ref_align, &bitpos);
2230 if (ref_align < align
2231 || (bitpos % align) != 0
2232 || (bitpos % BITS_PER_UNIT) != 0)
2233 return true;
2234
2235 unsigned int trailing_zeros = tree_ctz (step);
2236 if (trailing_zeros < HOST_BITS_PER_INT
2237 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238 return true;
2239
2240 return false;
2241 }
2242
2243 /* Return true if EXPR may be non-addressable. */
2244
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2247 {
2248 switch (TREE_CODE (expr))
2249 {
2250 case TARGET_MEM_REF:
2251 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2252 target, thus they are always addressable. */
2253 return false;
2254
2255 case MEM_REF:
2256 /* Likewise for MEM_REFs, modulo the storage order. */
2257 return REF_REVERSE_STORAGE_ORDER (expr);
2258
2259 case BIT_FIELD_REF:
2260 if (REF_REVERSE_STORAGE_ORDER (expr))
2261 return true;
2262 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263
2264 case COMPONENT_REF:
2265 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2266 return true;
2267 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2268 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269
2270 case ARRAY_REF:
2271 case ARRAY_RANGE_REF:
2272 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2273 return true;
2274 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275
2276 case VIEW_CONVERT_EXPR:
2277 /* This kind of view-conversion may wrap non-addressable objects
2278 and make them look addressable. After some processing the
2279 non-addressability may be uncovered again, causing ADDR_EXPRs
2280 of inappropriate objects to be built. */
2281 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2282 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2283 return true;
2284 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2285
2286 CASE_CONVERT:
2287 return true;
2288
2289 default:
2290 break;
2291 }
2292
2293 return false;
2294 }
2295
2296 /* Finds addresses in *OP_P inside STMT. */
2297
2298 static void
2299 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2300 tree *op_p)
2301 {
2302 tree base = *op_p, step = size_zero_node;
2303 struct iv *civ;
2304 struct ifs_ivopts_data ifs_ivopts_data;
2305
2306 /* Do not play with volatile memory references. A bit too conservative,
2307 perhaps, but safe. */
2308 if (gimple_has_volatile_ops (stmt))
2309 goto fail;
2310
2311 /* Ignore bitfields for now. Not really something terribly complicated
2312 to handle. TODO. */
2313 if (TREE_CODE (base) == BIT_FIELD_REF)
2314 goto fail;
2315
2316 base = unshare_expr (base);
2317
2318 if (TREE_CODE (base) == TARGET_MEM_REF)
2319 {
2320 tree type = build_pointer_type (TREE_TYPE (base));
2321 tree astep;
2322
2323 if (TMR_BASE (base)
2324 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2325 {
2326 civ = get_iv (data, TMR_BASE (base));
2327 if (!civ)
2328 goto fail;
2329
2330 TMR_BASE (base) = civ->base;
2331 step = civ->step;
2332 }
2333 if (TMR_INDEX2 (base)
2334 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2335 {
2336 civ = get_iv (data, TMR_INDEX2 (base));
2337 if (!civ)
2338 goto fail;
2339
2340 TMR_INDEX2 (base) = civ->base;
2341 step = civ->step;
2342 }
2343 if (TMR_INDEX (base)
2344 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2345 {
2346 civ = get_iv (data, TMR_INDEX (base));
2347 if (!civ)
2348 goto fail;
2349
2350 TMR_INDEX (base) = civ->base;
2351 astep = civ->step;
2352
2353 if (astep)
2354 {
2355 if (TMR_STEP (base))
2356 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2357
2358 step = fold_build2 (PLUS_EXPR, type, step, astep);
2359 }
2360 }
2361
2362 if (integer_zerop (step))
2363 goto fail;
2364 base = tree_mem_ref_addr (type, base);
2365 }
2366 else
2367 {
2368 ifs_ivopts_data.ivopts_data = data;
2369 ifs_ivopts_data.stmt = stmt;
2370 ifs_ivopts_data.step = size_zero_node;
2371 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2372 || integer_zerop (ifs_ivopts_data.step))
2373 goto fail;
2374 step = ifs_ivopts_data.step;
2375
2376 /* Check that the base expression is addressable. This needs
2377 to be done after substituting bases of IVs into it. */
2378 if (may_be_nonaddressable_p (base))
2379 goto fail;
2380
2381 /* Moreover, on strict alignment platforms, check that it is
2382 sufficiently aligned. */
2383 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2384 goto fail;
2385
2386 base = build_fold_addr_expr (base);
2387
2388 /* Substituting bases of IVs into the base expression might
2389 have caused folding opportunities. */
2390 if (TREE_CODE (base) == ADDR_EXPR)
2391 {
2392 tree *ref = &TREE_OPERAND (base, 0);
2393 while (handled_component_p (*ref))
2394 ref = &TREE_OPERAND (*ref, 0);
2395 if (TREE_CODE (*ref) == MEM_REF)
2396 {
2397 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2398 TREE_OPERAND (*ref, 0),
2399 TREE_OPERAND (*ref, 1));
2400 if (tem)
2401 *ref = tem;
2402 }
2403 }
2404 }
2405
2406 civ = alloc_iv (data, base, step);
2407 /* Fail if base object of this memory reference is unknown. */
2408 if (civ->base_object == NULL_TREE)
2409 goto fail;
2410
2411 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2412 return;
2413
2414 fail:
2415 for_each_index (op_p, idx_record_use, data);
2416 }
2417
2418 /* Finds and records invariants used in STMT. */
2419
2420 static void
2421 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2422 {
2423 ssa_op_iter iter;
2424 use_operand_p use_p;
2425 tree op;
2426
2427 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2428 {
2429 op = USE_FROM_PTR (use_p);
2430 record_invariant (data, op, false);
2431 }
2432 }
2433
2434 /* CALL calls an internal function. If operand *OP_P will become an
2435 address when the call is expanded, return the type of the memory
2436 being addressed, otherwise return null. */
2437
2438 static tree
2439 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2440 {
2441 switch (gimple_call_internal_fn (call))
2442 {
2443 case IFN_MASK_LOAD:
2444 if (op_p == gimple_call_arg_ptr (call, 0))
2445 return TREE_TYPE (gimple_call_lhs (call));
2446 return NULL_TREE;
2447
2448 case IFN_MASK_STORE:
2449 if (op_p == gimple_call_arg_ptr (call, 0))
2450 return TREE_TYPE (gimple_call_arg (call, 3));
2451 return NULL_TREE;
2452
2453 default:
2454 return NULL_TREE;
2455 }
2456 }
2457
2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2459 Return true if the operand will become an address when STMT
2460 is expanded and record the associated address use if so. */
2461
2462 static bool
2463 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2464 struct iv *iv)
2465 {
2466 /* Fail if base object of this memory reference is unknown. */
2467 if (iv->base_object == NULL_TREE)
2468 return false;
2469
2470 tree mem_type = NULL_TREE;
2471 if (gcall *call = dyn_cast <gcall *> (stmt))
2472 if (gimple_call_internal_p (call))
2473 mem_type = get_mem_type_for_internal_fn (call, op_p);
2474 if (mem_type)
2475 {
2476 iv = alloc_iv (data, iv->base, iv->step);
2477 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2478 return true;
2479 }
2480 return false;
2481 }
2482
2483 /* Finds interesting uses of induction variables in the statement STMT. */
2484
2485 static void
2486 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2487 {
2488 struct iv *iv;
2489 tree op, *lhs, *rhs;
2490 ssa_op_iter iter;
2491 use_operand_p use_p;
2492 enum tree_code code;
2493
2494 find_invariants_stmt (data, stmt);
2495
2496 if (gimple_code (stmt) == GIMPLE_COND)
2497 {
2498 find_interesting_uses_cond (data, stmt);
2499 return;
2500 }
2501
2502 if (is_gimple_assign (stmt))
2503 {
2504 lhs = gimple_assign_lhs_ptr (stmt);
2505 rhs = gimple_assign_rhs1_ptr (stmt);
2506
2507 if (TREE_CODE (*lhs) == SSA_NAME)
2508 {
2509 /* If the statement defines an induction variable, the uses are not
2510 interesting by themselves. */
2511
2512 iv = get_iv (data, *lhs);
2513
2514 if (iv && !integer_zerop (iv->step))
2515 return;
2516 }
2517
2518 code = gimple_assign_rhs_code (stmt);
2519 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2520 && (REFERENCE_CLASS_P (*rhs)
2521 || is_gimple_val (*rhs)))
2522 {
2523 if (REFERENCE_CLASS_P (*rhs))
2524 find_interesting_uses_address (data, stmt, rhs);
2525 else
2526 find_interesting_uses_op (data, *rhs);
2527
2528 if (REFERENCE_CLASS_P (*lhs))
2529 find_interesting_uses_address (data, stmt, lhs);
2530 return;
2531 }
2532 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2533 {
2534 find_interesting_uses_cond (data, stmt);
2535 return;
2536 }
2537
2538 /* TODO -- we should also handle address uses of type
2539
2540 memory = call (whatever);
2541
2542 and
2543
2544 call (memory). */
2545 }
2546
2547 if (gimple_code (stmt) == GIMPLE_PHI
2548 && gimple_bb (stmt) == data->current_loop->header)
2549 {
2550 iv = get_iv (data, PHI_RESULT (stmt));
2551
2552 if (iv && !integer_zerop (iv->step))
2553 return;
2554 }
2555
2556 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2557 {
2558 op = USE_FROM_PTR (use_p);
2559
2560 if (TREE_CODE (op) != SSA_NAME)
2561 continue;
2562
2563 iv = get_iv (data, op);
2564 if (!iv)
2565 continue;
2566
2567 if (!find_address_like_use (data, stmt, use_p->use, iv))
2568 find_interesting_uses_op (data, op);
2569 }
2570 }
2571
2572 /* Finds interesting uses of induction variables outside of loops
2573 on loop exit edge EXIT. */
2574
2575 static void
2576 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2577 {
2578 gphi *phi;
2579 gphi_iterator psi;
2580 tree def;
2581
2582 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2583 {
2584 phi = psi.phi ();
2585 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2586 if (!virtual_operand_p (def))
2587 find_interesting_uses_op (data, def);
2588 }
2589 }
2590
2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2592 mode for memory reference represented by USE. */
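/* For instance, on a target whose [base + offset] addressing accepts only
   small immediate offsets (say a signed 12-bit field), an offset of 8 is
   valid while an offset of 8192 is not; the invalid one later forces the
   use into a separate group. */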
2593
2594 static GTY (()) vec<rtx, va_gc> *addr_list;
2595
2596 static bool
2597 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2598 {
2599 rtx reg, addr;
2600 unsigned list_index;
2601 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2602 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2603
2604 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2605 if (list_index >= vec_safe_length (addr_list))
2606 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2607
2608 addr = (*addr_list)[list_index];
2609 if (!addr)
2610 {
2611 addr_mode = targetm.addr_space.address_mode (as);
2612 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2613 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2614 (*addr_list)[list_index] = addr;
2615 }
2616 else
2617 addr_mode = GET_MODE (addr);
2618
2619 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2620 return (memory_address_addr_space_p (mem_mode, addr, as));
2621 }
2622
2623 /* Comparison function to sort group in ascending order of addr_offset. */
2624
2625 static int
2626 group_compare_offset (const void *a, const void *b)
2627 {
2628 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2629 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2630
2631 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2632 }
2633
2634 /* Check if small groups should be split. Return true if no group
2635 contains more than two uses with distinct addr_offsets. Return
2636 false otherwise. We want to split such groups because:
2637
2638 1) Small groups don't have much benefit and may interfere with
2639 general candidate selection.
2640 2) A problem consisting only of small groups is usually small, and
2641 the general algorithm can handle it well.
2642
2643 TODO -- The above claim may not hold when we want to merge memory
2644 accesses with consecutive addresses. */
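/* For instance, if every address group has at most two distinct offsets,
   say {0, 4} and {0, 8}, this returns true, and split_address_groups below
   then separates uses with different offsets into their own groups. */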
2645
2646 static bool
2647 split_small_address_groups_p (struct ivopts_data *data)
2648 {
2649 unsigned int i, j, distinct = 1;
2650 struct iv_use *pre;
2651 struct iv_group *group;
2652
2653 for (i = 0; i < data->vgroups.length (); i++)
2654 {
2655 group = data->vgroups[i];
2656 if (group->vuses.length () == 1)
2657 continue;
2658
2659 gcc_assert (address_p (group->type));
2660 if (group->vuses.length () == 2)
2661 {
2662 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2663 group->vuses[1]->addr_offset) > 0)
2664 std::swap (group->vuses[0], group->vuses[1]);
2665 }
2666 else
2667 group->vuses.qsort (group_compare_offset);
2668
2669 if (distinct > 2)
2670 continue;
2671
2672 distinct = 1;
2673 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2674 {
2675 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2676 {
2677 pre = group->vuses[j];
2678 distinct++;
2679 }
2680
2681 if (distinct > 2)
2682 break;
2683 }
2684 }
2685
2686 return (distinct <= 2);
2687 }
2688
2689 /* For each group of address type uses, this function further groups
2690 these uses according to the maximum offset supported by target's
2691 [base + offset] addressing mode. */
2692
2693 static void
2694 split_address_groups (struct ivopts_data *data)
2695 {
2696 unsigned int i, j;
2697 /* Whether groups should be split at every distinct offset. */
2698 bool split_p = split_small_address_groups_p (data);
2699
2700 for (i = 0; i < data->vgroups.length (); i++)
2701 {
2702 struct iv_group *new_group = NULL;
2703 struct iv_group *group = data->vgroups[i];
2704 struct iv_use *use = group->vuses[0];
2705
2706 use->id = 0;
2707 use->group_id = group->id;
2708 if (group->vuses.length () == 1)
2709 continue;
2710
2711 gcc_assert (address_p (use->type));
2712
2713 for (j = 1; j < group->vuses.length ();)
2714 {
2715 struct iv_use *next = group->vuses[j];
2716 poly_int64 offset = next->addr_offset - use->addr_offset;
2717
2718 /* Split the group if asked to, or if the offset against the first
2719 use can't fit in the offset part of the addressing mode. IV uses
2720 having the same offset are still kept in one group. */
2721 if (maybe_ne (offset, 0)
2722 && (split_p || !addr_offset_valid_p (use, offset)))
2723 {
2724 if (!new_group)
2725 new_group = record_group (data, group->type);
2726 group->vuses.ordered_remove (j);
2727 new_group->vuses.safe_push (next);
2728 continue;
2729 }
2730
2731 next->id = j;
2732 next->group_id = group->id;
2733 j++;
2734 }
2735 }
2736 }
2737
2738 /* Finds uses of the induction variables that are interesting. */
2739
2740 static void
2741 find_interesting_uses (struct ivopts_data *data)
2742 {
2743 basic_block bb;
2744 gimple_stmt_iterator bsi;
2745 basic_block *body = get_loop_body (data->current_loop);
2746 unsigned i;
2747 edge e;
2748
2749 for (i = 0; i < data->current_loop->num_nodes; i++)
2750 {
2751 edge_iterator ei;
2752 bb = body[i];
2753
2754 FOR_EACH_EDGE (e, ei, bb->succs)
2755 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2756 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2757 find_interesting_uses_outside (data, e);
2758
2759 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2760 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2761 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2762 if (!is_gimple_debug (gsi_stmt (bsi)))
2763 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2764 }
2765 free (body);
2766
2767 split_address_groups (data);
2768
2769 if (dump_file && (dump_flags & TDF_DETAILS))
2770 {
2771 fprintf (dump_file, "\n<IV Groups>:\n");
2772 dump_groups (dump_file, data);
2773 fprintf (dump_file, "\n");
2774 }
2775 }
2776
2777 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2778 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2779 we are at the top-level of the processed address. */
2780
2781 static tree
2782 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2783 poly_int64 *offset)
2784 {
2785 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2786 enum tree_code code;
2787 tree type, orig_type = TREE_TYPE (expr);
2788 poly_int64 off0, off1;
2789 HOST_WIDE_INT st;
2790 tree orig_expr = expr;
2791
2792 STRIP_NOPS (expr);
2793
2794 type = TREE_TYPE (expr);
2795 code = TREE_CODE (expr);
2796 *offset = 0;
2797
2798 switch (code)
2799 {
2800 case POINTER_PLUS_EXPR:
2801 case PLUS_EXPR:
2802 case MINUS_EXPR:
2803 op0 = TREE_OPERAND (expr, 0);
2804 op1 = TREE_OPERAND (expr, 1);
2805
2806 op0 = strip_offset_1 (op0, false, false, &off0);
2807 op1 = strip_offset_1 (op1, false, false, &off1);
2808
2809 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2810 if (op0 == TREE_OPERAND (expr, 0)
2811 && op1 == TREE_OPERAND (expr, 1))
2812 return orig_expr;
2813
2814 if (integer_zerop (op1))
2815 expr = op0;
2816 else if (integer_zerop (op0))
2817 {
2818 if (code == MINUS_EXPR)
2819 expr = fold_build1 (NEGATE_EXPR, type, op1);
2820 else
2821 expr = op1;
2822 }
2823 else
2824 expr = fold_build2 (code, type, op0, op1);
2825
2826 return fold_convert (orig_type, expr);
2827
2828 case MULT_EXPR:
2829 op1 = TREE_OPERAND (expr, 1);
2830 if (!cst_and_fits_in_hwi (op1))
2831 return orig_expr;
2832
2833 op0 = TREE_OPERAND (expr, 0);
2834 op0 = strip_offset_1 (op0, false, false, &off0);
2835 if (op0 == TREE_OPERAND (expr, 0))
2836 return orig_expr;
2837
2838 *offset = off0 * int_cst_value (op1);
2839 if (integer_zerop (op0))
2840 expr = op0;
2841 else
2842 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2843
2844 return fold_convert (orig_type, expr);
2845
2846 case ARRAY_REF:
2847 case ARRAY_RANGE_REF:
2848 if (!inside_addr)
2849 return orig_expr;
2850
2851 step = array_ref_element_size (expr);
2852 if (!cst_and_fits_in_hwi (step))
2853 break;
2854
2855 st = int_cst_value (step);
2856 op1 = TREE_OPERAND (expr, 1);
2857 op1 = strip_offset_1 (op1, false, false, &off1);
2858 *offset = off1 * st;
2859
2860 if (top_compref
2861 && integer_zerop (op1))
2862 {
2863 /* Strip the component reference completely. */
2864 op0 = TREE_OPERAND (expr, 0);
2865 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2866 *offset += off0;
2867 return op0;
2868 }
2869 break;
2870
2871 case COMPONENT_REF:
2872 {
2873 tree field;
2874
2875 if (!inside_addr)
2876 return orig_expr;
2877
2878 tmp = component_ref_field_offset (expr);
2879 field = TREE_OPERAND (expr, 1);
2880 if (top_compref
2881 && cst_and_fits_in_hwi (tmp)
2882 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2883 {
2884 HOST_WIDE_INT boffset, abs_off;
2885
2886 /* Strip the component reference completely. */
2887 op0 = TREE_OPERAND (expr, 0);
2888 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2889 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2890 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2891 if (boffset < 0)
2892 abs_off = -abs_off;
2893
2894 *offset = off0 + int_cst_value (tmp) + abs_off;
2895 return op0;
2896 }
2897 }
2898 break;
2899
2900 case ADDR_EXPR:
2901 op0 = TREE_OPERAND (expr, 0);
2902 op0 = strip_offset_1 (op0, true, true, &off0);
2903 *offset += off0;
2904
2905 if (op0 == TREE_OPERAND (expr, 0))
2906 return orig_expr;
2907
2908 expr = build_fold_addr_expr (op0);
2909 return fold_convert (orig_type, expr);
2910
2911 case MEM_REF:
2912 /* ??? Offset operand? */
2913 inside_addr = false;
2914 break;
2915
2916 default:
2917 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2918 return build_int_cst (orig_type, 0);
2919 return orig_expr;
2920 }
2921
2922 /* Default handling of expressions for which we want to recurse into
2923 the first operand. */
2924 op0 = TREE_OPERAND (expr, 0);
2925 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2926 *offset += off0;
2927
2928 if (op0 == TREE_OPERAND (expr, 0)
2929 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2930 return orig_expr;
2931
2932 expr = copy_node (expr);
2933 TREE_OPERAND (expr, 0) = op0;
2934 if (op1)
2935 TREE_OPERAND (expr, 1) = op1;
2936
2937 /* Inside address, we might strip the top level component references,
2938 thus changing type of the expression. Handling of ADDR_EXPR
2939 will fix that. */
2940 expr = fold_convert (orig_type, expr);
2941
2942 return expr;
2943 }
2944
2945 /* Strips constant offsets from EXPR and stores them to OFFSET. */
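/* For instance, for an array A of 4-byte elements, strip_offset (&A[i + 3],
   &off) returns &A[i] and sets off to 12 (a sketch assuming the element
   size is a compile-time constant). */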
2946
2947 tree
2948 strip_offset (tree expr, poly_uint64_pod *offset)
2949 {
2950 poly_int64 off;
2951 tree core = strip_offset_1 (expr, false, false, &off);
2952 *offset = off;
2953 return core;
2954 }
2955
2956 /* Returns variant of TYPE that can be used as base for different uses.
2957 We return unsigned type with the same precision, which avoids problems
2958 with overflows. */
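/* For instance, for int this returns unsigned int, and for a pointer type
   it returns an unsigned integer type of the same precision, so that the
   candidate arithmetic wraps instead of invoking undefined behaviour. */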
2959
2960 static tree
2961 generic_type_for (tree type)
2962 {
2963 if (POINTER_TYPE_P (type))
2964 return unsigned_type_for (type);
2965
2966 if (TYPE_UNSIGNED (type))
2967 return type;
2968
2969 return unsigned_type_for (type);
2970 }
2971
2972 /* Private data for walk_tree. */
2973
2974 struct walk_tree_data
2975 {
2976 bitmap *inv_vars;
2977 struct ivopts_data *idata;
2978 };
2979
2980 /* Callback function for walk_tree; it records invariants and symbol
2981 references in *EXPR_P. DATA is the structure storing result info. */
2982
2983 static tree
2984 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2985 {
2986 tree op = *expr_p;
2987 struct version_info *info;
2988 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2989
2990 if (TREE_CODE (op) != SSA_NAME)
2991 return NULL_TREE;
2992
2993 info = name_info (wdata->idata, op);
2994 /* Because we expand simple operations when finding IVs, a loop invariant
2995 variable that isn't referenced in the original loop could be used now.
2996 Record such invariant variables here. */
2997 if (!info->iv)
2998 {
2999 struct ivopts_data *idata = wdata->idata;
3000 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3001
3002 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3003 {
3004 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3005 record_invariant (idata, op, false);
3006 }
3007 }
3008 if (!info->inv_id || info->has_nonlin_use)
3009 return NULL_TREE;
3010
3011 if (!*wdata->inv_vars)
3012 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3013 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3014
3015 return NULL_TREE;
3016 }
3017
3018 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3019 store them. */
3020
3021 static inline void
3022 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3023 {
3024 struct walk_tree_data wdata;
3025
3026 if (!inv_vars)
3027 return;
3028
3029 wdata.idata = data;
3030 wdata.inv_vars = inv_vars;
3031 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3032 }
3033
3034 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3035 will be recorded if it doesn't exist yet. Given below two exprs:
3036 inv_expr + cst1, inv_expr + cst2
3037 It's hard to decide whether the constant part should be stripped
3038 or not. We choose not to strip it, based on the facts below:
3039 1) We would need to count the ADD cost for the constant part if it were
3040 stripped, which isn't always trivial where this function is called.
3041 2) Stripping the constant away may conflict with the subsequent loop
3042 invariant hoisting pass.
3043 3) Not stripping the constant results in more invariant exprs,
3044 which usually leads to decisions preferring lower register pressure. */
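/* For instance, n_5 + 4 and n_5 + 8 are therefore recorded as two distinct
   invariant expressions rather than as a single shared n_5 with the
   constants peeled off. */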
3045
3046 static iv_inv_expr_ent *
3047 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3048 {
3049 STRIP_NOPS (inv_expr);
3050
3051 if (poly_int_tree_p (inv_expr)
3052 || TREE_CODE (inv_expr) == SSA_NAME)
3053 return NULL;
3054
3055 /* Don't strip constant part away as we used to. */
3056
3057 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3058 struct iv_inv_expr_ent ent;
3059 ent.expr = inv_expr;
3060 ent.hash = iterative_hash_expr (inv_expr, 0);
3061 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3062
3063 if (!*slot)
3064 {
3065 *slot = XNEW (struct iv_inv_expr_ent);
3066 (*slot)->expr = inv_expr;
3067 (*slot)->hash = ent.hash;
3068 (*slot)->id = ++data->max_inv_expr_id;
3069 }
3070
3071 return *slot;
3072 }
3073
3074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3075 position to POS. If USE is not NULL, the candidate is set as related to
3076 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3077 replacement of the final value of the iv by a direct computation. */
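/* For instance, if two different uses both request the candidate
   {base 0, step 4} at the normal position, the search over data->vcands
   below finds the existing entry, so only one candidate is created and
   both groups are related to it. */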
3078
3079 static struct iv_cand *
3080 add_candidate_1 (struct ivopts_data *data,
3081 tree base, tree step, bool important, enum iv_position pos,
3082 struct iv_use *use, gimple *incremented_at,
3083 struct iv *orig_iv = NULL)
3084 {
3085 unsigned i;
3086 struct iv_cand *cand = NULL;
3087 tree type, orig_type;
3088
3089 gcc_assert (base && step);
3090
3091 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3092 live, but the ivopts code may replace a real pointer with one
3093 pointing before or after the memory block that is then adjusted
3094 into the memory block during the loop. FIXME: It would likely be
3095 better to actually force the pointer live and still use ivopts;
3096 for example, it would be enough to write the pointer into memory
3097 and keep it there until after the loop. */
3098 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3099 return NULL;
3100
3101 /* For non-original variables, make sure their values are computed in a type
3102 that does not invoke undefined behavior on overflows (since in general,
3103 we cannot prove that these induction variables are non-wrapping). */
3104 if (pos != IP_ORIGINAL)
3105 {
3106 orig_type = TREE_TYPE (base);
3107 type = generic_type_for (orig_type);
3108 if (type != orig_type)
3109 {
3110 base = fold_convert (type, base);
3111 step = fold_convert (type, step);
3112 }
3113 }
3114
3115 for (i = 0; i < data->vcands.length (); i++)
3116 {
3117 cand = data->vcands[i];
3118
3119 if (cand->pos != pos)
3120 continue;
3121
3122 if (cand->incremented_at != incremented_at
3123 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3124 && cand->ainc_use != use))
3125 continue;
3126
3127 if (operand_equal_p (base, cand->iv->base, 0)
3128 && operand_equal_p (step, cand->iv->step, 0)
3129 && (TYPE_PRECISION (TREE_TYPE (base))
3130 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3131 break;
3132 }
3133
3134 if (i == data->vcands.length ())
3135 {
3136 cand = XCNEW (struct iv_cand);
3137 cand->id = i;
3138 cand->iv = alloc_iv (data, base, step);
3139 cand->pos = pos;
3140 if (pos != IP_ORIGINAL)
3141 {
3142 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3143 cand->var_after = cand->var_before;
3144 }
3145 cand->important = important;
3146 cand->incremented_at = incremented_at;
3147 data->vcands.safe_push (cand);
3148
3149 if (!poly_int_tree_p (step))
3150 {
3151 find_inv_vars (data, &step, &cand->inv_vars);
3152
3153 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3154 /* Share bitmap between inv_vars and inv_exprs for cand. */
3155 if (inv_expr != NULL)
3156 {
3157 cand->inv_exprs = cand->inv_vars;
3158 cand->inv_vars = NULL;
3159 if (cand->inv_exprs)
3160 bitmap_clear (cand->inv_exprs);
3161 else
3162 cand->inv_exprs = BITMAP_ALLOC (NULL);
3163
3164 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3165 }
3166 }
3167
3168 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3169 cand->ainc_use = use;
3170 else
3171 cand->ainc_use = NULL;
3172
3173 cand->orig_iv = orig_iv;
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3175 dump_cand (dump_file, cand);
3176 }
3177
3178 cand->important |= important;
3179
3180 /* Relate candidate to the group for which it is added. */
3181 if (use)
3182 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3183
3184 return cand;
3185 }
3186
3187 /* Returns true if incrementing the induction variable at the end of the LOOP
3188 is allowed.
3189
3190 The purpose is to avoid splitting the latch edge with a biv increment, thus
3191 creating a jump, possibly confusing other optimization passes and leaving
3192 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3193 available (so we do not have a better alternative), or if the latch edge
3194 is already nonempty. */
3195
3196 static bool
3197 allow_ip_end_pos_p (struct loop *loop)
3198 {
3199 if (!ip_normal_pos (loop))
3200 return true;
3201
3202 if (!empty_block_p (ip_end_pos (loop)))
3203 return true;
3204
3205 return false;
3206 }
3207
3208 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3209 Important field is set to IMPORTANT. */
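/* For instance, on a target with post-increment addressing, a load from
   *p_1 whose iv step equals the size of the access gets an IP_AFTER_USE
   candidate, so the pointer update can be folded into the memory access
   (a sketch; the exact forms depend on the target's auto-inc support). */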
3210
3211 static void
3212 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3213 bool important, struct iv_use *use)
3214 {
3215 basic_block use_bb = gimple_bb (use->stmt);
3216 machine_mode mem_mode;
3217 unsigned HOST_WIDE_INT cstepi;
3218
3219 /* If we insert the increment in any position other than the standard
3220 ones, we must ensure that it is incremented once per iteration.
3221 It must not be in an inner nested loop, or one side of an if
3222 statement. */
3223 if (use_bb->loop_father != data->current_loop
3224 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3225 || stmt_can_throw_internal (use->stmt)
3226 || !cst_and_fits_in_hwi (step))
3227 return;
3228
3229 cstepi = int_cst_value (step);
3230
3231 mem_mode = TYPE_MODE (use->mem_type);
3232 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3233 || USE_STORE_PRE_INCREMENT (mem_mode))
3234 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3235 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3236 || USE_STORE_PRE_DECREMENT (mem_mode))
3237 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3238 {
3239 enum tree_code code = MINUS_EXPR;
3240 tree new_base;
3241 tree new_step = step;
3242
3243 if (POINTER_TYPE_P (TREE_TYPE (base)))
3244 {
3245 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3246 code = POINTER_PLUS_EXPR;
3247 }
3248 else
3249 new_step = fold_convert (TREE_TYPE (base), new_step);
3250 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3251 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3252 use->stmt);
3253 }
3254 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3255 || USE_STORE_POST_INCREMENT (mem_mode))
3256 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3257 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3258 || USE_STORE_POST_DECREMENT (mem_mode))
3259 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3260 {
3261 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3262 use->stmt);
3263 }
3264 }
3265
3266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3267 position to POS. If USE is not NULL, the candidate is set as related to
3268 it. The candidate computation is scheduled before exit condition and at
3269 the end of loop. */
3270
3271 static void
3272 add_candidate (struct ivopts_data *data,
3273 tree base, tree step, bool important, struct iv_use *use,
3274 struct iv *orig_iv = NULL)
3275 {
3276 if (ip_normal_pos (data->current_loop))
3277 add_candidate_1 (data, base, step, important,
3278 IP_NORMAL, use, NULL, orig_iv);
3279 if (ip_end_pos (data->current_loop)
3280 && allow_ip_end_pos_p (data->current_loop))
3281 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3282 }
3283
3284 /* Adds standard iv candidates. */
3285
3286 static void
3287 add_standard_iv_candidates (struct ivopts_data *data)
3288 {
3289 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3290
3291 /* The same for a double-integer type if it is still fast enough. */
3292 if (TYPE_PRECISION
3293 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3294 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3295 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3296 build_int_cst (long_integer_type_node, 1), true, NULL);
3297
3298 /* The same for a double-integer type if it is still fast enough. */
3299 if (TYPE_PRECISION
3300 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3301 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3302 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3303 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3304 }
3305
3306
3307 /* Adds candidates based on the old induction variable IV. */
3308
3309 static void
3310 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3311 {
3312 gimple *phi;
3313 tree def;
3314 struct iv_cand *cand;
3315
3316 /* Check if this biv is used in address type use. */
3317 if (iv->no_overflow && iv->have_address_use
3318 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3319 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3320 {
3321 tree base = fold_convert (sizetype, iv->base);
3322 tree step = fold_convert (sizetype, iv->step);
3323
3324 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3325 add_candidate (data, base, step, true, NULL, iv);
3326 /* Add iv cand of the original type only if it has nonlinear use. */
3327 if (iv->nonlin_use)
3328 add_candidate (data, iv->base, iv->step, true, NULL);
3329 }
3330 else
3331 add_candidate (data, iv->base, iv->step, true, NULL);
3332
3333 /* The same, but with initial value zero. */
3334 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3335 add_candidate (data, size_int (0), iv->step, true, NULL);
3336 else
3337 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3338 iv->step, true, NULL);
3339
3340 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3341 if (gimple_code (phi) == GIMPLE_PHI)
3342 {
3343 /* Additionally record the possibility of leaving the original iv
3344 untouched. */
3345 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3346 /* Don't add candidate if it's from another PHI node because
3347 it's an affine iv appearing in the form of PEELED_CHREC. */
3348 phi = SSA_NAME_DEF_STMT (def);
3349 if (gimple_code (phi) != GIMPLE_PHI)
3350 {
3351 cand = add_candidate_1 (data,
3352 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3353 SSA_NAME_DEF_STMT (def));
3354 if (cand)
3355 {
3356 cand->var_before = iv->ssa_name;
3357 cand->var_after = def;
3358 }
3359 }
3360 else
3361 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3362 }
3363 }
3364
3365 /* Adds candidates based on the old induction variables. */
3366
3367 static void
3368 add_iv_candidate_for_bivs (struct ivopts_data *data)
3369 {
3370 unsigned i;
3371 struct iv *iv;
3372 bitmap_iterator bi;
3373
3374 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3375 {
3376 iv = ver_info (data, i)->iv;
3377 if (iv && iv->biv_p && !integer_zerop (iv->step))
3378 add_iv_candidate_for_biv (data, iv);
3379 }
3380 }
3381
3382 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
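/* For instance, address uses a[i_1] and b[i_1] both record the common
   candidate {base 0, step 4} here (assuming 4-byte elements), and
   add_iv_candidate_derived_from_uses later turns candidates shared by
   more than one use into real candidates. */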
3383
3384 static void
3385 record_common_cand (struct ivopts_data *data, tree base,
3386 tree step, struct iv_use *use)
3387 {
3388 struct iv_common_cand ent;
3389 struct iv_common_cand **slot;
3390
3391 ent.base = base;
3392 ent.step = step;
3393 ent.hash = iterative_hash_expr (base, 0);
3394 ent.hash = iterative_hash_expr (step, ent.hash);
3395
3396 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3397 if (*slot == NULL)
3398 {
3399 *slot = new iv_common_cand ();
3400 (*slot)->base = base;
3401 (*slot)->step = step;
3402 (*slot)->uses.create (8);
3403 (*slot)->hash = ent.hash;
3404 data->iv_common_cands.safe_push ((*slot));
3405 }
3406
3407 gcc_assert (use != NULL);
3408 (*slot)->uses.safe_push (use);
3409 return;
3410 }
3411
3412 /* Comparison function used to sort common candidates. */
3413
3414 static int
3415 common_cand_cmp (const void *p1, const void *p2)
3416 {
3417 unsigned n1, n2;
3418 const struct iv_common_cand *const *const ccand1
3419 = (const struct iv_common_cand *const *)p1;
3420 const struct iv_common_cand *const *const ccand2
3421 = (const struct iv_common_cand *const *)p2;
3422
3423 n1 = (*ccand1)->uses.length ();
3424 n2 = (*ccand2)->uses.length ();
3425 return n2 - n1;
3426 }
3427
3428 /* Adds IV candidates based on the common candidates recorded. */
3429
3430 static void
3431 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3432 {
3433 unsigned i, j;
3434 struct iv_cand *cand_1, *cand_2;
3435
3436 data->iv_common_cands.qsort (common_cand_cmp);
3437 for (i = 0; i < data->iv_common_cands.length (); i++)
3438 {
3439 struct iv_common_cand *ptr = data->iv_common_cands[i];
3440
3441 /* Only add IV candidate if it's derived from multiple uses. */
3442 if (ptr->uses.length () <= 1)
3443 break;
3444
3445 cand_1 = NULL;
3446 cand_2 = NULL;
3447 if (ip_normal_pos (data->current_loop))
3448 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3449 false, IP_NORMAL, NULL, NULL);
3450
3451 if (ip_end_pos (data->current_loop)
3452 && allow_ip_end_pos_p (data->current_loop))
3453 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3454 false, IP_END, NULL, NULL);
3455
3456 /* Bind deriving uses and the new candidates. */
3457 for (j = 0; j < ptr->uses.length (); j++)
3458 {
3459 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3460 if (cand_1)
3461 bitmap_set_bit (group->related_cands, cand_1->id);
3462 if (cand_2)
3463 bitmap_set_bit (group->related_cands, cand_2->id);
3464 }
3465 }
3466
3467 /* Release data since it is useless from this point. */
3468 data->iv_common_cand_tab->empty ();
3469 data->iv_common_cands.truncate (0);
3470 }
3471
3472 /* Adds candidates based on the value of USE's iv. */
3473
3474 static void
3475 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3476 {
3477 poly_uint64 offset;
3478 tree base;
3479 tree basetype;
3480 struct iv *iv = use->iv;
3481
3482 add_candidate (data, iv->base, iv->step, false, use);
3483
3484 /* Record common candidate for use in case it can be shared by others. */
3485 record_common_cand (data, iv->base, iv->step, use);
3486
3487 /* Record common candidate with initial value zero. */
3488 basetype = TREE_TYPE (iv->base);
3489 if (POINTER_TYPE_P (basetype))
3490 basetype = sizetype;
3491 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3492
3493 /* Record common candidate with constant offset stripped in base.
3494 Like the use itself, we also add candidate directly for it. */
3495 base = strip_offset (iv->base, &offset);
3496 if (maybe_ne (offset, 0U) || base != iv->base)
3497 {
3498 record_common_cand (data, base, iv->step, use);
3499 add_candidate (data, base, iv->step, false, use);
3500 }
3501
3502 /* Record common candidate with base_object removed in base. */
3503 base = iv->base;
3504 STRIP_NOPS (base);
3505 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3506 {
3507 tree step = iv->step;
3508
3509 STRIP_NOPS (step);
3510 base = TREE_OPERAND (base, 1);
3511 step = fold_convert (sizetype, step);
3512 record_common_cand (data, base, step, use);
3513 /* Also record common candidate with offset stripped. */
3514 base = strip_offset (base, &offset);
3515 if (maybe_ne (offset, 0U))
3516 record_common_cand (data, base, step, use);
3517 }
3518
3519 /* At last, add auto-incremental candidates. Make such variables
3520 important since other iv uses with same base object may be based
3521 on it. */
3522 if (use != NULL && address_p (use->type))
3523 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3524 }
3525
3526 /* Adds candidates based on the uses. */
3527
3528 static void
3529 add_iv_candidate_for_groups (struct ivopts_data *data)
3530 {
3531 unsigned i;
3532
3533 /* Only add candidate for the first use in group. */
3534 for (i = 0; i < data->vgroups.length (); i++)
3535 {
3536 struct iv_group *group = data->vgroups[i];
3537
3538 gcc_assert (group->vuses[0] != NULL);
3539 add_iv_candidate_for_use (data, group->vuses[0]);
3540 }
3541 add_iv_candidate_derived_from_uses (data);
3542 }
3543
3544 /* Record important candidates and add them to related_cands bitmaps. */
3545
3546 static void
3547 record_important_candidates (struct ivopts_data *data)
3548 {
3549 unsigned i;
3550 struct iv_group *group;
3551
3552 for (i = 0; i < data->vcands.length (); i++)
3553 {
3554 struct iv_cand *cand = data->vcands[i];
3555
3556 if (cand->important)
3557 bitmap_set_bit (data->important_candidates, i);
3558 }
3559
3560 data->consider_all_candidates = (data->vcands.length ()
3561 <= CONSIDER_ALL_CANDIDATES_BOUND);
3562
3563 /* Add important candidates to groups' related_cands bitmaps. */
3564 for (i = 0; i < data->vgroups.length (); i++)
3565 {
3566 group = data->vgroups[i];
3567 bitmap_ior_into (group->related_cands, data->important_candidates);
3568 }
3569 }
3570
3571 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3572 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3573 we allocate a simple list to every use. */
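/* For instance, a group related to 5 candidates gets a map of 8 entries,
   so the open-addressed lookup in get_group_iv_cost can start probing at
   cand->id & 7. */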
3574
3575 static void
3576 alloc_use_cost_map (struct ivopts_data *data)
3577 {
3578 unsigned i, size, s;
3579
3580 for (i = 0; i < data->vgroups.length (); i++)
3581 {
3582 struct iv_group *group = data->vgroups[i];
3583
3584 if (data->consider_all_candidates)
3585 size = data->vcands.length ();
3586 else
3587 {
3588 s = bitmap_count_bits (group->related_cands);
3589
3590 /* Round up to a power of two, so that computing the modulo by it is fast. */
3591 size = s ? (1 << ceil_log2 (s)) : 1;
3592 }
3593
3594 group->n_map_members = size;
3595 group->cost_map = XCNEWVEC (struct cost_pair, size);
3596 }
3597 }
3598
3599 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3600 on invariants INV_VARS and that the value used in expressing it is
3601 VALUE, and in case of iv elimination the comparison operator is COMP. */
3602
3603 static void
3604 set_group_iv_cost (struct ivopts_data *data,
3605 struct iv_group *group, struct iv_cand *cand,
3606 comp_cost cost, bitmap inv_vars, tree value,
3607 enum tree_code comp, bitmap inv_exprs)
3608 {
3609 unsigned i, s;
3610
3611 if (cost.infinite_cost_p ())
3612 {
3613 BITMAP_FREE (inv_vars);
3614 BITMAP_FREE (inv_exprs);
3615 return;
3616 }
3617
3618 if (data->consider_all_candidates)
3619 {
3620 group->cost_map[cand->id].cand = cand;
3621 group->cost_map[cand->id].cost = cost;
3622 group->cost_map[cand->id].inv_vars = inv_vars;
3623 group->cost_map[cand->id].inv_exprs = inv_exprs;
3624 group->cost_map[cand->id].value = value;
3625 group->cost_map[cand->id].comp = comp;
3626 return;
3627 }
3628
3629 /* n_map_members is a power of two, so this computes modulo. */
3630 s = cand->id & (group->n_map_members - 1);
3631 for (i = s; i < group->n_map_members; i++)
3632 if (!group->cost_map[i].cand)
3633 goto found;
3634 for (i = 0; i < s; i++)
3635 if (!group->cost_map[i].cand)
3636 goto found;
3637
3638 gcc_unreachable ();
3639
3640 found:
3641 group->cost_map[i].cand = cand;
3642 group->cost_map[i].cost = cost;
3643 group->cost_map[i].inv_vars = inv_vars;
3644 group->cost_map[i].inv_exprs = inv_exprs;
3645 group->cost_map[i].value = value;
3646 group->cost_map[i].comp = comp;
3647 }
3648
3649 /* Gets cost of (GROUP, CAND) pair. */
3650
3651 static struct cost_pair *
3652 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3653 struct iv_cand *cand)
3654 {
3655 unsigned i, s;
3656 struct cost_pair *ret;
3657
3658 if (!cand)
3659 return NULL;
3660
3661 if (data->consider_all_candidates)
3662 {
3663 ret = group->cost_map + cand->id;
3664 if (!ret->cand)
3665 return NULL;
3666
3667 return ret;
3668 }
3669
3670 /* n_map_members is a power of two, so this computes modulo. */
3671 s = cand->id & (group->n_map_members - 1);
3672 for (i = s; i < group->n_map_members; i++)
3673 if (group->cost_map[i].cand == cand)
3674 return group->cost_map + i;
3675 else if (group->cost_map[i].cand == NULL)
3676 return NULL;
3677 for (i = 0; i < s; i++)
3678 if (group->cost_map[i].cand == cand)
3679 return group->cost_map + i;
3680 else if (group->cost_map[i].cand == NULL)
3681 return NULL;
3682
3683 return NULL;
3684 }
3685
3686 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3687 static rtx
3688 produce_memory_decl_rtl (tree obj, int *regno)
3689 {
3690 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3691 machine_mode address_mode = targetm.addr_space.address_mode (as);
3692 rtx x;
3693
3694 gcc_assert (obj);
3695 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3696 {
3697 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3698 x = gen_rtx_SYMBOL_REF (address_mode, name);
3699 SET_SYMBOL_REF_DECL (x, obj);
3700 x = gen_rtx_MEM (DECL_MODE (obj), x);
3701 set_mem_addr_space (x, as);
3702 targetm.encode_section_info (obj, x, true);
3703 }
3704 else
3705 {
3706 x = gen_raw_REG (address_mode, (*regno)++);
3707 x = gen_rtx_MEM (DECL_MODE (obj), x);
3708 set_mem_addr_space (x, as);
3709 }
3710
3711 return x;
3712 }
3713
/* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA points to the next fake register number to be used.  */
3716
3717 static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3719 {
3720 tree obj = NULL_TREE;
3721 rtx x = NULL_RTX;
3722 int *regno = (int *) data;
3723
3724 switch (TREE_CODE (*expr_p))
3725 {
3726 case ADDR_EXPR:
3727 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3728 handled_component_p (*expr_p);
3729 expr_p = &TREE_OPERAND (*expr_p, 0))
3730 continue;
3731 obj = *expr_p;
3732 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3733 x = produce_memory_decl_rtl (obj, regno);
3734 break;
3735
3736 case SSA_NAME:
3737 *ws = 0;
3738 obj = SSA_NAME_VAR (*expr_p);
3739 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3740 if (!obj)
3741 return NULL_TREE;
3742 if (!DECL_RTL_SET_P (obj))
3743 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3744 break;
3745
3746 case VAR_DECL:
3747 case PARM_DECL:
3748 case RESULT_DECL:
3749 *ws = 0;
3750 obj = *expr_p;
3751
3752 if (DECL_RTL_SET_P (obj))
3753 break;
3754
3755 if (DECL_MODE (obj) == BLKmode)
3756 x = produce_memory_decl_rtl (obj, regno);
3757 else
3758 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3759
3760 break;
3761
3762 default:
3763 break;
3764 }
3765
3766 if (x)
3767 {
3768 decl_rtl_to_reset.safe_push (obj);
3769 SET_DECL_RTL (obj, x);
3770 }
3771
3772 return NULL_TREE;
3773 }
3774
3775 /* Determines cost of the computation of EXPR. */
3776
3777 static unsigned
computation_cost (tree expr, bool speed)
3779 {
3780 rtx_insn *seq;
3781 rtx rslt;
3782 tree type = TREE_TYPE (expr);
3783 unsigned cost;
3784 /* Avoid using hard regs in ways which may be unsupported. */
3785 int regno = LAST_VIRTUAL_REGISTER + 1;
3786 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3787 enum node_frequency real_frequency = node->frequency;
3788
3789 node->frequency = NODE_FREQUENCY_NORMAL;
3790 crtl->maybe_hot_insn_p = speed;
walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3792 start_sequence ();
3793 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3794 seq = get_insns ();
3795 end_sequence ();
3796 default_rtl_profile ();
3797 node->frequency = real_frequency;
3798
3799 cost = seq_cost (seq, speed);
3800 if (MEM_P (rslt))
3801 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3802 TYPE_ADDR_SPACE (type), speed);
3803 else if (!REG_P (rslt))
3804 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3805
3806 return cost;
3807 }
3808
3809 /* Returns variable containing the value of candidate CAND at statement AT. */
3810
3811 static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3813 {
3814 if (stmt_after_increment (loop, cand, stmt))
3815 return cand->var_after;
3816 else
3817 return cand->var_before;
3818 }
3819
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision, which is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, leaves A and B
   unchanged and returns the type of A.  */
3824
3825 static tree
determine_common_wider_type (tree *a, tree *b)
3827 {
3828 tree wider_type = NULL;
3829 tree suba, subb;
3830 tree atype = TREE_TYPE (*a);
3831
3832 if (CONVERT_EXPR_P (*a))
3833 {
3834 suba = TREE_OPERAND (*a, 0);
3835 wider_type = TREE_TYPE (suba);
3836 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3837 return atype;
3838 }
3839 else
3840 return atype;
3841
3842 if (CONVERT_EXPR_P (*b))
3843 {
3844 subb = TREE_OPERAND (*b, 0);
3845 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3846 return atype;
3847 }
3848 else
3849 return atype;
3850
3851 *a = suba;
3852 *b = subb;
3853 return wider_type;
3854 }
3855
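/* Example (hypothetical types, for illustration only): if *a is
   (unsigned int) x and *b is (unsigned int) y, where both x and y have type
   unsigned long, the conversions are stripped, *a becomes x, *b becomes y
   and unsigned long is returned, so the caller can do the subtraction in the
   wider type.  If only one operand is a conversion, or the inner precisions
   differ, the type of *a is returned and nothing is changed.  */
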
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in decomposed form:
   the invariant part in AFF_INV and the variant part in AFF_VAR.  Stores the
   ratio of USE.step over CAND.step in PRAT if it is non-null.  Returns false
   if USE cannot be expressed using CAND.  */
3861
3862 static bool
3863 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3864 struct iv_cand *cand, struct aff_tree *aff_inv,
3865 struct aff_tree *aff_var, widest_int *prat = NULL)
3866 {
3867 tree ubase = use->iv->base, ustep = use->iv->step;
3868 tree cbase = cand->iv->base, cstep = cand->iv->step;
3869 tree common_type, uutype, var, cstep_common;
3870 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3871 aff_tree aff_cbase;
3872 widest_int rat;
3873
/* We must have enough precision to express the values of the use.  */
3875 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3876 return false;
3877
3878 var = var_at_stmt (loop, cand, at);
3879 uutype = unsigned_type_for (utype);
3880
3881 /* If the conversion is not noop, perform it. */
3882 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3883 {
3884 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3885 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3886 {
3887 tree inner_base, inner_step, inner_type;
3888 inner_base = TREE_OPERAND (cbase, 0);
3889 if (CONVERT_EXPR_P (cstep))
3890 inner_step = TREE_OPERAND (cstep, 0);
3891 else
3892 inner_step = cstep;
3893
3894 inner_type = TREE_TYPE (inner_base);
/* If the candidate is added from a biv whose type is smaller than
   ctype, we know that neither the candidate nor the biv overflows.
   In this case, it is safe to skip the conversion in the candidate.
   For example, (unsigned short)((unsigned long)A) is equal to
   (unsigned short)A, if A has a type no larger than short.  */
3900 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3901 {
3902 cbase = inner_base;
3903 cstep = inner_step;
3904 }
3905 }
3906 cbase = fold_convert (uutype, cbase);
3907 cstep = fold_convert (uutype, cstep);
3908 var = fold_convert (uutype, var);
3909 }
3910
3911 /* Ratio is 1 when computing the value of biv cand by itself.
3912 We can't rely on constant_multiple_of in this case because the
3913 use is created after the original biv is selected. The call
3914 could fail because of inconsistent fold behavior. See PR68021
3915 for more information. */
3916 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3917 {
3918 gcc_assert (is_gimple_assign (use->stmt));
3919 gcc_assert (use->iv->ssa_name == cand->var_after);
3920 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3921 rat = 1;
3922 }
3923 else if (!constant_multiple_of (ustep, cstep, &rat))
3924 return false;
3925
3926 if (prat)
3927 *prat = rat;
3928
/* In case both UBASE and CBASE are shortened to UUTYPE from some common
   type, we achieve better folding by computing their difference in this
   wider type and casting the result to UUTYPE.  We do not need to worry
   about overflows, as all the arithmetic will in the end be performed in
   UUTYPE anyway.  */
3934 common_type = determine_common_wider_type (&ubase, &cbase);
3935
3936 /* use = ubase - ratio * cbase + ratio * var. */
3937 tree_to_aff_combination (ubase, common_type, aff_inv);
3938 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3939 tree_to_aff_combination (var, uutype, aff_var);
3940
3941 /* We need to shift the value if we are after the increment. */
3942 if (stmt_after_increment (loop, cand, at))
3943 {
3944 aff_tree cstep_aff;
3945
3946 if (common_type != uutype)
3947 cstep_common = fold_convert (common_type, cstep);
3948 else
3949 cstep_common = cstep;
3950
3951 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3952 aff_combination_add (&aff_cbase, &cstep_aff);
3953 }
3954
3955 aff_combination_scale (&aff_cbase, -rat);
3956 aff_combination_add (aff_inv, &aff_cbase);
3957 if (common_type != uutype)
3958 aff_combination_convert (aff_inv, uutype);
3959
3960 aff_combination_scale (aff_var, rat);
3961 return true;
3962 }
3963
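/* A small worked example (made-up IVs, not from the source above): for a use
   with base 16 and step 8 and a candidate with base 0 and step 4,
   constant_multiple_of gives rat == 2, so with the decomposition

     use = ubase - rat * cbase + rat * var

   the invariant part becomes 16 - 2 * 0 == 16 and the variant part 2 * var;
   the use is then rebuilt as 2 * var + 16 from the candidate variable.  */
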
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in decomposed form
   in AFF.  Returns false if USE cannot be expressed using CAND.  */
3967
3968 static bool
get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3970 struct iv_cand *cand, struct aff_tree *aff)
3971 {
3972 aff_tree aff_var;
3973
3974 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3975 return false;
3976
3977 aff_combination_add (aff, &aff_var);
3978 return true;
3979 }
3980
3981 /* Return the type of USE. */
3982
3983 static tree
get_use_type (struct iv_use *use)
3985 {
3986 tree base_type = TREE_TYPE (use->iv->base);
3987 tree type;
3988
3989 if (use->type == USE_REF_ADDRESS)
3990 {
3991 /* The base_type may be a void pointer. Create a pointer type based on
3992 the mem_ref instead. */
3993 type = build_pointer_type (TREE_TYPE (*use->op_p));
3994 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3995 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3996 }
3997 else
3998 type = base_type;
3999
4000 return type;
4001 }
4002
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */
4005
4006 static tree
get_computation_at (struct loop *loop, gimple *at,
4008 struct iv_use *use, struct iv_cand *cand)
4009 {
4010 aff_tree aff;
4011 tree type = get_use_type (use);
4012
4013 if (!get_computation_aff (loop, at, use, cand, &aff))
4014 return NULL_TREE;
4015 unshare_aff_combination (&aff);
4016 return fold_convert (type, aff_combination_to_tree (&aff));
4017 }
4018
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the expected number of
   loop iterations.  If ROUND_UP_P is true, the result is rounded up rather
   than truncated toward zero when optimizing for speed.  */
4024 static unsigned
4025 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4026 bool round_up_p = false)
4027 {
4028 if (cost == INFTY)
4029 return cost;
4030 else if (optimize_loop_for_speed_p (data->current_loop))
4031 {
4032 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4033 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4034 }
4035 else
4036 return cost;
4037 }
4038
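/* For example (hypothetical numbers): with a setup cost of 10 and
   avg_loop_niter == 4, optimizing for speed amortizes the cost to
   10 / 4 == 2, or to (10 + 3) / 4 == 3 when ROUND_UP_P is true; when
   optimizing for size the cost stays 10.  */
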
/* Calculate the speed or size cost (according to SPEED) of the shiftadd
   expression EXPR in MODE.  MULT is the operand of EXPR holding the
   multiplication that is turned into the shift.  COST0 and COST1 are the
   costs for calculating the operands of EXPR.  Returns true if successful,
   and returns the cost in COST.  */
4043
4044 static bool
get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4046 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4047 {
4048 comp_cost res;
4049 tree op1 = TREE_OPERAND (expr, 1);
4050 tree cst = TREE_OPERAND (mult, 1);
4051 tree multop = TREE_OPERAND (mult, 0);
4052 int m = exact_log2 (int_cst_value (cst));
4053 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4054 int as_cost, sa_cost;
4055 bool mult_in_op1;
4056
4057 if (!(m >= 0 && m < maxm))
4058 return false;
4059
4060 STRIP_NOPS (op1);
4061 mult_in_op1 = operand_equal_p (op1, mult, 0);
4062
4063 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4064
4065 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4066 use that in preference to a shift insn followed by an add insn. */
4067 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4068 ? shiftadd_cost (speed, mode, m)
4069 : (mult_in_op1
4070 ? shiftsub1_cost (speed, mode, m)
4071 : shiftsub0_cost (speed, mode, m)));
4072
4073 res = comp_cost (MIN (as_cost, sa_cost), 0);
4074 res += (mult_in_op1 ? cost0 : cost1);
4075
4076 STRIP_NOPS (multop);
4077 if (!is_gimple_val (multop))
4078 res += force_expr_to_var_cost (multop, speed);
4079
4080 *cost = res;
4081 return true;
4082 }
4083
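/* Illustration (hypothetical expression): for a + b * 8 the constant 8 gives
   m == 3, so the cost of the addition is the cheaper of add plus shift-by-3
   and the target's shift-and-add instruction; for a - b * 8 the
   shift-and-subtract variants are queried instead.  */
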
4084 /* Estimates cost of forcing expression EXPR into a variable. */
4085
4086 static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
4088 {
4089 static bool costs_initialized = false;
4090 static unsigned integer_cost [2];
4091 static unsigned symbol_cost [2];
4092 static unsigned address_cost [2];
4093 tree op0, op1;
4094 comp_cost cost0, cost1, cost;
4095 machine_mode mode;
4096 scalar_int_mode int_mode;
4097
4098 if (!costs_initialized)
4099 {
4100 tree type = build_pointer_type (integer_type_node);
4101 tree var, addr;
4102 rtx x;
4103 int i;
4104
4105 var = create_tmp_var_raw (integer_type_node, "test_var");
4106 TREE_STATIC (var) = 1;
4107 x = produce_memory_decl_rtl (var, NULL);
4108 SET_DECL_RTL (var, x);
4109
4110 addr = build1 (ADDR_EXPR, type, var);
4111
4112
4113 for (i = 0; i < 2; i++)
4114 {
4115 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4116 2000), i);
4117
4118 symbol_cost[i] = computation_cost (addr, i) + 1;
4119
4120 address_cost[i]
4121 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4122 if (dump_file && (dump_flags & TDF_DETAILS))
4123 {
4124 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4125 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4126 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4127 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4128 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4129 fprintf (dump_file, "\n");
4130 }
4131 }
4132
4133 costs_initialized = true;
4134 }
4135
4136 STRIP_NOPS (expr);
4137
4138 if (SSA_VAR_P (expr))
4139 return no_cost;
4140
4141 if (is_gimple_min_invariant (expr))
4142 {
4143 if (poly_int_tree_p (expr))
4144 return comp_cost (integer_cost [speed], 0);
4145
4146 if (TREE_CODE (expr) == ADDR_EXPR)
4147 {
4148 tree obj = TREE_OPERAND (expr, 0);
4149
4150 if (VAR_P (obj)
4151 || TREE_CODE (obj) == PARM_DECL
4152 || TREE_CODE (obj) == RESULT_DECL)
4153 return comp_cost (symbol_cost [speed], 0);
4154 }
4155
4156 return comp_cost (address_cost [speed], 0);
4157 }
4158
4159 switch (TREE_CODE (expr))
4160 {
4161 case POINTER_PLUS_EXPR:
4162 case PLUS_EXPR:
4163 case MINUS_EXPR:
4164 case MULT_EXPR:
4165 case TRUNC_DIV_EXPR:
4166 case BIT_AND_EXPR:
4167 case BIT_IOR_EXPR:
4168 case LSHIFT_EXPR:
4169 case RSHIFT_EXPR:
4170 op0 = TREE_OPERAND (expr, 0);
4171 op1 = TREE_OPERAND (expr, 1);
4172 STRIP_NOPS (op0);
4173 STRIP_NOPS (op1);
4174 break;
4175
4176 CASE_CONVERT:
4177 case NEGATE_EXPR:
4178 case BIT_NOT_EXPR:
4179 op0 = TREE_OPERAND (expr, 0);
4180 STRIP_NOPS (op0);
4181 op1 = NULL_TREE;
4182 break;
4183
4184 default:
4185 /* Just an arbitrary value, FIXME. */
4186 return comp_cost (target_spill_cost[speed], 0);
4187 }
4188
4189 if (op0 == NULL_TREE
4190 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4191 cost0 = no_cost;
4192 else
4193 cost0 = force_expr_to_var_cost (op0, speed);
4194
4195 if (op1 == NULL_TREE
4196 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4197 cost1 = no_cost;
4198 else
4199 cost1 = force_expr_to_var_cost (op1, speed);
4200
4201 mode = TYPE_MODE (TREE_TYPE (expr));
4202 switch (TREE_CODE (expr))
4203 {
4204 case POINTER_PLUS_EXPR:
4205 case PLUS_EXPR:
4206 case MINUS_EXPR:
4207 case NEGATE_EXPR:
4208 cost = comp_cost (add_cost (speed, mode), 0);
4209 if (TREE_CODE (expr) != NEGATE_EXPR)
4210 {
4211 tree mult = NULL_TREE;
4212 comp_cost sa_cost;
4213 if (TREE_CODE (op1) == MULT_EXPR)
4214 mult = op1;
4215 else if (TREE_CODE (op0) == MULT_EXPR)
4216 mult = op0;
4217
4218 if (mult != NULL_TREE
4219 && is_a <scalar_int_mode> (mode, &int_mode)
4220 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4221 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4222 speed, &sa_cost))
4223 return sa_cost;
4224 }
4225 break;
4226
4227 CASE_CONVERT:
4228 {
4229 tree inner_mode, outer_mode;
4230 outer_mode = TREE_TYPE (expr);
4231 inner_mode = TREE_TYPE (op0);
4232 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4233 TYPE_MODE (inner_mode), speed), 0);
4234 }
4235 break;
4236
4237 case MULT_EXPR:
4238 if (cst_and_fits_in_hwi (op0))
4239 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4240 mode, speed), 0);
4241 else if (cst_and_fits_in_hwi (op1))
4242 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4243 mode, speed), 0);
4244 else
4245 return comp_cost (target_spill_cost [speed], 0);
4246 break;
4247
4248 case TRUNC_DIV_EXPR:
4249 /* Division by power of two is usually cheap, so we allow it. Forbid
4250 anything else. */
4251 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4252 cost = comp_cost (add_cost (speed, mode), 0);
4253 else
4254 cost = comp_cost (target_spill_cost[speed], 0);
4255 break;
4256
4257 case BIT_AND_EXPR:
4258 case BIT_IOR_EXPR:
4259 case BIT_NOT_EXPR:
4260 case LSHIFT_EXPR:
4261 case RSHIFT_EXPR:
4262 cost = comp_cost (add_cost (speed, mode), 0);
4263 break;
4264
4265 default:
4266 gcc_unreachable ();
4267 }
4268
4269 cost += cost0;
4270 cost += cost1;
4271 return cost;
4272 }
4273
4274 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4275 invariants the computation depends on. */
4276
4277 static comp_cost
force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4279 {
4280 if (!expr)
4281 return no_cost;
4282
4283 find_inv_vars (data, &expr, inv_vars);
4284 return force_expr_to_var_cost (expr, data->speed);
4285 }
4286
/* Returns the cost of an auto-modifying address expression of the form
   base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
   is the offset of the address expression.  The address expression has
   ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED says
   whether we are optimizing for speed or for size.  */
4292
4293 enum ainc_type
4294 {
4295 AINC_PRE_INC, /* Pre increment. */
4296 AINC_PRE_DEC, /* Pre decrement. */
4297 AINC_POST_INC, /* Post increment. */
4298 AINC_POST_DEC, /* Post decrement. */
4299 AINC_NONE /* Also the number of auto increment types. */
4300 };
4301
4302 struct ainc_cost_data
4303 {
4304 unsigned costs[AINC_NONE];
4305 };
4306
4307 static comp_cost
get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4309 machine_mode addr_mode, machine_mode mem_mode,
4310 addr_space_t as, bool speed)
4311 {
4312 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4313 && !USE_STORE_PRE_DECREMENT (mem_mode)
4314 && !USE_LOAD_POST_DECREMENT (mem_mode)
4315 && !USE_STORE_POST_DECREMENT (mem_mode)
4316 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4317 && !USE_STORE_PRE_INCREMENT (mem_mode)
4318 && !USE_LOAD_POST_INCREMENT (mem_mode)
4319 && !USE_STORE_POST_INCREMENT (mem_mode))
4320 return infinite_cost;
4321
4322 static vec<ainc_cost_data *> ainc_cost_data_list;
4323 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4324 if (idx >= ainc_cost_data_list.length ())
4325 {
unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4327
4328 gcc_assert (nsize > idx);
4329 ainc_cost_data_list.safe_grow_cleared (nsize);
4330 }
4331
4332 ainc_cost_data *data = ainc_cost_data_list[idx];
4333 if (data == NULL)
4334 {
4335 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4336
4337 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4338 data->costs[AINC_PRE_DEC] = INFTY;
4339 data->costs[AINC_POST_DEC] = INFTY;
4340 data->costs[AINC_PRE_INC] = INFTY;
4341 data->costs[AINC_POST_INC] = INFTY;
4342 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4343 || USE_STORE_PRE_DECREMENT (mem_mode))
4344 {
4345 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4346
4347 if (memory_address_addr_space_p (mem_mode, addr, as))
4348 data->costs[AINC_PRE_DEC]
4349 = address_cost (addr, mem_mode, as, speed);
4350 }
4351 if (USE_LOAD_POST_DECREMENT (mem_mode)
4352 || USE_STORE_POST_DECREMENT (mem_mode))
4353 {
4354 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4355
4356 if (memory_address_addr_space_p (mem_mode, addr, as))
4357 data->costs[AINC_POST_DEC]
4358 = address_cost (addr, mem_mode, as, speed);
4359 }
4360 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4361 || USE_STORE_PRE_INCREMENT (mem_mode))
4362 {
4363 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4364
4365 if (memory_address_addr_space_p (mem_mode, addr, as))
4366 data->costs[AINC_PRE_INC]
4367 = address_cost (addr, mem_mode, as, speed);
4368 }
4369 if (USE_LOAD_POST_INCREMENT (mem_mode)
4370 || USE_STORE_POST_INCREMENT (mem_mode))
4371 {
4372 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4373
4374 if (memory_address_addr_space_p (mem_mode, addr, as))
4375 data->costs[AINC_POST_INC]
4376 = address_cost (addr, mem_mode, as, speed);
4377 }
4378 ainc_cost_data_list[idx] = data;
4379 }
4380
4381 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4382 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4383 return comp_cost (data->costs[AINC_POST_INC], 0);
4384 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4385 return comp_cost (data->costs[AINC_POST_DEC], 0);
4386 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4387 return comp_cost (data->costs[AINC_PRE_INC], 0);
4388 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4389 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4390
4391 return infinite_cost;
4392 }
4393
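/* Example (assuming a 4-byte access on a target that supports such
   addressing modes): a candidate stepping by 4 with offset 0 matches the
   post-increment cost, stepping by -4 with offset 0 the post-decrement cost,
   and an offset of +4 with step 4 (or -4 with step -4) selects the
   pre-increment (or pre-decrement) cost.  */
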
/* Return the cost of computing USE's address expression by using CAND.
   AFF_INV and AFF_VAR represent the invariant and variant parts of the
   address expression, respectively.  If AFF_INV is simple, store the loop
   invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
   handle it as a new invariant expression and record it in INV_EXPR.  RATIO
   is the ratio between the steps of USE and CAND.  If CAN_AUTOINC is
   non-NULL, store a boolean in it indicating whether this is an
   auto-increment address.  */
4402
4403 static comp_cost
get_address_cost (struct ivopts_data *data, struct iv_use *use,
4405 struct iv_cand *cand, aff_tree *aff_inv,
4406 aff_tree *aff_var, HOST_WIDE_INT ratio,
4407 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4408 bool *can_autoinc, bool speed)
4409 {
4410 rtx addr;
4411 bool simple_inv = true;
4412 tree comp_inv = NULL_TREE, type = aff_var->type;
4413 comp_cost var_cost = no_cost, cost = no_cost;
4414 struct mem_address parts = {NULL_TREE, integer_one_node,
4415 NULL_TREE, NULL_TREE, NULL_TREE};
4416 machine_mode addr_mode = TYPE_MODE (type);
4417 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4418 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4419 /* Only true if ratio != 1. */
4420 bool ok_with_ratio_p = false;
4421 bool ok_without_ratio_p = false;
4422
4423 if (!aff_combination_const_p (aff_inv))
4424 {
4425 parts.index = integer_one_node;
4426 /* Addressing mode "base + index". */
4427 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4428 if (ratio != 1)
4429 {
4430 parts.step = wide_int_to_tree (type, ratio);
4431 /* Addressing mode "base + index << scale". */
4432 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4433 if (!ok_with_ratio_p)
4434 parts.step = NULL_TREE;
4435 }
4436 if (ok_with_ratio_p || ok_without_ratio_p)
4437 {
4438 if (maybe_ne (aff_inv->offset, 0))
4439 {
4440 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4441 /* Addressing mode "base + index [<< scale] + offset". */
4442 if (!valid_mem_ref_p (mem_mode, as, &parts))
4443 parts.offset = NULL_TREE;
4444 else
4445 aff_inv->offset = 0;
4446 }
4447
4448 move_fixed_address_to_symbol (&parts, aff_inv);
4449 /* Base is fixed address and is moved to symbol part. */
4450 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4451 parts.base = NULL_TREE;
4452
4453 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4454 if (parts.symbol != NULL_TREE
4455 && !valid_mem_ref_p (mem_mode, as, &parts))
4456 {
4457 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4458 parts.symbol = NULL_TREE;
/* Reset SIMPLE_INV since the symbol address needs to be computed
   outside of the address expression in this case.  */
simple_inv = false;
/* The symbol part is moved back to the base part; it can't be NULL.  */
parts.base = integer_one_node;
4464 }
4465 }
4466 else
4467 parts.index = NULL_TREE;
4468 }
4469 else
4470 {
4471 poly_int64 ainc_step;
4472 if (can_autoinc
4473 && ratio == 1
4474 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4475 {
4476 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4477
4478 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4479 ainc_offset += ainc_step;
4480 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4481 addr_mode, mem_mode, as, speed);
4482 if (!cost.infinite_cost_p ())
4483 {
4484 *can_autoinc = true;
4485 return cost;
4486 }
4487 cost = no_cost;
4488 }
4489 if (!aff_combination_zero_p (aff_inv))
4490 {
4491 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4492 /* Addressing mode "base + offset". */
4493 if (!valid_mem_ref_p (mem_mode, as, &parts))
4494 parts.offset = NULL_TREE;
4495 else
4496 aff_inv->offset = 0;
4497 }
4498 }
4499
4500 if (simple_inv)
4501 simple_inv = (aff_inv == NULL
4502 || aff_combination_const_p (aff_inv)
4503 || aff_combination_singleton_var_p (aff_inv));
4504 if (!aff_combination_zero_p (aff_inv))
4505 comp_inv = aff_combination_to_tree (aff_inv);
4506 if (comp_inv != NULL_TREE)
4507 cost = force_var_cost (data, comp_inv, inv_vars);
4508 if (ratio != 1 && parts.step == NULL_TREE)
4509 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4510 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4511 var_cost += add_cost (speed, addr_mode);
4512
4513 if (comp_inv && inv_expr && !simple_inv)
4514 {
4515 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4516 /* Clear depends on. */
4517 if (*inv_expr != NULL && inv_vars && *inv_vars)
4518 bitmap_clear (*inv_vars);
4519
/* The cost of a small invariant expression, once amortized over the loop
   iteration count, is usually zero, which makes it hard to distinguish
   from candidates based on loop invariant variables.  Moreover, the
   generated invariant expression may not be hoisted out of the loop by a
   following pass.  We penalize the cost by rounding up in order to
   neutralize such effects.  */
4526 cost.cost = adjust_setup_cost (data, cost.cost, true);
4527 cost.scratch = cost.cost;
4528 }
4529
4530 cost += var_cost;
4531 addr = addr_for_mem_ref (&parts, as, false);
4532 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4533 cost += address_cost (addr, mem_mode, as, speed);
4534
4535 if (parts.symbol != NULL_TREE)
4536 cost.complexity += 1;
4537 /* Don't increase the complexity of adding a scaled index if it's
4538 the only kind of index that the target allows. */
4539 if (parts.step != NULL_TREE && ok_without_ratio_p)
4540 cost.complexity += 1;
4541 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4542 cost.complexity += 1;
4543 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4544 cost.complexity += 1;
4545
4546 return cost;
4547 }
4548
/* Scale (multiply) the computed COST (except the scratch part, which is
   expected to be hoisted out of the loop) by the ratio of AT's block
   frequency to the loop header's frequency, which makes the expected cost
   more accurate.  */
4552
4553 static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4555 {
4556 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4557 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4558 if (loop_freq != 0)
4559 {
4560 gcc_assert (cost.scratch <= cost.cost);
4561 int scaled_cost
4562 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4563
4564 if (dump_file && (dump_flags & TDF_DETAILS))
4565 fprintf (dump_file, "Scaling cost based on bb prob "
4566 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4567 1.0f * bb_freq / loop_freq, cost.cost,
4568 cost.scratch, scaled_cost, bb_freq, loop_freq);
4569
4570 cost.cost = scaled_cost;
4571 }
4572
4573 return cost;
4574 }
4575
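/* Numeric example (made-up frequencies): with cost == 10, scratch == 2,
   a block frequency of 1 and a loop header frequency of 4, the scaled cost
   is 2 + (10 - 2) * 1 / 4 == 4; the scratch part is kept unscaled because it
   is expected to be hoisted out of the loop.  */
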
/* Determines the cost of the computation by which USE is expressed from
   induction variable CAND.  If ADDRESS_P is true, we just need to create an
   address from it, otherwise we want to get it into a register.  The set of
   invariants we depend on is stored in INV_VARS.  If CAN_AUTOINC is non-null,
   use it to record whether auto-increment addressing is likely.  If INV_EXPR
   is non-null, record the invariant expression entry in it.  */
4583
4584 static comp_cost
get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4586 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4587 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4588 {
4589 gimple *at = use->stmt;
4590 tree ubase = use->iv->base, cbase = cand->iv->base;
4591 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4592 tree comp_inv = NULL_TREE;
4593 HOST_WIDE_INT ratio, aratio;
4594 comp_cost cost;
4595 widest_int rat;
4596 aff_tree aff_inv, aff_var;
4597 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4598
4599 if (inv_vars)
4600 *inv_vars = NULL;
4601 if (can_autoinc)
4602 *can_autoinc = false;
4603 if (inv_expr)
4604 *inv_expr = NULL;
4605
4606 /* Check if we have enough precision to express the values of use. */
4607 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4608 return infinite_cost;
4609
4610 if (address_p
4611 || (use->iv->base_object
4612 && cand->iv->base_object
4613 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4614 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4615 {
/* Do not try to express the address of an object with a computation based
   on the address of a different object.  This may cause problems in
   RTL-level alias analysis (which does not expect this to happen, as it is
   invalid in C), and would be unlikely to be useful anyway.  */
4621 if (use->iv->base_object
4622 && cand->iv->base_object
4623 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4624 return infinite_cost;
4625 }
4626
4627 if (!get_computation_aff_1 (data->current_loop, at, use,
4628 cand, &aff_inv, &aff_var, &rat)
4629 || !wi::fits_shwi_p (rat))
4630 return infinite_cost;
4631
4632 ratio = rat.to_shwi ();
4633 if (address_p)
4634 {
4635 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4636 inv_vars, inv_expr, can_autoinc, speed);
4637 return get_scaled_computation_cost_at (data, at, cost);
4638 }
4639
4640 bool simple_inv = (aff_combination_const_p (&aff_inv)
4641 || aff_combination_singleton_var_p (&aff_inv));
4642 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4643 aff_combination_convert (&aff_inv, signed_type);
4644 if (!aff_combination_zero_p (&aff_inv))
4645 comp_inv = aff_combination_to_tree (&aff_inv);
4646
4647 cost = force_var_cost (data, comp_inv, inv_vars);
4648 if (comp_inv && inv_expr && !simple_inv)
4649 {
4650 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4651 /* Clear depends on. */
4652 if (*inv_expr != NULL && inv_vars && *inv_vars)
4653 bitmap_clear (*inv_vars);
4654
4655 cost.cost = adjust_setup_cost (data, cost.cost);
4656 /* Record setup cost in scratch field. */
4657 cost.scratch = cost.cost;
4658 }
/* The cost of a constant integer can be covered when adding the invariant
   part to the variant part.  */
4661 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4662 cost = no_cost;
4663
4664 /* Need type narrowing to represent use with cand. */
4665 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4666 {
4667 machine_mode outer_mode = TYPE_MODE (utype);
4668 machine_mode inner_mode = TYPE_MODE (ctype);
4669 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4670 }
4671
4672 /* Turn a + i * (-c) into a - i * c. */
4673 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4674 aratio = -ratio;
4675 else
4676 aratio = ratio;
4677
4678 if (ratio != 1)
4679 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4680
4681 /* TODO: We may also need to check if we can compute a + i * 4 in one
4682 instruction. */
4683 /* Need to add up the invariant and variant parts. */
4684 if (comp_inv && !integer_zerop (comp_inv))
4685 cost += add_cost (speed, TYPE_MODE (utype));
4686
4687 return get_scaled_computation_cost_at (data, at, cost);
4688 }
4689
4690 /* Determines cost of computing the use in GROUP with CAND in a generic
4691 expression. */
4692
4693 static bool
determine_group_iv_cost_generic (struct ivopts_data *data,
4695 struct iv_group *group, struct iv_cand *cand)
4696 {
4697 comp_cost cost;
4698 iv_inv_expr_ent *inv_expr = NULL;
4699 bitmap inv_vars = NULL, inv_exprs = NULL;
4700 struct iv_use *use = group->vuses[0];
4701
4702 /* The simple case first -- if we need to express value of the preserved
4703 original biv, the cost is 0. This also prevents us from counting the
4704 cost of increment twice -- once at this use and once in the cost of
4705 the candidate. */
4706 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4707 cost = no_cost;
4708 else
4709 cost = get_computation_cost (data, use, cand, false,
4710 &inv_vars, NULL, &inv_expr);
4711
4712 if (inv_expr)
4713 {
4714 inv_exprs = BITMAP_ALLOC (NULL);
4715 bitmap_set_bit (inv_exprs, inv_expr->id);
4716 }
4717 set_group_iv_cost (data, group, cand, cost, inv_vars,
4718 NULL_TREE, ERROR_MARK, inv_exprs);
4719 return !cost.infinite_cost_p ();
4720 }
4721
4722 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4723
4724 static bool
determine_group_iv_cost_address (struct ivopts_data *data,
4726 struct iv_group *group, struct iv_cand *cand)
4727 {
4728 unsigned i;
4729 bitmap inv_vars = NULL, inv_exprs = NULL;
4730 bool can_autoinc;
4731 iv_inv_expr_ent *inv_expr = NULL;
4732 struct iv_use *use = group->vuses[0];
4733 comp_cost sum_cost = no_cost, cost;
4734
4735 cost = get_computation_cost (data, use, cand, true,
4736 &inv_vars, &can_autoinc, &inv_expr);
4737
4738 if (inv_expr)
4739 {
4740 inv_exprs = BITMAP_ALLOC (NULL);
4741 bitmap_set_bit (inv_exprs, inv_expr->id);
4742 }
4743 sum_cost = cost;
4744 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4745 {
4746 if (can_autoinc)
4747 sum_cost -= cand->cost_step;
4748 /* If we generated the candidate solely for exploiting autoincrement
4749 opportunities, and it turns out it can't be used, set the cost to
4750 infinity to make sure we ignore it. */
4751 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4752 sum_cost = infinite_cost;
4753 }
4754
4755 /* Uses in a group can share setup code, so only add setup cost once. */
4756 cost -= cost.scratch;
/* Compute and add the costs for the rest of the uses in this group.  */
4758 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4759 {
4760 struct iv_use *next = group->vuses[i];
4761
4762 /* TODO: We could skip computing cost for sub iv_use when it has the
4763 same cost as the first iv_use, but the cost really depends on the
4764 offset and where the iv_use is. */
4765 cost = get_computation_cost (data, next, cand, true,
4766 NULL, &can_autoinc, &inv_expr);
4767 if (inv_expr)
4768 {
4769 if (!inv_exprs)
4770 inv_exprs = BITMAP_ALLOC (NULL);
4771
4772 bitmap_set_bit (inv_exprs, inv_expr->id);
4773 }
4774 sum_cost += cost;
4775 }
4776 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4777 NULL_TREE, ERROR_MARK, inv_exprs);
4778
4779 return !sum_cost.infinite_cost_p ();
4780 }
4781
4782 /* Computes value of candidate CAND at position AT in iteration NITER, and
4783 stores it to VAL. */
4784
4785 static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4787 aff_tree *val)
4788 {
4789 aff_tree step, delta, nit;
4790 struct iv *iv = cand->iv;
4791 tree type = TREE_TYPE (iv->base);
4792 tree steptype;
4793 if (POINTER_TYPE_P (type))
4794 steptype = sizetype;
4795 else
4796 steptype = unsigned_type_for (type);
4797
4798 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4799 aff_combination_convert (&step, steptype);
4800 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4801 aff_combination_convert (&nit, steptype);
4802 aff_combination_mult (&nit, &step, &delta);
4803 if (stmt_after_increment (loop, cand, at))
4804 aff_combination_add (&delta, &step);
4805
4806 tree_to_aff_combination (iv->base, type, val);
4807 if (!POINTER_TYPE_P (type))
4808 aff_combination_convert (val, steptype);
4809 aff_combination_add (val, &delta);
4810 }
4811
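/* Summarizing the computation above: the value stored in VAL is essentially
   BASE + (NITER + 1 if AT is after the increment, else NITER) * STEP,
   evaluated in an unsigned type (or sizetype, for pointer bases) so that the
   multiplication cannot introduce undefined overflow.  */
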
4812 /* Returns period of induction variable iv. */
4813
4814 static tree
iv_period (struct iv *iv)
4816 {
4817 tree step = iv->step, period, type;
4818 tree pow2div;
4819
4820 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4821
4822 type = unsigned_type_for (TREE_TYPE (step));
/* The period of the iv is lcm (step, type_range)/step - 1, i.e.,
   N*type_range/step - 1.  Since the type range is a power of two,
   N == step >> num_of_ending_zeros_binary (step), so the final result is

     (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4831 pow2div = num_ending_zeros (step);
4832
4833 period = build_low_bits_mask (type,
4834 (TYPE_PRECISION (type)
4835 - tree_to_uhwi (pow2div)));
4836
4837 return period;
4838 }
4839
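/* Example (hypothetical step): for an iv with step 12 in a 32-bit unsigned
   type, num_ending_zeros (12) == 2, so the period computed above is
   (1 << 30) - 1; within that many increments the iv is guaranteed not to
   wrap back to a previously seen value.  */
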
4840 /* Returns the comparison operator used when eliminating the iv USE. */
4841
4842 static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4844 {
4845 struct loop *loop = data->current_loop;
4846 basic_block ex_bb;
4847 edge exit;
4848
4849 ex_bb = gimple_bb (use->stmt);
4850 exit = EDGE_SUCC (ex_bb, 0);
4851 if (flow_bb_inside_loop_p (loop, exit->dest))
4852 exit = EDGE_SUCC (ex_bb, 1);
4853
4854 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4855 }
4856
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For
   now, we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in a non-wrapping type.
4860
4861 TODO: More generally, we could test for the situation that
4862 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4863 This would require knowing the sign of OFFSET. */
4864
4865 static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4867 {
4868 enum tree_code code;
4869 tree e1, e2;
4870 aff_tree aff_e1, aff_e2, aff_offset;
4871
4872 if (!nowrap_type_p (TREE_TYPE (base)))
4873 return false;
4874
4875 base = expand_simple_operations (base);
4876
4877 if (TREE_CODE (base) == SSA_NAME)
4878 {
4879 gimple *stmt = SSA_NAME_DEF_STMT (base);
4880
4881 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4882 return false;
4883
4884 code = gimple_assign_rhs_code (stmt);
4885 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4886 return false;
4887
4888 e1 = gimple_assign_rhs1 (stmt);
4889 e2 = gimple_assign_rhs2 (stmt);
4890 }
4891 else
4892 {
4893 code = TREE_CODE (base);
4894 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4895 return false;
4896 e1 = TREE_OPERAND (base, 0);
4897 e2 = TREE_OPERAND (base, 1);
4898 }
4899
4900 /* Use affine expansion as deeper inspection to prove the equality. */
4901 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4902 &aff_e2, &data->name_expansion_cache);
4903 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4904 &aff_offset, &data->name_expansion_cache);
4905 aff_combination_scale (&aff_offset, -1);
4906 switch (code)
4907 {
4908 case PLUS_EXPR:
4909 aff_combination_add (&aff_e2, &aff_offset);
4910 if (aff_combination_zero_p (&aff_e2))
4911 return true;
4912
4913 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4914 &aff_e1, &data->name_expansion_cache);
4915 aff_combination_add (&aff_e1, &aff_offset);
4916 return aff_combination_zero_p (&aff_e1);
4917
4918 case POINTER_PLUS_EXPR:
4919 aff_combination_add (&aff_e2, &aff_offset);
4920 return aff_combination_zero_p (&aff_e2);
4921
4922 default:
4923 return false;
4924 }
4925 }
4926
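/* Example of the pattern this recognizes (hypothetical names): if BASE is
   defined by a POINTER_PLUS_EXPR as base = p + off and OFFSET expands to the
   same off, the affine forms of off and -OFFSET cancel to zero, so the
   function returns true; BASE - OFFSET is simply p, which trivially cannot
   overflow.  */
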
/* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of the
   loop.  If successful, the comparison in COMP_P is altered accordingly.
4930
4931 We aim to handle the following situation:
4932
4933 sometype *base, *p;
4934 int a, b, i;
4935
4936 i = a;
4937 p = p_0 = base + a;
4938
4939 do
4940 {
4941 bla (*p);
4942 p++;
4943 i++;
4944 }
4945 while (i < b);
4946
4947 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4948 We aim to optimize this to
4949
4950 p = p_0 = base + a;
4951 do
4952 {
4953 bla (*p);
4954 p++;
4955 }
4956 while (p < p_0 - a + b);
4957
   This preserves correctness, since the pointer arithmetic does not
   overflow.  More precisely:
4960
4961 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4962 overflow in computing it or the values of p.
4963 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4964 overflow. To prove this, we use the fact that p_0 = base + a. */
4965
4966 static bool
iv_elimination_compare_lt (struct ivopts_data *data,
4968 struct iv_cand *cand, enum tree_code *comp_p,
4969 struct tree_niter_desc *niter)
4970 {
4971 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4972 struct aff_tree nit, tmpa, tmpb;
4973 enum tree_code comp;
4974 HOST_WIDE_INT step;
4975
4976 /* We need to know that the candidate induction variable does not overflow.
4977 While more complex analysis may be used to prove this, for now just
4978 check that the variable appears in the original program and that it
4979 is computed in a type that guarantees no overflows. */
4980 cand_type = TREE_TYPE (cand->iv->base);
4981 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4982 return false;
4983
4984 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4985 the calculation of the BOUND could overflow, making the comparison
4986 invalid. */
4987 if (!data->loop_single_exit_p)
4988 return false;
4989
4990 /* We need to be able to decide whether candidate is increasing or decreasing
4991 in order to choose the right comparison operator. */
4992 if (!cst_and_fits_in_hwi (cand->iv->step))
4993 return false;
4994 step = int_cst_value (cand->iv->step);
4995
4996 /* Check that the number of iterations matches the expected pattern:
4997 a + 1 > b ? 0 : b - a - 1. */
4998 mbz = niter->may_be_zero;
4999 if (TREE_CODE (mbz) == GT_EXPR)
5000 {
5001 /* Handle a + 1 > b. */
5002 tree op0 = TREE_OPERAND (mbz, 0);
5003 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5004 {
5005 a = TREE_OPERAND (op0, 0);
5006 b = TREE_OPERAND (mbz, 1);
5007 }
5008 else
5009 return false;
5010 }
5011 else if (TREE_CODE (mbz) == LT_EXPR)
5012 {
5013 tree op1 = TREE_OPERAND (mbz, 1);
5014
5015 /* Handle b < a + 1. */
5016 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5017 {
5018 a = TREE_OPERAND (op1, 0);
5019 b = TREE_OPERAND (mbz, 0);
5020 }
5021 else
5022 return false;
5023 }
5024 else
5025 return false;
5026
5027 /* Expected number of iterations is B - A - 1. Check that it matches
5028 the actual number, i.e., that B - A - NITER = 1. */
5029 tree_to_aff_combination (niter->niter, nit_type, &nit);
5030 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5031 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5032 aff_combination_scale (&nit, -1);
5033 aff_combination_scale (&tmpa, -1);
5034 aff_combination_add (&tmpb, &tmpa);
5035 aff_combination_add (&tmpb, &nit);
5036 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5037 return false;
5038
5039 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5040 overflow. */
5041 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5042 cand->iv->step,
5043 fold_convert (TREE_TYPE (cand->iv->step), a));
5044 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5045 return false;
5046
5047 /* Determine the new comparison operator. */
5048 comp = step < 0 ? GT_EXPR : LT_EXPR;
5049 if (*comp_p == NE_EXPR)
5050 *comp_p = comp;
5051 else if (*comp_p == EQ_EXPR)
5052 *comp_p = invert_tree_comparison (comp, false);
5053 else
5054 gcc_unreachable ();
5055
5056 return true;
5057 }
5058
/* Check whether it is possible to express the condition in USE by a
   comparison of candidate CAND.  If so, store the value to compare against
   in BOUND, and the comparison operator in COMP.  */
5062
5063 static bool
may_eliminate_iv (struct ivopts_data *data,
5065 struct iv_use *use, struct iv_cand *cand, tree *bound,
5066 enum tree_code *comp)
5067 {
5068 basic_block ex_bb;
5069 edge exit;
5070 tree period;
5071 struct loop *loop = data->current_loop;
5072 aff_tree bnd;
5073 struct tree_niter_desc *desc = NULL;
5074
5075 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5076 return false;
5077
5078 /* For now works only for exits that dominate the loop latch.
5079 TODO: extend to other conditions inside loop body. */
5080 ex_bb = gimple_bb (use->stmt);
5081 if (use->stmt != last_stmt (ex_bb)
5082 || gimple_code (use->stmt) != GIMPLE_COND
5083 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5084 return false;
5085
5086 exit = EDGE_SUCC (ex_bb, 0);
5087 if (flow_bb_inside_loop_p (loop, exit->dest))
5088 exit = EDGE_SUCC (ex_bb, 1);
5089 if (flow_bb_inside_loop_p (loop, exit->dest))
5090 return false;
5091
5092 desc = niter_for_exit (data, exit);
5093 if (!desc)
5094 return false;
5095
5096 /* Determine whether we can use the variable to test the exit condition.
5097 This is the case iff the period of the induction variable is greater
5098 than the number of iterations for which the exit condition is true. */
5099 period = iv_period (cand->iv);
5100
5101 /* If the number of iterations is constant, compare against it directly. */
5102 if (TREE_CODE (desc->niter) == INTEGER_CST)
5103 {
5104 /* See cand_value_at. */
5105 if (stmt_after_increment (loop, cand, use->stmt))
5106 {
5107 if (!tree_int_cst_lt (desc->niter, period))
5108 return false;
5109 }
5110 else
5111 {
5112 if (tree_int_cst_lt (period, desc->niter))
5113 return false;
5114 }
5115 }
5116
5117 /* If not, and if this is the only possible exit of the loop, see whether
5118 we can get a conservative estimate on the number of iterations of the
5119 entire loop and compare against that instead. */
5120 else
5121 {
5122 widest_int period_value, max_niter;
5123
5124 max_niter = desc->max;
5125 if (stmt_after_increment (loop, cand, use->stmt))
5126 max_niter += 1;
5127 period_value = wi::to_widest (period);
5128 if (wi::gtu_p (max_niter, period_value))
5129 {
5130 /* See if we can take advantage of inferred loop bound
5131 information. */
5132 if (data->loop_single_exit_p)
5133 {
5134 if (!max_loop_iterations (loop, &max_niter))
5135 return false;
5136 /* The loop bound is already adjusted by adding 1. */
5137 if (wi::gtu_p (max_niter, period_value))
5138 return false;
5139 }
5140 else
5141 return false;
5142 }
5143 }
5144
5145 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5146
5147 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5148 aff_combination_to_tree (&bnd));
5149 *comp = iv_elimination_compare (data, use);
5150
5151 /* It is unlikely that computing the number of iterations using division
5152 would be more profitable than keeping the original induction variable. */
5153 if (expression_expensive_p (*bound))
5154 return false;
5155
/* Sometimes, it is possible to handle the situation that the number of
   iterations may be zero unless additional assumptions hold, by using <
   instead of != in the exit condition.

   TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
   base the exit condition on it.  However, that is often too
   expensive.  */
5163 if (!integer_zerop (desc->may_be_zero))
5164 return iv_elimination_compare_lt (data, cand, comp, desc);
5165
5166 return true;
5167 }
5168
5169 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5170 be copied, if it is used in the loop body and DATA->body_includes_call. */
5171
5172 static int
parm_decl_cost (struct ivopts_data *data, tree bound)
5174 {
5175 tree sbound = bound;
5176 STRIP_NOPS (sbound);
5177
5178 if (TREE_CODE (sbound) == SSA_NAME
5179 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5180 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5181 && data->body_includes_call)
5182 return COSTS_N_INSNS (1);
5183
5184 return 0;
5185 }
5186
5187 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5188
5189 static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
5191 struct iv_group *group, struct iv_cand *cand)
5192 {
5193 tree bound = NULL_TREE;
5194 struct iv *cmp_iv;
5195 bitmap inv_exprs = NULL;
5196 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5197 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5198 enum comp_iv_rewrite rewrite_type;
5199 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5200 tree *control_var, *bound_cst;
5201 enum tree_code comp = ERROR_MARK;
5202 struct iv_use *use = group->vuses[0];
5203
5204 /* Extract condition operands. */
5205 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5206 &bound_cst, NULL, &cmp_iv);
5207 gcc_assert (rewrite_type != COMP_IV_NA);
5208
5209 /* Try iv elimination. */
5210 if (rewrite_type == COMP_IV_ELIM
5211 && may_eliminate_iv (data, use, cand, &bound, &comp))
5212 {
5213 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5214 if (elim_cost.cost == 0)
5215 elim_cost.cost = parm_decl_cost (data, bound);
5216 else if (TREE_CODE (bound) == INTEGER_CST)
5217 elim_cost.cost = 0;
5218 /* If we replace a loop condition 'i < n' with 'p < base + n',
5219 inv_vars_elim will have 'base' and 'n' set, which implies that both
5220 'base' and 'n' will be live during the loop. More likely,
5221 'base + n' will be loop invariant, resulting in only one live value
5222 during the loop. So in that case we clear inv_vars_elim and set
5223 inv_expr_elim instead. */
5224 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5225 {
5226 inv_expr_elim = get_loop_invariant_expr (data, bound);
5227 bitmap_clear (inv_vars_elim);
5228 }
5229 /* The bound is a loop invariant, so it will be only computed
5230 once. */
5231 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5232 }
5233
5234 /* When the condition is a comparison of the candidate IV against
5235 zero, prefer this IV.
5236
5237 TODO: The constant that we're subtracting from the cost should
5238 be target-dependent. This information should be added to the
5239 target costs for each backend. */
5240 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5241 && integer_zerop (*bound_cst)
5242 && (operand_equal_p (*control_var, cand->var_after, 0)
5243 || operand_equal_p (*control_var, cand->var_before, 0)))
5244 elim_cost -= 1;
5245
5246 express_cost = get_computation_cost (data, use, cand, false,
5247 &inv_vars_express, NULL,
5248 &inv_expr_express);
5249 if (cmp_iv != NULL)
5250 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5251
5252 /* Count the cost of the original bound as well. */
5253 bound_cost = force_var_cost (data, *bound_cst, NULL);
5254 if (bound_cost.cost == 0)
5255 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5256 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5257 bound_cost.cost = 0;
5258 express_cost += bound_cost;
5259
5260 /* Choose the better approach, preferring the eliminated IV. */
5261 if (elim_cost <= express_cost)
5262 {
5263 cost = elim_cost;
5264 inv_vars = inv_vars_elim;
5265 inv_vars_elim = NULL;
5266 inv_expr = inv_expr_elim;
5267 }
5268 else
5269 {
5270 cost = express_cost;
5271 inv_vars = inv_vars_express;
5272 inv_vars_express = NULL;
5273 bound = NULL_TREE;
5274 comp = ERROR_MARK;
5275 inv_expr = inv_expr_express;
5276 }
5277
5278 if (inv_expr)
5279 {
5280 inv_exprs = BITMAP_ALLOC (NULL);
5281 bitmap_set_bit (inv_exprs, inv_expr->id);
5282 }
5283 set_group_iv_cost (data, group, cand, cost,
5284 inv_vars, bound, comp, inv_exprs);
5285
5286 if (inv_vars_elim)
5287 BITMAP_FREE (inv_vars_elim);
5288 if (inv_vars_express)
5289 BITMAP_FREE (inv_vars_express);
5290
5291 return !cost.infinite_cost_p ();
5292 }
5293
/* Determines the cost of computing the uses in GROUP with CAND.  Returns
   false if the uses in GROUP cannot be represented with CAND.  */
5296
5297 static bool
determine_group_iv_cost (struct ivopts_data *data,
5299 struct iv_group *group, struct iv_cand *cand)
5300 {
5301 switch (group->type)
5302 {
5303 case USE_NONLINEAR_EXPR:
5304 return determine_group_iv_cost_generic (data, group, cand);
5305
5306 case USE_REF_ADDRESS:
5307 case USE_PTR_ADDRESS:
5308 return determine_group_iv_cost_address (data, group, cand);
5309
5310 case USE_COMPARE:
5311 return determine_group_iv_cost_cond (data, group, cand);
5312
5313 default:
5314 gcc_unreachable ();
5315 }
5316 }
5317
5318 /* Return true if get_computation_cost indicates that autoincrement is
5319 a possibility for the pair of USE and CAND, false otherwise. */
5320
5321 static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5323 struct iv_cand *cand)
5324 {
5325 if (!address_p (use->type))
5326 return false;
5327
5328 bool can_autoinc = false;
5329 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5330 return can_autoinc;
5331 }
5332
5333 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5334 use that allows autoincrement, and set their AINC_USE if possible. */
5335
5336 static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
5338 {
5339 unsigned i, j;
5340
5341 for (i = 0; i < data->vcands.length (); i++)
5342 {
5343 struct iv_cand *cand = data->vcands[i];
5344 struct iv_use *closest_before = NULL;
5345 struct iv_use *closest_after = NULL;
5346 if (cand->pos != IP_ORIGINAL)
5347 continue;
5348
5349 for (j = 0; j < data->vgroups.length (); j++)
5350 {
5351 struct iv_group *group = data->vgroups[j];
5352 struct iv_use *use = group->vuses[0];
5353 unsigned uid = gimple_uid (use->stmt);
5354
5355 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5356 continue;
5357
5358 if (uid < gimple_uid (cand->incremented_at)
5359 && (closest_before == NULL
5360 || uid > gimple_uid (closest_before->stmt)))
5361 closest_before = use;
5362
5363 if (uid > gimple_uid (cand->incremented_at)
5364 && (closest_after == NULL
5365 || uid < gimple_uid (closest_after->stmt)))
5366 closest_after = use;
5367 }
5368
5369 if (closest_before != NULL
5370 && autoinc_possible_for_pair (data, closest_before, cand))
5371 cand->ainc_use = closest_before;
5372 else if (closest_after != NULL
5373 && autoinc_possible_for_pair (data, closest_after, cand))
5374 cand->ainc_use = closest_after;
5375 }
5376 }
5377
5378 /* Relate compare use with all candidates. */
5379
5380 static void
5381 relate_compare_use_with_all_cands (struct ivopts_data *data)
5382 {
5383 unsigned i, count = data->vcands.length ();
5384 for (i = 0; i < data->vgroups.length (); i++)
5385 {
5386 struct iv_group *group = data->vgroups[i];
5387
5388 if (group->type == USE_COMPARE)
5389 bitmap_set_range (group->related_cands, 0, count);
5390 }
5391 }
5392
5393 /* Finds the candidates for the induction variables. */
5394
5395 static void
5396 find_iv_candidates (struct ivopts_data *data)
5397 {
5398 /* Add commonly used ivs. */
5399 add_standard_iv_candidates (data);
5400
5401 /* Add old induction variables. */
5402 add_iv_candidate_for_bivs (data);
5403
5404 /* Add induction variables derived from uses. */
5405 add_iv_candidate_for_groups (data);
5406
5407 set_autoinc_for_original_candidates (data);
5408
5409 /* Record the important candidates. */
5410 record_important_candidates (data);
5411
5412 /* Relate compare iv_use with all candidates. */
5413 if (!data->consider_all_candidates)
5414 relate_compare_use_with_all_cands (data);
5415
5416 if (dump_file && (dump_flags & TDF_DETAILS))
5417 {
5418 unsigned i;
5419
5420 fprintf (dump_file, "\n<Important Candidates>:\t");
5421 for (i = 0; i < data->vcands.length (); i++)
5422 if (data->vcands[i]->important)
5423 fprintf (dump_file, " %d,", data->vcands[i]->id);
5424 fprintf (dump_file, "\n");
5425
5426 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5427 for (i = 0; i < data->vgroups.length (); i++)
5428 {
5429 struct iv_group *group = data->vgroups[i];
5430
5431 if (group->related_cands)
5432 {
5433 fprintf (dump_file, " Group %d:\t", group->id);
5434 dump_bitmap (dump_file, group->related_cands);
5435 }
5436 }
5437 fprintf (dump_file, "\n");
5438 }
5439 }
5440
5441 /* Determines costs of computing use of iv with an iv candidate. */
5442
5443 static void
5444 determine_group_iv_costs (struct ivopts_data *data)
5445 {
5446 unsigned i, j;
5447 struct iv_cand *cand;
5448 struct iv_group *group;
5449 bitmap to_clear = BITMAP_ALLOC (NULL);
5450
5451 alloc_use_cost_map (data);
5452
5453 for (i = 0; i < data->vgroups.length (); i++)
5454 {
5455 group = data->vgroups[i];
5456
5457 if (data->consider_all_candidates)
5458 {
5459 for (j = 0; j < data->vcands.length (); j++)
5460 {
5461 cand = data->vcands[j];
5462 determine_group_iv_cost (data, group, cand);
5463 }
5464 }
5465 else
5466 {
5467 bitmap_iterator bi;
5468
5469 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5470 {
5471 cand = data->vcands[j];
5472 if (!determine_group_iv_cost (data, group, cand))
5473 bitmap_set_bit (to_clear, j);
5474 }
5475
5476 /* Remove the candidates for which the cost is infinite from
5477 the list of related candidates. */
5478 bitmap_and_compl_into (group->related_cands, to_clear);
5479 bitmap_clear (to_clear);
5480 }
5481 }
5482
5483 BITMAP_FREE (to_clear);
5484
5485 if (dump_file && (dump_flags & TDF_DETAILS))
5486 {
5487 bitmap_iterator bi;
5488
5489 /* Dump invariant variables. */
5490 fprintf (dump_file, "\n<Invariant Vars>:\n");
5491 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5492 {
5493 struct version_info *info = ver_info (data, i);
5494 if (info->inv_id)
5495 {
5496 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5497 print_generic_expr (dump_file, info->name, TDF_SLIM);
5498 fprintf (dump_file, "%s\n",
5499 info->has_nonlin_use ? "" : "\t(eliminable)");
5500 }
5501 }
5502
5503 /* Dump invariant expressions. */
5504 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5505 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5506
5507 for (hash_table<iv_inv_expr_hasher>::iterator it
5508 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5509 ++it)
5510 list.safe_push (*it);
5511
5512 list.qsort (sort_iv_inv_expr_ent);
5513
5514 for (i = 0; i < list.length (); ++i)
5515 {
5516 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5517 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5518 fprintf (dump_file, "\n");
5519 }
5520
5521 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5522
5523 for (i = 0; i < data->vgroups.length (); i++)
5524 {
5525 group = data->vgroups[i];
5526
5527 fprintf (dump_file, "Group %d:\n", i);
5528 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5529 for (j = 0; j < group->n_map_members; j++)
5530 {
5531 if (!group->cost_map[j].cand
5532 || group->cost_map[j].cost.infinite_cost_p ())
5533 continue;
5534
5535 fprintf (dump_file, " %d\t%d\t%d\t",
5536 group->cost_map[j].cand->id,
5537 group->cost_map[j].cost.cost,
5538 group->cost_map[j].cost.complexity);
5539 if (!group->cost_map[j].inv_exprs
5540 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5541 fprintf (dump_file, "NIL;\t");
5542 else
5543 bitmap_print (dump_file,
5544 group->cost_map[j].inv_exprs, "", ";\t");
5545 if (!group->cost_map[j].inv_vars
5546 || bitmap_empty_p (group->cost_map[j].inv_vars))
5547 fprintf (dump_file, "NIL;\n");
5548 else
5549 bitmap_print (dump_file,
5550 group->cost_map[j].inv_vars, "", "\n");
5551 }
5552
5553 fprintf (dump_file, "\n");
5554 }
5555 fprintf (dump_file, "\n");
5556 }
5557 }
5558
5559 /* Determines cost of the candidate CAND. */
5560
5561 static void
5562 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5563 {
5564 comp_cost cost_base;
5565 unsigned cost, cost_step;
5566 tree base;
5567
5568 gcc_assert (cand->iv != NULL);
5569
5570 /* There are two costs associated with the candidate -- its increment
5571 and its initialization. The second is almost negligible for any loop
5572 that rolls enough, so we give it only a small weight. */
5573
5574 base = cand->iv->base;
5575 cost_base = force_var_cost (data, base, NULL);
5576 /* It is rare for the iv register to happen to be initialized with the
5577 proper value at no cost. In general, there will be at least a regcopy
5578 or a constant set. */
5579 if (cost_base.cost == 0)
5580 cost_base.cost = COSTS_N_INSNS (1);
5581 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5582
5583 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
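/* Illustrative reading (not exact target numbers): the per-iteration step
   cost dominates, while the one-off setup of the base is scaled down by
   adjust_setup_cost according to how many times the loop is expected to
   roll.  */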
5584
5585 /* Prefer the original ivs unless we may gain something by replacing them.
5586 The reason is to make debugging simpler; this is not relevant for
5587 artificial ivs created by other optimization passes. */
5588 if (cand->pos != IP_ORIGINAL
5589 || !SSA_NAME_VAR (cand->var_before)
5590 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5591 cost++;
5592
5593 /* Prefer not to insert statements into latch unless there are some
5594 already (so that we do not create unnecessary jumps). */
5595 if (cand->pos == IP_END
5596 && empty_block_p (ip_end_pos (data->current_loop)))
5597 cost++;
5598
5599 cand->cost = cost;
5600 cand->cost_step = cost_step;
5601 }
5602
5603 /* Determines costs of computation of the candidates. */
5604
5605 static void
5606 determine_iv_costs (struct ivopts_data *data)
5607 {
5608 unsigned i;
5609
5610 if (dump_file && (dump_flags & TDF_DETAILS))
5611 {
5612 fprintf (dump_file, "<Candidate Costs>:\n");
5613 fprintf (dump_file, " cand\tcost\n");
5614 }
5615
5616 for (i = 0; i < data->vcands.length (); i++)
5617 {
5618 struct iv_cand *cand = data->vcands[i];
5619
5620 determine_iv_cost (data, cand);
5621
5622 if (dump_file && (dump_flags & TDF_DETAILS))
5623 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5624 }
5625
5626 if (dump_file && (dump_flags & TDF_DETAILS))
5627 fprintf (dump_file, "\n");
5628 }
5629
5630 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5631 induction variables. Note N_INVS includes both invariant variables and
5632 invariant expressions. */
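/* Illustrative sketch (hypothetical numbers, not taken from any target):
   with available_regs == 10, regs_used == 2, n_cands == 3, n_invs == 4 and
   target_res_regs == 3, we get regs_needed == 9. The first test fails
   (9 + 3 >= 10) but 9 <= 10, so the estimate is target_reg_cost[speed] * 9,
   and the function returns that plus n_cands == 3.  */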
5633
5634 static unsigned
5635 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5636 unsigned n_cands)
5637 {
5638 unsigned cost;
5639 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5640 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5641 bool speed = data->speed;
5642
5643 /* If there is a call in the loop body, the call-clobbered registers
5644 are not available for loop invariants. */
5645 if (data->body_includes_call)
5646 available_regs = available_regs - target_clobbered_regs;
5647
5648 /* If we have enough registers. */
5649 if (regs_needed + target_res_regs < available_regs)
5650 cost = n_new;
5651 /* If close to running out of registers, try to preserve them. */
5652 else if (regs_needed <= available_regs)
5653 cost = target_reg_cost [speed] * regs_needed;
5654 /* If the registers needed exceed what is available but the number of
5655 candidates still fits, we penalize the extra registers using target_spill_cost. */
5656 else if (n_cands <= available_regs)
5657 cost = target_reg_cost [speed] * available_regs
5658 + target_spill_cost [speed] * (regs_needed - available_regs);
5659 /* If even the number of candidates exceeds the available registers, we
5660 penalize the extra candidate registers using target_spill_cost * 2,
5661 because it is more expensive to spill an induction variable than an invariant. */
5662 else
5663 cost = target_reg_cost [speed] * available_regs
5664 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5665 + target_spill_cost [speed] * (regs_needed - n_cands);
5666
5667 /* Finally, add the number of candidates, so that we prefer eliminating
5668 induction variables if possible. */
5669 return cost + n_cands;
5670 }
5671
5672 /* For each size of the induction variable set determine the penalty. */
5673
5674 static void
5675 determine_set_costs (struct ivopts_data *data)
5676 {
5677 unsigned j, n;
5678 gphi *phi;
5679 gphi_iterator psi;
5680 tree op;
5681 struct loop *loop = data->current_loop;
5682 bitmap_iterator bi;
5683
5684 if (dump_file && (dump_flags & TDF_DETAILS))
5685 {
5686 fprintf (dump_file, "<Global Costs>:\n");
5687 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5688 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5689 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5690 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5691 }
5692
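/* Count the registers that will stay live across the loop besides the ivs we
   create: integer or pointer PHI results in the loop header that are not
   induction variables, plus invariants that have nonlinear uses.  */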
5693 n = 0;
5694 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5695 {
5696 phi = psi.phi ();
5697 op = PHI_RESULT (phi);
5698
5699 if (virtual_operand_p (op))
5700 continue;
5701
5702 if (get_iv (data, op))
5703 continue;
5704
5705 if (!POINTER_TYPE_P (TREE_TYPE (op))
5706 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5707 continue;
5708
5709 n++;
5710 }
5711
5712 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5713 {
5714 struct version_info *info = ver_info (data, j);
5715
5716 if (info->inv_id && info->has_nonlin_use)
5717 n++;
5718 }
5719
5720 data->regs_used = n;
5721 if (dump_file && (dump_flags & TDF_DETAILS))
5722 fprintf (dump_file, " regs_used %d\n", n);
5723
5724 if (dump_file && (dump_flags & TDF_DETAILS))
5725 {
5726 fprintf (dump_file, " cost for size:\n");
5727 fprintf (dump_file, " ivs\tcost\n");
5728 for (j = 0; j <= 2 * target_avail_regs; j++)
5729 fprintf (dump_file, " %d\t%d\n", j,
5730 ivopts_estimate_reg_pressure (data, 0, j));
5731 fprintf (dump_file, "\n");
5732 }
5733 }
5734
5735 /* Returns true if A is a cheaper cost pair than B. */
5736
5737 static bool
5738 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5739 {
5740 if (!a)
5741 return false;
5742
5743 if (!b)
5744 return true;
5745
5746 if (a->cost < b->cost)
5747 return true;
5748
5749 if (b->cost < a->cost)
5750 return false;
5751
5752 /* In case the costs are the same, prefer the cheaper candidate. */
5753 if (a->cand->cost < b->cand->cost)
5754 return true;
5755
5756 return false;
5757 }
5758
5759 /* Compare whether A is a more expensive cost pair than B. Return 1, 0 or -1
5760 for more expensive, equal and cheaper respectively. */
5761
5762 static int
5763 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5764 {
5765 if (cheaper_cost_pair (a, b))
5766 return -1;
5767 if (cheaper_cost_pair (b, a))
5768 return 1;
5769
5770 return 0;
5771 }
5772
5773 /* Returns the cost pair of the candidate by which GROUP is expressed in IVS. */
5774
5775 static struct cost_pair *
5776 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5777 {
5778 return ivs->cand_for_group[group->id];
5779 }
5780
5781 /* Computes the cost field of IVS structure. */
5782
5783 static void
5784 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5785 {
5786 comp_cost cost = ivs->cand_use_cost;
5787
5788 cost += ivs->cand_cost;
5789 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5790 ivs->cost = cost;
5791 }
5792
5793 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5794 and IVS. */
5795
5796 static void
5797 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5798 {
5799 bitmap_iterator bi;
5800 unsigned iid;
5801
5802 if (!invs)
5803 return;
5804
5805 gcc_assert (n_inv_uses != NULL);
5806 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5807 {
5808 n_inv_uses[iid]--;
5809 if (n_inv_uses[iid] == 0)
5810 ivs->n_invs--;
5811 }
5812 }
5813
5814 /* Set GROUP not to be expressed by any candidate in IVS. */
5815
5816 static void
5817 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5818 struct iv_group *group)
5819 {
5820 unsigned gid = group->id, cid;
5821 struct cost_pair *cp;
5822
5823 cp = ivs->cand_for_group[gid];
5824 if (!cp)
5825 return;
5826 cid = cp->cand->id;
5827
5828 ivs->bad_groups++;
5829 ivs->cand_for_group[gid] = NULL;
5830 ivs->n_cand_uses[cid]--;
5831
5832 if (ivs->n_cand_uses[cid] == 0)
5833 {
5834 bitmap_clear_bit (ivs->cands, cid);
5835 ivs->n_cands--;
5836 ivs->cand_cost -= cp->cand->cost;
5837 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5838 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5839 }
5840
5841 ivs->cand_use_cost -= cp->cost;
5842 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5843 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5844 iv_ca_recount_cost (data, ivs);
5845 }
5846
5847 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5848 IVS. */
5849
5850 static void
5851 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5852 {
5853 bitmap_iterator bi;
5854 unsigned iid;
5855
5856 if (!invs)
5857 return;
5858
5859 gcc_assert (n_inv_uses != NULL);
5860 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5861 {
5862 n_inv_uses[iid]++;
5863 if (n_inv_uses[iid] == 1)
5864 ivs->n_invs++;
5865 }
5866 }
5867
5868 /* Set cost pair for GROUP in set IVS to CP. */
5869
5870 static void
5871 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5872 struct iv_group *group, struct cost_pair *cp)
5873 {
5874 unsigned gid = group->id, cid;
5875
5876 if (ivs->cand_for_group[gid] == cp)
5877 return;
5878
5879 if (ivs->cand_for_group[gid])
5880 iv_ca_set_no_cp (data, ivs, group);
5881
5882 if (cp)
5883 {
5884 cid = cp->cand->id;
5885
5886 ivs->bad_groups--;
5887 ivs->cand_for_group[gid] = cp;
5888 ivs->n_cand_uses[cid]++;
5889 if (ivs->n_cand_uses[cid] == 1)
5890 {
5891 bitmap_set_bit (ivs->cands, cid);
5892 ivs->n_cands++;
5893 ivs->cand_cost += cp->cand->cost;
5894 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5895 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5896 }
5897
5898 ivs->cand_use_cost += cp->cost;
5899 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5900 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5901 iv_ca_recount_cost (data, ivs);
5902 }
5903 }
5904
5905 /* Extend set IVS by expressing GROUP by some of the candidates in it
5906 if possible. Consider all important candidates if the candidates in
5907 set IVS don't give any result. */
5908
5909 static void
5910 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5911 struct iv_group *group)
5912 {
5913 struct cost_pair *best_cp = NULL, *cp;
5914 bitmap_iterator bi;
5915 unsigned i;
5916 struct iv_cand *cand;
5917
5918 gcc_assert (ivs->upto >= group->id);
5919 ivs->upto++;
5920 ivs->bad_groups++;
5921
5922 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5923 {
5924 cand = data->vcands[i];
5925 cp = get_group_iv_cost (data, group, cand);
5926 if (cheaper_cost_pair (cp, best_cp))
5927 best_cp = cp;
5928 }
5929
5930 if (best_cp == NULL)
5931 {
5932 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5933 {
5934 cand = data->vcands[i];
5935 cp = get_group_iv_cost (data, group, cand);
5936 if (cheaper_cost_pair (cp, best_cp))
5937 best_cp = cp;
5938 }
5939 }
5940
5941 iv_ca_set_cp (data, ivs, group, best_cp);
5942 }
5943
5944 /* Get cost for assignment IVS. */
5945
5946 static comp_cost
5947 iv_ca_cost (struct iv_ca *ivs)
5948 {
5949 /* This was a conditional expression but it triggered a bug in
5950 Sun C 5.5. */
5951 if (ivs->bad_groups)
5952 return infinite_cost;
5953 else
5954 return ivs->cost;
5955 }
5956
5957 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5958 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5959 respectively. */
5960
5961 static int
5962 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5963 struct iv_group *group, struct cost_pair *old_cp,
5964 struct cost_pair *new_cp)
5965 {
5966 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5967 unsigned old_n_invs = ivs->n_invs;
5968 iv_ca_set_cp (data, ivs, group, new_cp);
5969 unsigned new_n_invs = ivs->n_invs;
5970 iv_ca_set_cp (data, ivs, group, old_cp);
5971
5972 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5973 }
5974
5975 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5976 it before NEXT. */
5977
5978 static struct iv_ca_delta *
5979 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5980 struct cost_pair *new_cp, struct iv_ca_delta *next)
5981 {
5982 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5983
5984 change->group = group;
5985 change->old_cp = old_cp;
5986 change->new_cp = new_cp;
5987 change->next = next;
5988
5989 return change;
5990 }
5991
5992 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5993 are rewritten. */
5994
5995 static struct iv_ca_delta *
5996 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5997 {
5998 struct iv_ca_delta *last;
5999
6000 if (!l2)
6001 return l1;
6002
6003 if (!l1)
6004 return l2;
6005
6006 for (last = l1; last->next; last = last->next)
6007 continue;
6008 last->next = l2;
6009
6010 return l1;
6011 }
6012
6013 /* Reverse the list of changes DELTA, forming the inverse to it. */
6014
6015 static struct iv_ca_delta *
6016 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6017 {
6018 struct iv_ca_delta *act, *next, *prev = NULL;
6019
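/* Standard in-place singly linked list reversal; swapping OLD_CP and NEW_CP
   in every node makes the reversed list describe the inverse changes.  */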
6020 for (act = delta; act; act = next)
6021 {
6022 next = act->next;
6023 act->next = prev;
6024 prev = act;
6025
6026 std::swap (act->old_cp, act->new_cp);
6027 }
6028
6029 return prev;
6030 }
6031
6032 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6033 reverted instead. */
6034
6035 static void
6036 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6037 struct iv_ca_delta *delta, bool forward)
6038 {
6039 struct cost_pair *from, *to;
6040 struct iv_ca_delta *act;
6041
6042 if (!forward)
6043 delta = iv_ca_delta_reverse (delta);
6044
6045 for (act = delta; act; act = act->next)
6046 {
6047 from = act->old_cp;
6048 to = act->new_cp;
6049 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6050 iv_ca_set_cp (data, ivs, act->group, to);
6051 }
6052
6053 if (!forward)
6054 iv_ca_delta_reverse (delta);
6055 }
6056
6057 /* Returns true if CAND is used in IVS. */
6058
6059 static bool
6060 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6061 {
6062 return ivs->n_cand_uses[cand->id] > 0;
6063 }
6064
6065 /* Returns number of induction variable candidates in the set IVS. */
6066
6067 static unsigned
6068 iv_ca_n_cands (struct iv_ca *ivs)
6069 {
6070 return ivs->n_cands;
6071 }
6072
6073 /* Free the list of changes DELTA. */
6074
6075 static void
6076 iv_ca_delta_free (struct iv_ca_delta **delta)
6077 {
6078 struct iv_ca_delta *act, *next;
6079
6080 for (act = *delta; act; act = next)
6081 {
6082 next = act->next;
6083 free (act);
6084 }
6085
6086 *delta = NULL;
6087 }
6088
6089 /* Allocates new iv candidates assignment. */
6090
6091 static struct iv_ca *
6092 iv_ca_new (struct ivopts_data *data)
6093 {
6094 struct iv_ca *nw = XNEW (struct iv_ca);
6095
6096 nw->upto = 0;
6097 nw->bad_groups = 0;
6098 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6099 data->vgroups.length ());
6100 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6101 nw->cands = BITMAP_ALLOC (NULL);
6102 nw->n_cands = 0;
6103 nw->n_invs = 0;
6104 nw->cand_use_cost = no_cost;
6105 nw->cand_cost = 0;
6106 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6107 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6108 nw->cost = no_cost;
6109
6110 return nw;
6111 }
6112
6113 /* Free memory occupied by the set IVS. */
6114
6115 static void
6116 iv_ca_free (struct iv_ca **ivs)
6117 {
6118 free ((*ivs)->cand_for_group);
6119 free ((*ivs)->n_cand_uses);
6120 BITMAP_FREE ((*ivs)->cands);
6121 free ((*ivs)->n_inv_var_uses);
6122 free ((*ivs)->n_inv_expr_uses);
6123 free (*ivs);
6124 *ivs = NULL;
6125 }
6126
6127 /* Dumps IVS to FILE. */
6128
6129 static void
6130 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6131 {
6132 unsigned i;
6133 comp_cost cost = iv_ca_cost (ivs);
6134
6135 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6136 cost.complexity);
6137 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6138 ivs->cand_cost, ivs->cand_use_cost.cost,
6139 ivs->cand_use_cost.complexity);
6140 bitmap_print (file, ivs->cands, " candidates: ","\n");
6141
6142 for (i = 0; i < ivs->upto; i++)
6143 {
6144 struct iv_group *group = data->vgroups[i];
6145 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6146 if (cp)
6147 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6148 group->id, cp->cand->id, cp->cost.cost,
6149 cp->cost.complexity);
6150 else
6151 fprintf (file, " group:%d --> ??\n", group->id);
6152 }
6153
6154 const char *pref = "";
6155 fprintf (file, " invariant variables: ");
6156 for (i = 1; i <= data->max_inv_var_id; i++)
6157 if (ivs->n_inv_var_uses[i])
6158 {
6159 fprintf (file, "%s%d", pref, i);
6160 pref = ", ";
6161 }
6162
6163 pref = "";
6164 fprintf (file, "\n invariant expressions: ");
6165 for (i = 1; i <= data->max_inv_expr_id; i++)
6166 if (ivs->n_inv_expr_uses[i])
6167 {
6168 fprintf (file, "%s%d", pref, i);
6169 pref = ", ";
6170 }
6171
6172 fprintf (file, "\n\n");
6173 }
6174
6175 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6176 the new set, and store the differences in DELTA. The number of induction
6177 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6178 true, the function will try to find a solution with minimal iv candidates. */
6179
6180 static comp_cost
6181 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6182 struct iv_cand *cand, struct iv_ca_delta **delta,
6183 unsigned *n_ivs, bool min_ncand)
6184 {
6185 unsigned i;
6186 comp_cost cost;
6187 struct iv_group *group;
6188 struct cost_pair *old_cp, *new_cp;
6189
6190 *delta = NULL;
6191 for (i = 0; i < ivs->upto; i++)
6192 {
6193 group = data->vgroups[i];
6194 old_cp = iv_ca_cand_for_group (ivs, group);
6195
6196 if (old_cp
6197 && old_cp->cand == cand)
6198 continue;
6199
6200 new_cp = get_group_iv_cost (data, group, cand);
6201 if (!new_cp)
6202 continue;
6203
6204 if (!min_ncand)
6205 {
6206 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6207 /* Skip if new_cp depends on more invariants. */
6208 if (cmp_invs > 0)
6209 continue;
6210
6211 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6212 /* Skip if new_cp is not cheaper. */
6213 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6214 continue;
6215 }
6216
6217 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6218 }
6219
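/* Tentatively commit the accumulated changes, read off the resulting cost and
   candidate count, then revert; the caller decides whether to apply DELTA.  */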
6220 iv_ca_delta_commit (data, ivs, *delta, true);
6221 cost = iv_ca_cost (ivs);
6222 if (n_ivs)
6223 *n_ivs = iv_ca_n_cands (ivs);
6224 iv_ca_delta_commit (data, ivs, *delta, false);
6225
6226 return cost;
6227 }
6228
6229 /* Try narrowing set IVS by removing CAND. Return the cost of
6230 the new set and store the differences in DELTA. START is
6231 the candidate with which we start narrowing. */
6232
6233 static comp_cost
6234 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6235 struct iv_cand *cand, struct iv_cand *start,
6236 struct iv_ca_delta **delta)
6237 {
6238 unsigned i, ci;
6239 struct iv_group *group;
6240 struct cost_pair *old_cp, *new_cp, *cp;
6241 bitmap_iterator bi;
6242 struct iv_cand *cnd;
6243 comp_cost cost, best_cost, acost;
6244
6245 *delta = NULL;
6246 for (i = 0; i < data->vgroups.length (); i++)
6247 {
6248 group = data->vgroups[i];
6249
6250 old_cp = iv_ca_cand_for_group (ivs, group);
6251 if (old_cp->cand != cand)
6252 continue;
6253
6254 best_cost = iv_ca_cost (ivs);
6255 /* Start narrowing with START. */
6256 new_cp = get_group_iv_cost (data, group, start);
6257
6258 if (data->consider_all_candidates)
6259 {
6260 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6261 {
6262 if (ci == cand->id || (start && ci == start->id))
6263 continue;
6264
6265 cnd = data->vcands[ci];
6266
6267 cp = get_group_iv_cost (data, group, cnd);
6268 if (!cp)
6269 continue;
6270
6271 iv_ca_set_cp (data, ivs, group, cp);
6272 acost = iv_ca_cost (ivs);
6273
6274 if (acost < best_cost)
6275 {
6276 best_cost = acost;
6277 new_cp = cp;
6278 }
6279 }
6280 }
6281 else
6282 {
6283 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6284 {
6285 if (ci == cand->id || (start && ci == start->id))
6286 continue;
6287
6288 cnd = data->vcands[ci];
6289
6290 cp = get_group_iv_cost (data, group, cnd);
6291 if (!cp)
6292 continue;
6293
6294 iv_ca_set_cp (data, ivs, group, cp);
6295 acost = iv_ca_cost (ivs);
6296
6297 if (acost < best_cost)
6298 {
6299 best_cost = acost;
6300 new_cp = cp;
6301 }
6302 }
6303 }
6304 /* Restore the old cp for this group. */
6305 iv_ca_set_cp (data, ivs, group, old_cp);
6306
6307 if (!new_cp)
6308 {
6309 iv_ca_delta_free (delta);
6310 return infinite_cost;
6311 }
6312
6313 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6314 }
6315
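/* As in iv_ca_extend, commit the delta only to measure its cost, then undo it.  */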
6316 iv_ca_delta_commit (data, ivs, *delta, true);
6317 cost = iv_ca_cost (ivs);
6318 iv_ca_delta_commit (data, ivs, *delta, false);
6319
6320 return cost;
6321 }
6322
6323 /* Try optimizing the set of candidates IVS by removing candidates other
6324 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6325 differences in DELTA. */
6326
6327 static comp_cost
6328 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6329 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6330 {
6331 bitmap_iterator bi;
6332 struct iv_ca_delta *act_delta, *best_delta;
6333 unsigned i;
6334 comp_cost best_cost, acost;
6335 struct iv_cand *cand;
6336
6337 best_delta = NULL;
6338 best_cost = iv_ca_cost (ivs);
6339
6340 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6341 {
6342 cand = data->vcands[i];
6343
6344 if (cand == except_cand)
6345 continue;
6346
6347 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6348
6349 if (acost < best_cost)
6350 {
6351 best_cost = acost;
6352 iv_ca_delta_free (&best_delta);
6353 best_delta = act_delta;
6354 }
6355 else
6356 iv_ca_delta_free (&act_delta);
6357 }
6358
6359 if (!best_delta)
6360 {
6361 *delta = NULL;
6362 return best_cost;
6363 }
6364
6365 /* Recurse to possibly remove other unnecessary ivs. */
6366 iv_ca_delta_commit (data, ivs, best_delta, true);
6367 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6368 iv_ca_delta_commit (data, ivs, best_delta, false);
6369 *delta = iv_ca_delta_join (best_delta, *delta);
6370 return best_cost;
6371 }
6372
6373 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6374 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6375 the corresponding cost_pair; otherwise just return BEST_CP. */
6376
6377 static struct cost_pair*
6378 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6379 unsigned int cand_idx, struct iv_cand *old_cand,
6380 struct cost_pair *best_cp)
6381 {
6382 struct iv_cand *cand;
6383 struct cost_pair *cp;
6384
6385 gcc_assert (old_cand != NULL && best_cp != NULL);
6386 if (cand_idx == old_cand->id)
6387 return best_cp;
6388
6389 cand = data->vcands[cand_idx];
6390 cp = get_group_iv_cost (data, group, cand);
6391 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6392 return cp;
6393
6394 return best_cp;
6395 }
6396
6397 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6398 which are used by more than one iv use. For each such candidate,
6399 this function tries to represent the iv uses under that candidate using
6400 other candidates with lower local cost, then tries to prune the new set.
6401 If the new set has lower cost, it returns the new cost after recording
6402 candidate replacement in list DELTA. */
6403
6404 static comp_cost
6405 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6406 struct iv_ca_delta **delta)
6407 {
6408 bitmap_iterator bi, bj;
6409 unsigned int i, j, k;
6410 struct iv_cand *cand;
6411 comp_cost orig_cost, acost;
6412 struct iv_ca_delta *act_delta, *tmp_delta;
6413 struct cost_pair *old_cp, *best_cp = NULL;
6414
6415 *delta = NULL;
6416 orig_cost = iv_ca_cost (ivs);
6417
6418 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6419 {
6420 if (ivs->n_cand_uses[i] == 1
6421 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6422 continue;
6423
6424 cand = data->vcands[i];
6425
6426 act_delta = NULL;
6427 /* Represent uses under current candidate using other ones with
6428 lower local cost. */
6429 for (j = 0; j < ivs->upto; j++)
6430 {
6431 struct iv_group *group = data->vgroups[j];
6432 old_cp = iv_ca_cand_for_group (ivs, group);
6433
6434 if (old_cp->cand != cand)
6435 continue;
6436
6437 best_cp = old_cp;
6438 if (data->consider_all_candidates)
6439 for (k = 0; k < data->vcands.length (); k++)
6440 best_cp = cheaper_cost_with_cand (data, group, k,
6441 old_cp->cand, best_cp);
6442 else
6443 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6444 best_cp = cheaper_cost_with_cand (data, group, k,
6445 old_cp->cand, best_cp);
6446
6447 if (best_cp == old_cp)
6448 continue;
6449
6450 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6451 }
6452 /* No need for further pruning. */
6453 if (!act_delta)
6454 continue;
6455
6456 /* Prune the new candidate set. */
6457 iv_ca_delta_commit (data, ivs, act_delta, true);
6458 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6459 iv_ca_delta_commit (data, ivs, act_delta, false);
6460 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6461
6462 if (acost < orig_cost)
6463 {
6464 *delta = act_delta;
6465 return acost;
6466 }
6467 else
6468 iv_ca_delta_free (&act_delta);
6469 }
6470
6471 return orig_cost;
6472 }
6473
6474 /* Tries to extend the set IVS in the best possible way in order to
6475 express GROUP. If ORIGINALP is true, prefer candidates from
6476 the original set of IVs, otherwise favor important candidates not
6477 based on any memory object. */
6478
6479 static bool
6480 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6481 struct iv_group *group, bool originalp)
6482 {
6483 comp_cost best_cost, act_cost;
6484 unsigned i;
6485 bitmap_iterator bi;
6486 struct iv_cand *cand;
6487 struct iv_ca_delta *best_delta = NULL, *act_delta;
6488 struct cost_pair *cp;
6489
6490 iv_ca_add_group (data, ivs, group);
6491 best_cost = iv_ca_cost (ivs);
6492 cp = iv_ca_cand_for_group (ivs, group);
6493 if (cp)
6494 {
6495 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6496 iv_ca_set_no_cp (data, ivs, group);
6497 }
6498
6499 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6500 first try important candidates not based on any memory object. Only if
6501 this fails, try the specific ones. Rationale -- in loops with many
6502 variables the best choice often is to use just one generic biv. If we
6503 added here many ivs specific to the uses, the optimization algorithm later
6504 would be likely to get stuck in a local minimum, thus causing us to create
6505 too many ivs. The approach from few ivs to more seems more likely to be
6506 successful -- starting from few ivs, replacing an expensive use by a
6507 specific iv should always be a win. */
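/* First pass: scan the related candidates, filtered by ORIGINALP; the second
   pass over the whole cost map further below serves as a fallback when this
   yields only infinite costs.  */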
6508 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6509 {
6510 cand = data->vcands[i];
6511
6512 if (originalp && cand->pos != IP_ORIGINAL)
6513 continue;
6514
6515 if (!originalp && cand->iv->base_object != NULL_TREE)
6516 continue;
6517
6518 if (iv_ca_cand_used_p (ivs, cand))
6519 continue;
6520
6521 cp = get_group_iv_cost (data, group, cand);
6522 if (!cp)
6523 continue;
6524
6525 iv_ca_set_cp (data, ivs, group, cp);
6526 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6527 true);
6528 iv_ca_set_no_cp (data, ivs, group);
6529 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6530
6531 if (act_cost < best_cost)
6532 {
6533 best_cost = act_cost;
6534
6535 iv_ca_delta_free (&best_delta);
6536 best_delta = act_delta;
6537 }
6538 else
6539 iv_ca_delta_free (&act_delta);
6540 }
6541
6542 if (best_cost.infinite_cost_p ())
6543 {
6544 for (i = 0; i < group->n_map_members; i++)
6545 {
6546 cp = group->cost_map + i;
6547 cand = cp->cand;
6548 if (!cand)
6549 continue;
6550
6551 /* Already tried this. */
6552 if (cand->important)
6553 {
6554 if (originalp && cand->pos == IP_ORIGINAL)
6555 continue;
6556 if (!originalp && cand->iv->base_object == NULL_TREE)
6557 continue;
6558 }
6559
6560 if (iv_ca_cand_used_p (ivs, cand))
6561 continue;
6562
6563 act_delta = NULL;
6564 iv_ca_set_cp (data, ivs, group, cp);
6565 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6566 iv_ca_set_no_cp (data, ivs, group);
6567 act_delta = iv_ca_delta_add (group,
6568 iv_ca_cand_for_group (ivs, group),
6569 cp, act_delta);
6570
6571 if (act_cost < best_cost)
6572 {
6573 best_cost = act_cost;
6574
6575 if (best_delta)
6576 iv_ca_delta_free (&best_delta);
6577 best_delta = act_delta;
6578 }
6579 else
6580 iv_ca_delta_free (&act_delta);
6581 }
6582 }
6583
6584 iv_ca_delta_commit (data, ivs, best_delta, true);
6585 iv_ca_delta_free (&best_delta);
6586
6587 return !best_cost.infinite_cost_p ();
6588 }
6589
6590 /* Finds an initial assignment of candidates to uses. */
6591
6592 static struct iv_ca *
6593 get_initial_solution (struct ivopts_data *data, bool originalp)
6594 {
6595 unsigned i;
6596 struct iv_ca *ivs = iv_ca_new (data);
6597
6598 for (i = 0; i < data->vgroups.length (); i++)
6599 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6600 {
6601 iv_ca_free (&ivs);
6602 return NULL;
6603 }
6604
6605 return ivs;
6606 }
6607
6608 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6609 points to a bool variable; if it is true, this function tries to break
6610 the local optimal fixed-point by replacing candidates in IVS. */
6611
6612 static bool
6613 try_improve_iv_set (struct ivopts_data *data,
6614 struct iv_ca *ivs, bool *try_replace_p)
6615 {
6616 unsigned i, n_ivs;
6617 comp_cost acost, best_cost = iv_ca_cost (ivs);
6618 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6619 struct iv_cand *cand;
6620
6621 /* Try extending the set of induction variables by one. */
6622 for (i = 0; i < data->vcands.length (); i++)
6623 {
6624 cand = data->vcands[i];
6625
6626 if (iv_ca_cand_used_p (ivs, cand))
6627 continue;
6628
6629 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6630 if (!act_delta)
6631 continue;
6632
6633 /* If we successfully added the candidate and the set is small enough,
6634 try optimizing it by removing other candidates. */
6635 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6636 {
6637 iv_ca_delta_commit (data, ivs, act_delta, true);
6638 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6639 iv_ca_delta_commit (data, ivs, act_delta, false);
6640 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6641 }
6642
6643 if (acost < best_cost)
6644 {
6645 best_cost = acost;
6646 iv_ca_delta_free (&best_delta);
6647 best_delta = act_delta;
6648 }
6649 else
6650 iv_ca_delta_free (&act_delta);
6651 }
6652
6653 if (!best_delta)
6654 {
6655 /* Try removing the candidates from the set instead. */
6656 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6657
6658 if (!best_delta && *try_replace_p)
6659 {
6660 *try_replace_p = false;
6661 /* So far the candidate selection algorithm tends to choose fewer IVs
6662 so that it can handle cases in which loops have many variables
6663 but the best choice is often to use only one general biv. One
6664 weakness is that it can't handle the opposite cases, in which different
6665 candidates should be chosen with respect to each use. To solve
6666 the problem, we replace candidates in the manner described in the
6667 comments of iv_ca_replace, thus giving the general algorithm a chance
6668 to break the local optimal fixed-point in these cases. */
6669 best_cost = iv_ca_replace (data, ivs, &best_delta);
6670 }
6671
6672 if (!best_delta)
6673 return false;
6674 }
6675
6676 iv_ca_delta_commit (data, ivs, best_delta, true);
6677 gcc_assert (best_cost == iv_ca_cost (ivs));
6678 iv_ca_delta_free (&best_delta);
6679 return true;
6680 }
6681
6682 /* Attempts to find the optimal set of induction variables. We use a simple
6683 greedy heuristic -- we try to replace at most one candidate in the selected
6684 solution and remove the unused ivs while this improves the cost. */
6685
6686 static struct iv_ca *
6687 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6688 {
6689 struct iv_ca *set;
6690 bool try_replace_p = true;
6691
6692 /* Get the initial solution. */
6693 set = get_initial_solution (data, originalp);
6694 if (!set)
6695 {
6696 if (dump_file && (dump_flags & TDF_DETAILS))
6697 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6698 return NULL;
6699 }
6700
6701 if (dump_file && (dump_flags & TDF_DETAILS))
6702 {
6703 fprintf (dump_file, "Initial set of candidates:\n");
6704 iv_ca_dump (data, dump_file, set);
6705 }
6706
6707 while (try_improve_iv_set (data, set, &try_replace_p))
6708 {
6709 if (dump_file && (dump_flags & TDF_DETAILS))
6710 {
6711 fprintf (dump_file, "Improved to:\n");
6712 iv_ca_dump (data, dump_file, set);
6713 }
6714 }
6715
6716 return set;
6717 }
6718
6719 static struct iv_ca *
6720 find_optimal_iv_set (struct ivopts_data *data)
6721 {
6722 unsigned i;
6723 comp_cost cost, origcost;
6724 struct iv_ca *set, *origset;
6725
6726 /* Determine the cost based on a strategy that starts with original IVs,
6727 and try again using a strategy that prefers candidates not based
6728 on any IVs. */
6729 origset = find_optimal_iv_set_1 (data, true);
6730 set = find_optimal_iv_set_1 (data, false);
6731
6732 if (!origset && !set)
6733 return NULL;
6734
6735 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6736 cost = set ? iv_ca_cost (set) : infinite_cost;
6737
6738 if (dump_file && (dump_flags & TDF_DETAILS))
6739 {
6740 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6741 origcost.cost, origcost.complexity);
6742 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6743 cost.cost, cost.complexity);
6744 }
6745
6746 /* Choose the one with the best cost. */
6747 if (origcost <= cost)
6748 {
6749 if (set)
6750 iv_ca_free (&set);
6751 set = origset;
6752 }
6753 else if (origset)
6754 iv_ca_free (&origset);
6755
6756 for (i = 0; i < data->vgroups.length (); i++)
6757 {
6758 struct iv_group *group = data->vgroups[i];
6759 group->selected = iv_ca_cand_for_group (set, group)->cand;
6760 }
6761
6762 return set;
6763 }
6764
6765 /* Creates a new induction variable corresponding to CAND. */
6766
6767 static void
6768 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6769 {
6770 gimple_stmt_iterator incr_pos;
6771 tree base;
6772 struct iv_use *use;
6773 struct iv_group *group;
6774 bool after = false;
6775
6776 gcc_assert (cand->iv != NULL);
6777
6778 switch (cand->pos)
6779 {
6780 case IP_NORMAL:
6781 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6782 break;
6783
6784 case IP_END:
6785 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6786 after = true;
6787 break;
6788
6789 case IP_AFTER_USE:
6790 after = true;
6791 /* fall through */
6792 case IP_BEFORE_USE:
6793 incr_pos = gsi_for_stmt (cand->incremented_at);
6794 break;
6795
6796 case IP_ORIGINAL:
6797 /* Mark that the iv is preserved. */
6798 name_info (data, cand->var_before)->preserve_biv = true;
6799 name_info (data, cand->var_after)->preserve_biv = true;
6800
6801 /* Rewrite the increment so that it uses var_before directly. */
6802 use = find_interesting_uses_op (data, cand->var_after);
6803 group = data->vgroups[use->group_id];
6804 group->selected = cand;
6805 return;
6806 }
6807
6808 gimple_add_tmp_var (cand->var_before);
6809
6810 base = unshare_expr (cand->iv->base);
6811
6812 create_iv (base, unshare_expr (cand->iv->step),
6813 cand->var_before, data->current_loop,
6814 &incr_pos, after, &cand->var_before, &cand->var_after);
6815 }
6816
6817 /* Creates new induction variables described in SET. */
6818
6819 static void
6820 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6821 {
6822 unsigned i;
6823 struct iv_cand *cand;
6824 bitmap_iterator bi;
6825
6826 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6827 {
6828 cand = data->vcands[i];
6829 create_new_iv (data, cand);
6830 }
6831
6832 if (dump_file && (dump_flags & TDF_DETAILS))
6833 {
6834 fprintf (dump_file, "Selected IV set for loop %d",
6835 data->current_loop->num);
6836 if (data->loop_loc != UNKNOWN_LOCATION)
6837 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6838 LOCATION_LINE (data->loop_loc));
6839 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6840 avg_loop_niter (data->current_loop));
6841 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6842 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6843 {
6844 cand = data->vcands[i];
6845 dump_cand (dump_file, cand);
6846 }
6847 fprintf (dump_file, "\n");
6848 }
6849 }
6850
6851 /* Rewrites USE (definition of iv used in a nonlinear expression)
6852 using candidate CAND. */
6853
6854 static void
6855 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6856 struct iv_use *use, struct iv_cand *cand)
6857 {
6858 gassign *ass;
6859 gimple_stmt_iterator bsi;
6860 tree comp, type = get_use_type (use), tgt;
6861
6862 /* An important special case -- if we are asked to express value of
6863 the original iv by itself, just exit; there is no need to
6864 introduce a new computation (that might also need casting the
6865 variable to unsigned and back). */
6866 if (cand->pos == IP_ORIGINAL
6867 && cand->incremented_at == use->stmt)
6868 {
6869 tree op = NULL_TREE;
6870 enum tree_code stmt_code;
6871
6872 gcc_assert (is_gimple_assign (use->stmt));
6873 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6874
6875 /* Check whether we may leave the computation unchanged.
6876 This is the case only if it does not rely on other
6877 computations in the loop -- otherwise, the computation
6878 we rely upon may be removed in remove_unused_ivs,
6879 thus leading to ICE. */
6880 stmt_code = gimple_assign_rhs_code (use->stmt);
6881 if (stmt_code == PLUS_EXPR
6882 || stmt_code == MINUS_EXPR
6883 || stmt_code == POINTER_PLUS_EXPR)
6884 {
6885 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6886 op = gimple_assign_rhs2 (use->stmt);
6887 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6888 op = gimple_assign_rhs1 (use->stmt);
6889 }
6890
6891 if (op != NULL_TREE)
6892 {
6893 if (expr_invariant_in_loop_p (data->current_loop, op))
6894 return;
6895 if (TREE_CODE (op) == SSA_NAME)
6896 {
6897 struct iv *iv = get_iv (data, op);
6898 if (iv != NULL && integer_zerop (iv->step))
6899 return;
6900 }
6901 }
6902 }
6903
6904 switch (gimple_code (use->stmt))
6905 {
6906 case GIMPLE_PHI:
6907 tgt = PHI_RESULT (use->stmt);
6908
6909 /* If we should keep the biv, do not replace it. */
6910 if (name_info (data, tgt)->preserve_biv)
6911 return;
6912
6913 bsi = gsi_after_labels (gimple_bb (use->stmt));
6914 break;
6915
6916 case GIMPLE_ASSIGN:
6917 tgt = gimple_assign_lhs (use->stmt);
6918 bsi = gsi_for_stmt (use->stmt);
6919 break;
6920
6921 default:
6922 gcc_unreachable ();
6923 }
6924
6925 aff_tree aff_inv, aff_var;
6926 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6927 use, cand, &aff_inv, &aff_var))
6928 gcc_unreachable ();
6929
6930 unshare_aff_combination (&aff_inv);
6931 unshare_aff_combination (&aff_var);
6932 /* Prefer the CSE opportunity over the loop invariant by adding the offset
6933 last, so that iv_uses with different offsets can be CSEd. */
6934 poly_widest_int offset = aff_inv.offset;
6935 aff_inv.offset = 0;
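/* For example (illustrative only): uses computed as BASE + i * 4 + 8 and
   BASE + i * 4 + 24 now share the BASE + i * 4 part, and only the final
   constant addition below differs, so the common part can be CSEd.  */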
6936
6937 gimple_seq stmt_list = NULL, seq = NULL;
6938 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6939 tree comp_op2 = aff_combination_to_tree (&aff_var);
6940 gcc_assert (comp_op1 && comp_op2);
6941
6942 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6943 gimple_seq_add_seq (&stmt_list, seq);
6944 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6945 gimple_seq_add_seq (&stmt_list, seq);
6946
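/* If either operand is a pointer, make it the first one so the pointer-plus
   form below can be used.  */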
6947 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6948 std::swap (comp_op1, comp_op2);
6949
6950 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6951 {
6952 comp = fold_build_pointer_plus (comp_op1,
6953 fold_convert (sizetype, comp_op2));
6954 comp = fold_build_pointer_plus (comp,
6955 wide_int_to_tree (sizetype, offset));
6956 }
6957 else
6958 {
6959 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6960 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6961 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6962 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6963 }
6964
6965 comp = fold_convert (type, comp);
6966 if (!valid_gimple_rhs_p (comp)
6967 || (gimple_code (use->stmt) != GIMPLE_PHI
6968 /* We can't allow re-allocating the stmt as it might be pointed
6969 to still. */
6970 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6971 >= gimple_num_ops (gsi_stmt (bsi)))))
6972 {
6973 comp = force_gimple_operand (comp, &seq, true, NULL);
6974 gimple_seq_add_seq (&stmt_list, seq);
6975 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6976 {
6977 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6978 /* As this isn't a plain copy we have to reset alignment
6979 information. */
6980 if (SSA_NAME_PTR_INFO (comp))
6981 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6982 }
6983 }
6984
6985 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6986 if (gimple_code (use->stmt) == GIMPLE_PHI)
6987 {
6988 ass = gimple_build_assign (tgt, comp);
6989 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6990
6991 bsi = gsi_for_stmt (use->stmt);
6992 remove_phi_node (&bsi, false);
6993 }
6994 else
6995 {
6996 gimple_assign_set_rhs_from_tree (&bsi, comp);
6997 use->stmt = gsi_stmt (bsi);
6998 }
6999 }
7000
7001 /* Performs a peephole optimization to reorder the iv update statement with
7002 a mem ref to enable instruction combining in later phases. The mem ref uses
7003 the iv value before the update, so the reordering transformation requires
7004 adjustment of the offset. CAND is the selected IV_CAND.
7005
7006 Example:
7007
7008 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7009 iv2 = iv1 + 1;
7010
7011 if (t < val) (1)
7012 goto L;
7013 goto Head;
7014
7015
7016 Directly propagating t over to (1) would introduce an overlapping live range
7017 and thus increase register pressure. This peephole transforms it into:
7018
7019
7020 iv2 = iv1 + 1;
7021 t = MEM_REF (base, iv2, 8, 8);
7022 if (t < val)
7023 goto L;
7024 goto Head;
7025 */
7026
7027 static void
7028 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7029 {
7030 tree var_after;
7031 gimple *iv_update, *stmt;
7032 basic_block bb;
7033 gimple_stmt_iterator gsi, gsi_iv;
7034
7035 if (cand->pos != IP_NORMAL)
7036 return;
7037
7038 var_after = cand->var_after;
7039 iv_update = SSA_NAME_DEF_STMT (var_after);
7040
7041 bb = gimple_bb (iv_update);
7042 gsi = gsi_last_nondebug_bb (bb);
7043 stmt = gsi_stmt (gsi);
7044
7045 /* Only handle conditional statements for now. */
7046 if (gimple_code (stmt) != GIMPLE_COND)
7047 return;
7048
7049 gsi_prev_nondebug (&gsi);
7050 stmt = gsi_stmt (gsi);
7051 if (stmt != iv_update)
7052 return;
7053
7054 gsi_prev_nondebug (&gsi);
7055 if (gsi_end_p (gsi))
7056 return;
7057
7058 stmt = gsi_stmt (gsi);
7059 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7060 return;
7061
7062 if (stmt != use->stmt)
7063 return;
7064
7065 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7066 return;
7067
7068 if (dump_file && (dump_flags & TDF_DETAILS))
7069 {
7070 fprintf (dump_file, "Reordering \n");
7071 print_gimple_stmt (dump_file, iv_update, 0);
7072 print_gimple_stmt (dump_file, use->stmt, 0);
7073 fprintf (dump_file, "\n");
7074 }
7075
7076 gsi = gsi_for_stmt (use->stmt);
7077 gsi_iv = gsi_for_stmt (iv_update);
7078 gsi_move_before (&gsi_iv, &gsi);
7079
7080 cand->pos = IP_BEFORE_USE;
7081 cand->incremented_at = use->stmt;
7082 }
7083
7084 /* Return the alias pointer type that should be used for a MEM_REF
7085 associated with USE, which has type USE_PTR_ADDRESS. */
7086
7087 static tree
7088 get_alias_ptr_type_for_ptr_address (iv_use *use)
7089 {
7090 gcall *call = as_a <gcall *> (use->stmt);
7091 switch (gimple_call_internal_fn (call))
7092 {
7093 case IFN_MASK_LOAD:
7094 case IFN_MASK_STORE:
7095 /* The second argument contains the correct alias type. */
7096 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7097 return TREE_TYPE (gimple_call_arg (call, 1));
7098
7099 default:
7100 gcc_unreachable ();
7101 }
7102 }
7103
7104
7105 /* Rewrites USE (address that is an iv) using candidate CAND. */
7106
7107 static void
7108 rewrite_use_address (struct ivopts_data *data,
7109 struct iv_use *use, struct iv_cand *cand)
7110 {
7111 aff_tree aff;
7112 bool ok;
7113
7114 adjust_iv_update_pos (cand, use);
7115 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7116 gcc_assert (ok);
7117 unshare_aff_combination (&aff);
7118
7119 /* To avoid undefined overflow problems, all IV candidates use unsigned
7120 integer types. The drawback is that this makes it impossible for
7121 create_mem_ref to distinguish an IV that is based on a memory object
7122 from one that represents simply an offset.
7123
7124 To work around this problem, we pass a hint to create_mem_ref that
7125 indicates which variable (if any) in aff is an IV based on a memory
7126 object. Note that we only consider the candidate. If this is not
7127 based on an object, the base of the reference is in some subexpression
7128 of the use -- but these will use pointer types, so they are recognized
7129 by the create_mem_ref heuristics anyway. */
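/* Illustrative example (hypothetical names): if CAND is a pointer iv p_7
   stepping through array A, passing p_7 as the hint below lets create_mem_ref
   build the reference directly off p_7 instead of re-deriving the access
   from &A plus an offset.  */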
7130 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7131 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7132 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7133 tree type = use->mem_type;
7134 tree alias_ptr_type;
7135 if (use->type == USE_PTR_ADDRESS)
7136 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7137 else
7138 {
7139 gcc_assert (type == TREE_TYPE (*use->op_p));
7140 unsigned int align = get_object_alignment (*use->op_p);
7141 if (align != TYPE_ALIGN (type))
7142 type = build_aligned_type (type, align);
7143 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7144 }
7145 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7146 iv, base_hint, data->speed);
7147
7148 if (use->type == USE_PTR_ADDRESS)
7149 {
7150 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7151 ref = fold_convert (get_use_type (use), ref);
7152 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7153 true, GSI_SAME_STMT);
7154 }
7155 else
7156 copy_ref_info (ref, *use->op_p);
7157
7158 *use->op_p = ref;
7159 }
7160
7161 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7162 candidate CAND. */
7163
7164 static void
7165 rewrite_use_compare (struct ivopts_data *data,
7166 struct iv_use *use, struct iv_cand *cand)
7167 {
7168 tree comp, op, bound;
7169 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7170 enum tree_code compare;
7171 struct iv_group *group = data->vgroups[use->group_id];
7172 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7173
7174 bound = cp->value;
7175 if (bound)
7176 {
7177 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7178 tree var_type = TREE_TYPE (var);
7179 gimple_seq stmts;
7180
7181 if (dump_file && (dump_flags & TDF_DETAILS))
7182 {
7183 fprintf (dump_file, "Replacing exit test: ");
7184 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7185 }
7186 compare = cp->comp;
7187 bound = unshare_expr (fold_convert (var_type, bound));
7188 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7189 if (stmts)
7190 gsi_insert_seq_on_edge_immediate (
7191 loop_preheader_edge (data->current_loop),
7192 stmts);
7193
7194 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7195 gimple_cond_set_lhs (cond_stmt, var);
7196 gimple_cond_set_code (cond_stmt, compare);
7197 gimple_cond_set_rhs (cond_stmt, op);
7198 return;
7199 }
7200
7201 /* The induction variable elimination failed; just express the original
7202 giv. */
7203 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7204 gcc_assert (comp != NULL_TREE);
7205 gcc_assert (use->op_p != NULL);
7206 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7207 SSA_NAME_VAR (*use->op_p),
7208 true, GSI_SAME_STMT);
7209 }
7210
7211 /* Rewrite the groups using the selected induction variables. */
7212
7213 static void
rewrite_groups(struct ivopts_data * data)7214 rewrite_groups (struct ivopts_data *data)
7215 {
7216 unsigned i, j;
7217
7218 for (i = 0; i < data->vgroups.length (); i++)
7219 {
7220 struct iv_group *group = data->vgroups[i];
7221 struct iv_cand *cand = group->selected;
7222
7223 gcc_assert (cand);
7224
7225 if (group->type == USE_NONLINEAR_EXPR)
7226 {
7227 for (j = 0; j < group->vuses.length (); j++)
7228 {
7229 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7230 update_stmt (group->vuses[j]->stmt);
7231 }
7232 }
7233 else if (address_p (group->type))
7234 {
7235 for (j = 0; j < group->vuses.length (); j++)
7236 {
7237 rewrite_use_address (data, group->vuses[j], cand);
7238 update_stmt (group->vuses[j]->stmt);
7239 }
7240 }
7241 else
7242 {
7243 gcc_assert (group->type == USE_COMPARE);
7244
7245 for (j = 0; j < group->vuses.length (); j++)
7246 {
7247 rewrite_use_compare (data, group->vuses[j], cand);
7248 update_stmt (group->vuses[j]->stmt);
7249 }
7250 }
7251 }
7252 }
7253
7254 /* Removes the ivs that are not used after rewriting. */
7255
7256 static void
remove_unused_ivs(struct ivopts_data * data)7257 remove_unused_ivs (struct ivopts_data *data)
7258 {
7259 unsigned j;
7260 bitmap_iterator bi;
7261 bitmap toremove = BITMAP_ALLOC (NULL);
7262
7263 /* Figure out an order in which to release SSA DEFs so that we don't
7264 release something that we'd have to propagate into a debug stmt
7265 afterwards. */
7266 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7267 {
7268 struct version_info *info;
7269
7270 info = ver_info (data, j);
7271 if (info->iv
7272 && !integer_zerop (info->iv->step)
7273 && !info->inv_id
7274 && !info->iv->nonlin_use
7275 && !info->preserve_biv)
7276 {
7277 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7278
7279 tree def = info->iv->ssa_name;
7280
7281 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7282 {
7283 imm_use_iterator imm_iter;
7284 use_operand_p use_p;
7285 gimple *stmt;
7286 int count = 0;
7287
7288 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7289 {
7290 if (!gimple_debug_bind_p (stmt))
7291 continue;
7292
7293 /* We just want to determine whether to do nothing
7294 (count == 0), to substitute the computed
7295 expression into a single use of the SSA DEF by
7296 itself (count == 1), or to use a debug temp
7297 because the SSA DEF is used multiple times or as
7298 part of a larger expression (count > 1). */
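		  /* For instance, a bind such as "# DEBUG x => i_1" uses
		     the def by itself and is counted once, whereas
		     "# DEBUG x => i_1 * 4" embeds it in a larger expression
		     and is counted twice, forcing the debug-temp path below
		     (illustrative bindings).  */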
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
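	      /* Pick the selected candidate that most resembles this IV:
		 a matching step scores highest, then a matching base mode,
		 then a constant base; only the first 64 groups are scanned
		 to keep this cheap.  */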
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      SSA_NAME_DEF_STMT (def),
					      &dummy_use, best_cand);
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
		  else
		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
	free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
	{
	  if (group->cost_map[j].inv_vars)
	    BITMAP_FREE (group->cost_map[j].inv_vars);
	  if (group->cost_map[j].inv_exprs)
	    BITMAP_FREE (group->cost_map[j].inv_exprs);
	}

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
	BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
	BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  if (data->base_object_map)
    delete data->base_object_map;
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !gimple_call_internal_p (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each SSA name, determine whether it behaves as an induction
     variable in the current loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"