1 /* Induction variable optimizations.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33      Note the interesting uses are categorized and handled in groups.
34      Generally, address type uses are grouped together if their iv bases
35      differ only in constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
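
/* As a purely illustrative sketch (not taken from this file): in a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address of a[i] is recomputed from i in every iteration.  After this
   pass the loop may instead use a single pointer induction variable,
   roughly

     for (p = &a[0]; p != &a[0] + n; p++)
       *p = 0;

   i.e. the address computation is strength-reduced into a pointer
   increment and the original counter can be eliminated if it has no
   other uses.  */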
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111
112 /* For lang_hooks.types.type_for_mode. */
113 #include "langhooks.h"
114
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
118
119 /* The infinite cost. */
120 #define INFTY 10000000
121
122 /* Returns the expected number of loop iterations for LOOP.
123 The average trip count is computed from profile data if it
124 exists. */
125
126 static inline HOST_WIDE_INT
127 avg_loop_niter (struct loop *loop)
128 {
129 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
130 if (niter == -1)
131 {
132 niter = likely_max_stmt_executions_int (loop);
133
134 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
135 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
136 }
137
138 return niter;
139 }
140
141 struct iv_use;
142
143 /* Representation of the induction variable. */
144 struct iv
145 {
146 tree base; /* Initial value of the iv. */
147 tree base_object; /* A memory object to which the induction variable points. */
148 tree step; /* Step of the iv (constant only). */
149 tree ssa_name; /* The ssa name with the value. */
150 struct iv_use *nonlin_use; /* The nonlinear-expression use of this iv, if any. */
151 bool biv_p; /* Is it a biv? */
152 bool no_overflow; /* True if the iv doesn't overflow. */
153 bool have_address_use;/* For biv, indicate if it's used in any address
154 type use. */
155 };
156
157 /* Per-ssa version information (induction variable descriptions, etc.). */
158 struct version_info
159 {
160 tree name; /* The ssa name. */
161 struct iv *iv; /* Induction variable description. */
162 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
163 an expression that is not an induction variable. */
164 bool preserve_biv; /* For the original biv, whether to preserve it. */
165 unsigned inv_id; /* Id of an invariant. */
166 };
167
168 /* Types of uses. */
169 enum use_type
170 {
171 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
172 USE_REF_ADDRESS, /* Use is an address for an explicit memory
173 reference. */
174 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
175 cases where the expansion of the function
176 will turn the argument into a normal address. */
177 USE_COMPARE /* Use is a compare. */
178 };
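
/* Illustrative examples only (assuming a simple int loop): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the address computation for a[i] is a USE_REF_ADDRESS use of i and the
   exit test i < n is a USE_COMPARE use, while an expression such as i * i
   would be a USE_NONLINEAR_EXPR use.  USE_PTR_ADDRESS covers pointer
   arguments of calls (e.g. certain memory/string builtins) whose expansion
   turns the argument into a plain address.  */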
179
180 /* Cost of a computation. */
181 struct comp_cost
182 {
183   comp_cost (): cost (0), complexity (0), scratch (0)
184 {}
185
186 comp_cost (int cost, unsigned complexity, int scratch = 0)
187     : cost (cost), complexity (complexity), scratch (scratch)
188 {}
189
190 /* Returns true if COST is infinite. */
191 bool infinite_cost_p ();
192
193 /* Adds costs COST1 and COST2. */
194 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
195
196 /* Adds COST to the comp_cost. */
197 comp_cost operator+= (comp_cost cost);
198
199 /* Adds constant C to this comp_cost. */
200 comp_cost operator+= (HOST_WIDE_INT c);
201
202 /* Subtracts constant C from this comp_cost. */
203 comp_cost operator-= (HOST_WIDE_INT c);
204
205 /* Divide the comp_cost by constant C. */
206 comp_cost operator/= (HOST_WIDE_INT c);
207
208 /* Multiply the comp_cost by constant C. */
209 comp_cost operator*= (HOST_WIDE_INT c);
210
211 /* Subtracts cost COST2 from COST1. */
212 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
213
214 /* Subtracts COST from this comp_cost. */
215 comp_cost operator-= (comp_cost cost);
216
217 /* Returns true if COST1 is smaller than COST2. */
218 friend bool operator< (comp_cost cost1, comp_cost cost2);
219
220 /* Returns true if COST1 and COST2 are equal. */
221 friend bool operator== (comp_cost cost1, comp_cost cost2);
222
223 /* Returns true if COST1 is smaller than or equal to COST2. */
224 friend bool operator<= (comp_cost cost1, comp_cost cost2);
225
226 int cost; /* The runtime cost. */
227 unsigned complexity; /* The estimate of the complexity of the code for
228 the computation (in no concrete units --
229 complexity field should be larger for more
230 complex expressions and addressing modes). */
231 int scratch; /* Scratch used during cost computation. */
232 };
233
234 static const comp_cost no_cost;
235 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
236
237 bool
238 comp_cost::infinite_cost_p ()
239 {
240 return cost == INFTY;
241 }
242
243 comp_cost
244 operator+ (comp_cost cost1, comp_cost cost2)
245 {
246 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
247 return infinite_cost;
248
249 cost1.cost += cost2.cost;
250 cost1.complexity += cost2.complexity;
251
252 return cost1;
253 }
254
255 comp_cost
256 operator- (comp_cost cost1, comp_cost cost2)
257 {
258 if (cost1.infinite_cost_p ())
259 return infinite_cost;
260
261 gcc_assert (!cost2.infinite_cost_p ());
262
263 cost1.cost -= cost2.cost;
264 cost1.complexity -= cost2.complexity;
265
266 return cost1;
267 }
268
269 comp_cost
270 comp_cost::operator+= (comp_cost cost)
271 {
272 *this = *this + cost;
273 return *this;
274 }
275
276 comp_cost
277 comp_cost::operator+= (HOST_WIDE_INT c)
278 {
279 if (infinite_cost_p ())
280 return *this;
281
282 this->cost += c;
283
284 return *this;
285 }
286
287 comp_cost
288 comp_cost::operator-= (HOST_WIDE_INT c)
289 {
290 if (infinite_cost_p ())
291 return *this;
292
293 this->cost -= c;
294
295 return *this;
296 }
297
298 comp_cost
299 comp_cost::operator/= (HOST_WIDE_INT c)
300 {
301 if (infinite_cost_p ())
302 return *this;
303
304 this->cost /= c;
305
306 return *this;
307 }
308
309 comp_cost
310 comp_cost::operator*= (HOST_WIDE_INT c)
311 {
312 if (infinite_cost_p ())
313 return *this;
314
315 this->cost *= c;
316
317 return *this;
318 }
319
320 comp_cost
321 comp_cost::operator-= (comp_cost cost)
322 {
323 *this = *this - cost;
324 return *this;
325 }
326
327 bool
328 operator< (comp_cost cost1, comp_cost cost2)
329 {
330 if (cost1.cost == cost2.cost)
331 return cost1.complexity < cost2.complexity;
332
333 return cost1.cost < cost2.cost;
334 }
335
336 bool
337 operator== (comp_cost cost1, comp_cost cost2)
338 {
339 return cost1.cost == cost2.cost
340 && cost1.complexity == cost2.complexity;
341 }
342
343 bool
344 operator<= (comp_cost cost1, comp_cost cost2)
345 {
346 return cost1 < cost2 || cost1 == cost2;
347 }
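
/* For example, under the ordering above {cost = 4, complexity = 1} is
   smaller than {4, 2}, which in turn is smaller than {5, 0}: the runtime
   cost is compared first and complexity only breaks ties.  */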
348
349 struct iv_inv_expr_ent;
350
351 /* The candidate - cost pair. */
352 struct cost_pair
353 {
354 struct iv_cand *cand; /* The candidate. */
355 comp_cost cost; /* The cost. */
356 enum tree_code comp; /* For iv elimination, the comparison. */
357 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
358 preserved when representing iv_use with iv_cand. */
359 bitmap inv_exprs; /* The list of newly created invariant expressions
360 when representing iv_use with iv_cand. */
361 tree value; /* For final value elimination, the expression for
362 the final value of the iv. For iv elimination,
363 the new bound to compare with. */
364 };
365
366 /* Use. */
367 struct iv_use
368 {
369 unsigned id; /* The id of the use. */
370 unsigned group_id; /* The group id the use belongs to. */
371 enum use_type type; /* Type of the use. */
372 tree mem_type; /* The memory type to use when testing whether an
373 address is legitimate, and what the address's
374 cost is. */
375 struct iv *iv; /* The induction variable it is based on. */
376 gimple *stmt; /* Statement in that it occurs. */
377 tree *op_p; /* The place where it occurs. */
378
379 tree addr_base; /* Base address with const offset stripped. */
380 poly_uint64_pod addr_offset;
381 /* Const offset stripped from base address. */
382 };
383
384 /* Group of uses. */
385 struct iv_group
386 {
387 /* The id of the group. */
388 unsigned id;
389 /* Uses of the group are of the same type. */
390 enum use_type type;
391 /* The set of "related" IV candidates, plus the important ones. */
392 bitmap related_cands;
393 /* Number of IV candidates in the cost_map. */
394 unsigned n_map_members;
395 /* The costs w.r.t. the iv candidates. */
396 struct cost_pair *cost_map;
397 /* The selected candidate for the group. */
398 struct iv_cand *selected;
399 /* Uses in the group. */
400 vec<struct iv_use *> vuses;
401 };
402
403 /* The position where the iv is computed. */
404 enum iv_position
405 {
406 IP_NORMAL, /* At the end, just before the exit condition. */
407 IP_END, /* At the end of the latch block. */
408 IP_BEFORE_USE, /* Immediately before a specific use. */
409 IP_AFTER_USE, /* Immediately after a specific use. */
410 IP_ORIGINAL /* The original biv. */
411 };
412
413 /* The induction variable candidate. */
414 struct iv_cand
415 {
416 unsigned id; /* The number of the candidate. */
417 bool important; /* Whether this is an "important" candidate, i.e. such
418 that it should be considered by all uses. */
419 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
420 gimple *incremented_at;/* For original biv, the statement where it is
421 incremented. */
422 tree var_before; /* The variable used for it before increment. */
423 tree var_after; /* The variable used for it after increment. */
424 struct iv *iv; /* The value of the candidate. NULL for
425 "pseudocandidate" used to indicate the possibility
426 to replace the final value of an iv by direct
427 computation of the value. */
428 unsigned cost; /* Cost of the candidate. */
429 unsigned cost_step; /* Cost of the candidate's increment operation. */
430 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
431 where it is incremented. */
432 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
433 iv_cand. */
434 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
435 handle it as a new invariant expression which will
436 be hoisted out of loop. */
437 struct iv *orig_iv; /* The original iv if this cand is added from biv with
438 smaller type. */
439 };
440
441 /* Hashtable entry for common candidate derived from iv uses. */
442 struct iv_common_cand
443 {
444 tree base;
445 tree step;
446 /* IV uses from which this common candidate is derived. */
447 auto_vec<struct iv_use *> uses;
448 hashval_t hash;
449 };
450
451 /* Hashtable helpers. */
452
453 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
454 {
455 static inline hashval_t hash (const iv_common_cand *);
456 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
457 };
458
459 /* Hash function for possible common candidates. */
460
461 inline hashval_t
462 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
463 {
464 return ccand->hash;
465 }
466
467 /* Hash table equality function for common candidates. */
468
469 inline bool
470 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
471 const iv_common_cand *ccand2)
472 {
473 return (ccand1->hash == ccand2->hash
474 && operand_equal_p (ccand1->base, ccand2->base, 0)
475 && operand_equal_p (ccand1->step, ccand2->step, 0)
476 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
477 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
478 }
479
480 /* Loop invariant expression hashtable entry. */
481
482 struct iv_inv_expr_ent
483 {
484 /* Tree expression of the entry. */
485 tree expr;
486 /* Unique identifier. */
487 int id;
488 /* Hash value. */
489 hashval_t hash;
490 };
491
492 /* Sort iv_inv_expr_ent pair A and B by id field. */
493
494 static int
495 sort_iv_inv_expr_ent (const void *a, const void *b)
496 {
497 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
498 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
499
500 unsigned id1 = (*e1)->id;
501 unsigned id2 = (*e2)->id;
502
503 if (id1 < id2)
504 return -1;
505 else if (id1 > id2)
506 return 1;
507 else
508 return 0;
509 }
510
511 /* Hashtable helpers. */
512
513 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
514 {
515 static inline hashval_t hash (const iv_inv_expr_ent *);
516 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
517 };
518
519 /* Return true if uses of type TYPE represent some form of address. */
520
521 inline bool
522 address_p (use_type type)
523 {
524 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
525 }
526
527 /* Hash function for loop invariant expressions. */
528
529 inline hashval_t
530 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
531 {
532 return expr->hash;
533 }
534
535 /* Hash table equality function for expressions. */
536
537 inline bool
538 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
539 const iv_inv_expr_ent *expr2)
540 {
541 return expr1->hash == expr2->hash
542 && operand_equal_p (expr1->expr, expr2->expr, 0);
543 }
544
545 struct ivopts_data
546 {
547 /* The currently optimized loop. */
548 struct loop *current_loop;
549 location_t loop_loc;
550
551 /* Numbers of iterations for all exits of the current loop. */
552 hash_map<edge, tree_niter_desc *> *niters;
553
554 /* Number of registers used in it. */
555 unsigned regs_used;
556
557 /* The size of version_info array allocated. */
558 unsigned version_info_size;
559
560 /* The array of information for the ssa names. */
561 struct version_info *version_info;
562
563 /* The hashtable of loop invariant expressions created
564 by ivopt. */
565 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
566
567 /* The bitmap of indices in version_info whose value was changed. */
568 bitmap relevant;
569
570 /* The uses of induction variables. */
571 vec<iv_group *> vgroups;
572
573 /* The candidates. */
574 vec<iv_cand *> vcands;
575
576 /* A bitmap of important candidates. */
577 bitmap important_candidates;
578
579 /* Cache used by tree_to_aff_combination_expand. */
580 hash_map<tree, name_expansion *> *name_expansion_cache;
581
582 /* The hashtable of common candidates derived from iv uses. */
583 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
584
585 /* The common candidates. */
586 vec<iv_common_cand *> iv_common_cands;
587
588 /* Hash map recording base object information of tree exp. */
589 hash_map<tree, tree> *base_object_map;
590
591 /* The maximum invariant variable id. */
592 unsigned max_inv_var_id;
593
594 /* The maximum invariant expression id. */
595 unsigned max_inv_expr_id;
596
597 /* Number of no_overflow BIVs which are not used in memory address. */
598 unsigned bivs_not_used_in_addr;
599
600 /* Obstack for iv structure. */
601 struct obstack iv_obstack;
602
603 /* Whether to consider just related and important candidates when replacing a
604 use. */
605 bool consider_all_candidates;
606
607 /* Are we optimizing for speed? */
608 bool speed;
609
610 /* Whether the loop body includes any function calls. */
611 bool body_includes_call;
612
613 /* Whether the loop body can only be exited via single exit. */
614 bool loop_single_exit_p;
615 };
616
617 /* An assignment of iv candidates to uses. */
618
619 struct iv_ca
620 {
621 /* The number of uses covered by the assignment. */
622 unsigned upto;
623
624 /* Number of uses that cannot be expressed by the candidates in the set. */
625 unsigned bad_groups;
626
627 /* Candidate assigned to a use, together with the related costs. */
628 struct cost_pair **cand_for_group;
629
630 /* Number of times each candidate is used. */
631 unsigned *n_cand_uses;
632
633 /* The candidates used. */
634 bitmap cands;
635
636 /* The number of candidates in the set. */
637 unsigned n_cands;
638
639 /* The number of invariants needed, including both invariant variables and
640 invariant expressions. */
641 unsigned n_invs;
642
643 /* Total cost of expressing uses. */
644 comp_cost cand_use_cost;
645
646 /* Total cost of candidates. */
647 unsigned cand_cost;
648
649 /* Number of times each invariant variable is used. */
650 unsigned *n_inv_var_uses;
651
652 /* Number of times each invariant expression is used. */
653 unsigned *n_inv_expr_uses;
654
655 /* Total cost of the assignment. */
656 comp_cost cost;
657 };
658
659 /* Difference of two iv candidate assignments. */
660
661 struct iv_ca_delta
662 {
663 /* Changed group. */
664 struct iv_group *group;
665
666 /* An old assignment (for rollback purposes). */
667 struct cost_pair *old_cp;
668
669 /* A new assignment. */
670 struct cost_pair *new_cp;
671
672 /* Next change in the list. */
673 struct iv_ca_delta *next;
674 };
675
676 /* Bound on the number of candidates below which all candidates are considered. */
677
678 #define CONSIDER_ALL_CANDIDATES_BOUND \
679 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
680
681 /* If there are more iv occurrences, we just give up (it is quite unlikely that
682 optimizing such a loop would help, and it would take ages). */
683
684 #define MAX_CONSIDERED_GROUPS \
685 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
686
687 /* If there are at most this number of ivs in the set, try removing unnecessary
688 ivs from the set always. */
689
690 #define ALWAYS_PRUNE_CAND_SET_BOUND \
691 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
692
693 /* The list of trees for which the decl_rtl field must be reset is stored
694 here. */
695
696 static vec<tree> decl_rtl_to_reset;
697
698 static comp_cost force_expr_to_var_cost (tree, bool);
699
700 /* The single loop exit if it dominates the latch, NULL otherwise. */
701
702 edge
703 single_dom_exit (struct loop *loop)
704 {
705 edge exit = single_exit (loop);
706
707 if (!exit)
708 return NULL;
709
710 if (!just_once_each_iteration_p (loop, exit->src))
711 return NULL;
712
713 return exit;
714 }
715
716 /* Dumps information about the induction variable IV to FILE. Don't dump
717 variable's name if DUMP_NAME is FALSE. The information is dumped with
718 preceding spaces indicated by INDENT_LEVEL. */
719
720 void
721 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
722 {
723 const char *p;
724 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
725
726 if (indent_level > 4)
727 indent_level = 4;
728 p = spaces + 8 - (indent_level << 1);
729
730 fprintf (file, "%sIV struct:\n", p);
731 if (iv->ssa_name && dump_name)
732 {
733 fprintf (file, "%s SSA_NAME:\t", p);
734 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
735 fprintf (file, "\n");
736 }
737
738 fprintf (file, "%s Type:\t", p);
739 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
740 fprintf (file, "\n");
741
742 fprintf (file, "%s Base:\t", p);
743 print_generic_expr (file, iv->base, TDF_SLIM);
744 fprintf (file, "\n");
745
746 fprintf (file, "%s Step:\t", p);
747 print_generic_expr (file, iv->step, TDF_SLIM);
748 fprintf (file, "\n");
749
750 if (iv->base_object)
751 {
752 fprintf (file, "%s Object:\t", p);
753 print_generic_expr (file, iv->base_object, TDF_SLIM);
754 fprintf (file, "\n");
755 }
756
757 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
758
759 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
760 p, iv->no_overflow ? "No-overflow" : "Overflow");
761 }
762
763 /* Dumps information about the USE to FILE. */
764
765 void
766 dump_use (FILE *file, struct iv_use *use)
767 {
768 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
769 fprintf (file, " At stmt:\t");
770 print_gimple_stmt (file, use->stmt, 0);
771 fprintf (file, " At pos:\t");
772 if (use->op_p)
773 print_generic_expr (file, *use->op_p, TDF_SLIM);
774 fprintf (file, "\n");
775 dump_iv (file, use->iv, false, 2);
776 }
777
778 /* Dumps information about the uses to FILE. */
779
780 void
781 dump_groups (FILE *file, struct ivopts_data *data)
782 {
783 unsigned i, j;
784 struct iv_group *group;
785
786 for (i = 0; i < data->vgroups.length (); i++)
787 {
788 group = data->vgroups[i];
789 fprintf (file, "Group %d:\n", group->id);
790 if (group->type == USE_NONLINEAR_EXPR)
791 fprintf (file, " Type:\tGENERIC\n");
792 else if (group->type == USE_REF_ADDRESS)
793 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
794 else if (group->type == USE_PTR_ADDRESS)
795 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
796 else
797 {
798 gcc_assert (group->type == USE_COMPARE);
799 fprintf (file, " Type:\tCOMPARE\n");
800 }
801 for (j = 0; j < group->vuses.length (); j++)
802 dump_use (file, group->vuses[j]);
803 }
804 }
805
806 /* Dumps information about induction variable candidate CAND to FILE. */
807
808 void
809 dump_cand (FILE *file, struct iv_cand *cand)
810 {
811 struct iv *iv = cand->iv;
812
813 fprintf (file, "Candidate %d:\n", cand->id);
814 if (cand->inv_vars)
815 {
816 fprintf (file, " Depend on inv.vars: ");
817 dump_bitmap (file, cand->inv_vars);
818 }
819 if (cand->inv_exprs)
820 {
821 fprintf (file, " Depend on inv.exprs: ");
822 dump_bitmap (file, cand->inv_exprs);
823 }
824
825 if (cand->var_before)
826 {
827 fprintf (file, " Var befor: ");
828 print_generic_expr (file, cand->var_before, TDF_SLIM);
829 fprintf (file, "\n");
830 }
831 if (cand->var_after)
832 {
833 fprintf (file, " Var after: ");
834 print_generic_expr (file, cand->var_after, TDF_SLIM);
835 fprintf (file, "\n");
836 }
837
838 switch (cand->pos)
839 {
840 case IP_NORMAL:
841 fprintf (file, " Incr POS: before exit test\n");
842 break;
843
844 case IP_BEFORE_USE:
845 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
846 break;
847
848 case IP_AFTER_USE:
849 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
850 break;
851
852 case IP_END:
853 fprintf (file, " Incr POS: at end\n");
854 break;
855
856 case IP_ORIGINAL:
857 fprintf (file, " Incr POS: orig biv\n");
858 break;
859 }
860
861 dump_iv (file, iv, false, 1);
862 }
863
864 /* Returns the info for ssa version VER. */
865
866 static inline struct version_info *
867 ver_info (struct ivopts_data *data, unsigned ver)
868 {
869 return data->version_info + ver;
870 }
871
872 /* Returns the info for ssa name NAME. */
873
874 static inline struct version_info *
875 name_info (struct ivopts_data *data, tree name)
876 {
877 return ver_info (data, SSA_NAME_VERSION (name));
878 }
879
880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
881 emitted in LOOP. */
882
883 static bool
884 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
885 {
886 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
887
888 gcc_assert (bb);
889
890 if (sbb == loop->latch)
891 return true;
892
893 if (sbb != bb)
894 return false;
895
896 return stmt == last_stmt (bb);
897 }
898
899 /* Returns true if STMT is after the place where the original induction
900 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
901 if the positions are identical. */
902
903 static bool
904 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
905 {
906 basic_block cand_bb = gimple_bb (cand->incremented_at);
907 basic_block stmt_bb = gimple_bb (stmt);
908
909 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
910 return false;
911
912 if (stmt_bb != cand_bb)
913 return true;
914
915 if (true_if_equal
916 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
917 return true;
918 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
919 }
920
921 /* Returns true if STMT is after the place where the induction variable
922 CAND is incremented in LOOP. */
923
924 static bool
925 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
926 {
927 switch (cand->pos)
928 {
929 case IP_END:
930 return false;
931
932 case IP_NORMAL:
933 return stmt_after_ip_normal_pos (loop, stmt);
934
935 case IP_ORIGINAL:
936 case IP_AFTER_USE:
937 return stmt_after_inc_pos (cand, stmt, false);
938
939 case IP_BEFORE_USE:
940 return stmt_after_inc_pos (cand, stmt, true);
941
942 default:
943 gcc_unreachable ();
944 }
945 }
946
947 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
948
949 static bool
950 abnormal_ssa_name_p (tree exp)
951 {
952 if (!exp)
953 return false;
954
955 if (TREE_CODE (exp) != SSA_NAME)
956 return false;
957
958 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
959 }
960
961 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
962 abnormal phi node. Callback for for_each_index. */
963
964 static bool
965 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
966 void *data ATTRIBUTE_UNUSED)
967 {
968 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
969 {
970 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
971 return false;
972 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
973 return false;
974 }
975
976 return !abnormal_ssa_name_p (*index);
977 }
978
979 /* Returns true if EXPR contains a ssa name that occurs in an
980 abnormal phi node. */
981
982 bool
983 contains_abnormal_ssa_name_p (tree expr)
984 {
985 enum tree_code code;
986 enum tree_code_class codeclass;
987
988 if (!expr)
989 return false;
990
991 code = TREE_CODE (expr);
992 codeclass = TREE_CODE_CLASS (code);
993
994 if (code == CALL_EXPR)
995 {
996 tree arg;
997 call_expr_arg_iterator iter;
998 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
999 if (contains_abnormal_ssa_name_p (arg))
1000 return true;
1001 return false;
1002 }
1003
1004 if (code == SSA_NAME)
1005 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1006
1007 if (code == INTEGER_CST
1008 || is_gimple_min_invariant (expr))
1009 return false;
1010
1011 if (code == ADDR_EXPR)
1012 return !for_each_index (&TREE_OPERAND (expr, 0),
1013 idx_contains_abnormal_ssa_name_p,
1014 NULL);
1015
1016 if (code == COND_EXPR)
1017 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1018 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1019 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1020
1021 switch (codeclass)
1022 {
1023 case tcc_binary:
1024 case tcc_comparison:
1025 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1026 return true;
1027
1028 /* Fallthru. */
1029 case tcc_unary:
1030 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1031 return true;
1032
1033 break;
1034
1035 default:
1036 gcc_unreachable ();
1037 }
1038
1039 return false;
1040 }
1041
1042 /* Returns the structure describing number of iterations determined from
1043 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1044
1045 static struct tree_niter_desc *
1046 niter_for_exit (struct ivopts_data *data, edge exit)
1047 {
1048 struct tree_niter_desc *desc;
1049 tree_niter_desc **slot;
1050
1051 if (!data->niters)
1052 {
1053 data->niters = new hash_map<edge, tree_niter_desc *>;
1054 slot = NULL;
1055 }
1056 else
1057 slot = data->niters->get (exit);
1058
1059 if (!slot)
1060 {
1061 /* Try to determine number of iterations. We cannot safely work with ssa
1062 names that appear in phi nodes on abnormal edges, so that we do not
1063 create overlapping life ranges for them (PR 27283). */
1064 desc = XNEW (struct tree_niter_desc);
1065 if (!number_of_iterations_exit (data->current_loop,
1066 exit, desc, true)
1067 || contains_abnormal_ssa_name_p (desc->niter))
1068 {
1069 XDELETE (desc);
1070 desc = NULL;
1071 }
1072 data->niters->put (exit, desc);
1073 }
1074 else
1075 desc = *slot;
1076
1077 return desc;
1078 }
1079
1080 /* Returns the structure describing number of iterations determined from
1081 single dominating exit of DATA->current_loop, or NULL if something
1082 goes wrong. */
1083
1084 static struct tree_niter_desc *
1085 niter_for_single_dom_exit (struct ivopts_data *data)
1086 {
1087 edge exit = single_dom_exit (data->current_loop);
1088
1089 if (!exit)
1090 return NULL;
1091
1092 return niter_for_exit (data, exit);
1093 }
1094
1095 /* Initializes data structures used by the iv optimization pass, stored
1096 in DATA. */
1097
1098 static void
1099 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1100 {
1101 data->version_info_size = 2 * num_ssa_names;
1102 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1103 data->relevant = BITMAP_ALLOC (NULL);
1104 data->important_candidates = BITMAP_ALLOC (NULL);
1105 data->max_inv_var_id = 0;
1106 data->max_inv_expr_id = 0;
1107 data->niters = NULL;
1108 data->vgroups.create (20);
1109 data->vcands.create (20);
1110 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1111 data->name_expansion_cache = NULL;
1112 data->base_object_map = NULL;
1113 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1114 data->iv_common_cands.create (20);
1115 decl_rtl_to_reset.create (20);
1116 gcc_obstack_init (&data->iv_obstack);
1117 }
1118
1119 /* walk_tree callback for determine_base_object. */
1120
1121 static tree
1122 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1123 {
1124 tree_code code = TREE_CODE (*tp);
1125 tree obj = NULL_TREE;
1126 if (code == ADDR_EXPR)
1127 {
1128 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1129 if (!base)
1130 obj = *tp;
1131 else if (TREE_CODE (base) != MEM_REF)
1132 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1133 }
1134 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1135 obj = fold_convert (ptr_type_node, *tp);
1136
1137 if (!obj)
1138 {
1139 if (!EXPR_P (*tp))
1140 *walk_subtrees = 0;
1141
1142 return NULL_TREE;
1143 }
1144 /* Record special node for multiple base objects and stop. */
1145 if (*static_cast<tree *> (wdata))
1146 {
1147 *static_cast<tree *> (wdata) = integer_zero_node;
1148 return integer_zero_node;
1149 }
1150 /* Record the base object and continue looking. */
1151 *static_cast<tree *> (wdata) = obj;
1152 return NULL_TREE;
1153 }
1154
1155 /* Returns a memory object to which EXPR points, with caching.  Returns NULL
1156 if we are able to determine that it does not point to any such object;
1157 specially, returns integer_zero_node if EXPR contains multiple base objects. */
1158
1159 static tree
1160 determine_base_object (struct ivopts_data *data, tree expr)
1161 {
1162 tree *slot, obj = NULL_TREE;
1163 if (data->base_object_map)
1164 {
1165 if ((slot = data->base_object_map->get(expr)) != NULL)
1166 return *slot;
1167 }
1168 else
1169 data->base_object_map = new hash_map<tree, tree>;
1170
1171 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1172 data->base_object_map->put (expr, obj);
1173 return obj;
1174 }
1175
1176 /* Return true if address expression with non-DECL_P operand appears
1177 in EXPR. */
1178
1179 static bool
1180 contain_complex_addr_expr (tree expr)
1181 {
1182 bool res = false;
1183
1184 STRIP_NOPS (expr);
1185 switch (TREE_CODE (expr))
1186 {
1187 case POINTER_PLUS_EXPR:
1188 case PLUS_EXPR:
1189 case MINUS_EXPR:
1190 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1191 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1192 break;
1193
1194 case ADDR_EXPR:
1195 return (!DECL_P (TREE_OPERAND (expr, 0)));
1196
1197 default:
1198 return false;
1199 }
1200
1201 return res;
1202 }
1203
1204 /* Allocates an induction variable with given initial value BASE and step STEP
1205 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1206
1207 static struct iv *
1208 alloc_iv (struct ivopts_data *data, tree base, tree step,
1209 bool no_overflow = false)
1210 {
1211 tree expr = base;
1212 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1213 sizeof (struct iv));
1214 gcc_assert (step != NULL_TREE);
1215
1216 /* Lower address expression in base except ones with DECL_P as operand.
1217 By doing this:
1218 1) More accurate cost can be computed for address expressions;
1219 2) Duplicate candidates won't be created for bases in different
1220 forms, like &a[0] and &a. */
1221 STRIP_NOPS (expr);
1222 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1223 || contain_complex_addr_expr (expr))
1224 {
1225 aff_tree comb;
1226 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1227 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1228 }
1229
1230 iv->base = base;
1231 iv->base_object = determine_base_object (data, base);
1232 iv->step = step;
1233 iv->biv_p = false;
1234 iv->nonlin_use = NULL;
1235 iv->ssa_name = NULL_TREE;
1236 if (!no_overflow
1237 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1238 base, step))
1239 no_overflow = true;
1240 iv->no_overflow = no_overflow;
1241 iv->have_address_use = false;
1242
1243 return iv;
1244 }
1245
1246 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1247 doesn't overflow. */
1248
1249 static void
1250 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1251 bool no_overflow)
1252 {
1253 struct version_info *info = name_info (data, iv);
1254
1255 gcc_assert (!info->iv);
1256
1257 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1258 info->iv = alloc_iv (data, base, step, no_overflow);
1259 info->iv->ssa_name = iv;
1260 }
1261
1262 /* Finds induction variable declaration for VAR. */
1263
1264 static struct iv *
1265 get_iv (struct ivopts_data *data, tree var)
1266 {
1267 basic_block bb;
1268 tree type = TREE_TYPE (var);
1269
1270 if (!POINTER_TYPE_P (type)
1271 && !INTEGRAL_TYPE_P (type))
1272 return NULL;
1273
1274 if (!name_info (data, var)->iv)
1275 {
1276 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1277
1278 if (!bb
1279 || !flow_bb_inside_loop_p (data->current_loop, bb))
1280 set_iv (data, var, var, build_int_cst (type, 0), true);
1281 }
1282
1283 return name_info (data, var)->iv;
1284 }
1285
1286 /* Return the first non-invariant ssa var found in EXPR. */
1287
1288 static tree
1289 extract_single_var_from_expr (tree expr)
1290 {
1291 int i, n;
1292 tree tmp;
1293 enum tree_code code;
1294
1295 if (!expr || is_gimple_min_invariant (expr))
1296 return NULL;
1297
1298 code = TREE_CODE (expr);
1299 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1300 {
1301 n = TREE_OPERAND_LENGTH (expr);
1302 for (i = 0; i < n; i++)
1303 {
1304 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1305
1306 if (tmp)
1307 return tmp;
1308 }
1309 }
1310 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1311 }
1312
1313 /* Finds basic ivs. */
1314
1315 static bool
1316 find_bivs (struct ivopts_data *data)
1317 {
1318 gphi *phi;
1319 affine_iv iv;
1320 tree step, type, base, stop;
1321 bool found = false;
1322 struct loop *loop = data->current_loop;
1323 gphi_iterator psi;
1324
1325 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1326 {
1327 phi = psi.phi ();
1328
1329 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1330 continue;
1331
1332 if (virtual_operand_p (PHI_RESULT (phi)))
1333 continue;
1334
1335 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1336 continue;
1337
1338 if (integer_zerop (iv.step))
1339 continue;
1340
1341 step = iv.step;
1342 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1343 /* Stop expanding iv base at the first ssa var referred by iv step.
1344 Ideally we should stop at any ssa var referred by the iv step, but
1345 that is expensive and unusual, so we just do it on the first one.
1346
1347 See PR64705 for the rationale. */
1348 stop = extract_single_var_from_expr (step);
1349 base = expand_simple_operations (base, stop);
1350 if (contains_abnormal_ssa_name_p (base)
1351 || contains_abnormal_ssa_name_p (step))
1352 continue;
1353
1354 type = TREE_TYPE (PHI_RESULT (phi));
1355 base = fold_convert (type, base);
1356 if (step)
1357 {
1358 if (POINTER_TYPE_P (type))
1359 step = convert_to_ptrofftype (step);
1360 else
1361 step = fold_convert (type, step);
1362 }
1363
1364 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1365 found = true;
1366 }
1367
1368 return found;
1369 }
1370
1371 /* Marks basic ivs. */
1372
1373 static void
1374 mark_bivs (struct ivopts_data *data)
1375 {
1376 gphi *phi;
1377 gimple *def;
1378 tree var;
1379 struct iv *iv, *incr_iv;
1380 struct loop *loop = data->current_loop;
1381 basic_block incr_bb;
1382 gphi_iterator psi;
1383
1384 data->bivs_not_used_in_addr = 0;
1385 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1386 {
1387 phi = psi.phi ();
1388
1389 iv = get_iv (data, PHI_RESULT (phi));
1390 if (!iv)
1391 continue;
1392
1393 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1394 def = SSA_NAME_DEF_STMT (var);
1395 /* Don't mark iv peeled from other one as biv. */
1396 if (def
1397 && gimple_code (def) == GIMPLE_PHI
1398 && gimple_bb (def) == loop->header)
1399 continue;
1400
1401 incr_iv = get_iv (data, var);
1402 if (!incr_iv)
1403 continue;
1404
1405 /* If the increment is in the subloop, ignore it. */
1406 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1407 if (incr_bb->loop_father != data->current_loop
1408 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1409 continue;
1410
1411 iv->biv_p = true;
1412 incr_iv->biv_p = true;
1413 if (iv->no_overflow)
1414 data->bivs_not_used_in_addr++;
1415 if (incr_iv->no_overflow)
1416 data->bivs_not_used_in_addr++;
1417 }
1418 }
1419
1420 /* Checks whether STMT defines a linear induction variable and stores its
1421 parameters to IV. */
1422
1423 static bool
1424 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1425 {
1426 tree lhs, stop;
1427 struct loop *loop = data->current_loop;
1428
1429 iv->base = NULL_TREE;
1430 iv->step = NULL_TREE;
1431
1432 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1433 return false;
1434
1435 lhs = gimple_assign_lhs (stmt);
1436 if (TREE_CODE (lhs) != SSA_NAME)
1437 return false;
1438
1439 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1440 return false;
1441
1442 /* Stop expanding iv base at the first ssa var referred by iv step.
1443 Ideally we should stop at any ssa var referred by the iv step, but
1444 that is expensive and unusual, so we just do it on the first one.
1445
1446 See PR64705 for the rationale. */
1447 stop = extract_single_var_from_expr (iv->step);
1448 iv->base = expand_simple_operations (iv->base, stop);
1449 if (contains_abnormal_ssa_name_p (iv->base)
1450 || contains_abnormal_ssa_name_p (iv->step))
1451 return false;
1452
1453 /* If STMT could throw, then do not consider STMT as defining a GIV.
1454 While this will suppress optimizations, we cannot safely delete this
1455 GIV and associated statements, even if it appears it is not used. */
1456 if (stmt_could_throw_p (cfun, stmt))
1457 return false;
1458
1459 return true;
1460 }
1461
1462 /* Finds general ivs in statement STMT. */
1463
1464 static void
1465 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1466 {
1467 affine_iv iv;
1468
1469 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1470 return;
1471
1472 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1473 }
1474
1475 /* Finds general ivs in basic block BB. */
1476
1477 static void
1478 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1479 {
1480 gimple_stmt_iterator bsi;
1481
1482 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1483 find_givs_in_stmt (data, gsi_stmt (bsi));
1484 }
1485
1486 /* Finds general ivs. */
1487
1488 static void
1489 find_givs (struct ivopts_data *data)
1490 {
1491 struct loop *loop = data->current_loop;
1492 basic_block *body = get_loop_body_in_dom_order (loop);
1493 unsigned i;
1494
1495 for (i = 0; i < loop->num_nodes; i++)
1496 find_givs_in_bb (data, body[i]);
1497 free (body);
1498 }
1499
1500 /* For each ssa name defined in LOOP determines whether it is an induction
1501 variable and if so, its initial value and step. */
1502
1503 static bool
1504 find_induction_variables (struct ivopts_data *data)
1505 {
1506 unsigned i;
1507 bitmap_iterator bi;
1508
1509 if (!find_bivs (data))
1510 return false;
1511
1512 find_givs (data);
1513 mark_bivs (data);
1514
1515 if (dump_file && (dump_flags & TDF_DETAILS))
1516 {
1517 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1518
1519 if (niter)
1520 {
1521 fprintf (dump_file, " number of iterations ");
1522 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1523 if (!integer_zerop (niter->may_be_zero))
1524 {
1525 fprintf (dump_file, "; zero if ");
1526 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1527 }
1528 fprintf (dump_file, "\n");
1529 }
1530
1531 fprintf (dump_file, "\n<Induction Vars>:\n");
1532 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1533 {
1534 struct version_info *info = ver_info (data, i);
1535 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1536 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1537 }
1538 }
1539
1540 return true;
1541 }
1542
1543 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1544 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1545 is the const offset stripped from IV base and MEM_TYPE is the type
1546 of the memory being addressed. For uses of other types, ADDR_BASE
1547 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1548
1549 static struct iv_use *
1550 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1551 gimple *stmt, enum use_type type, tree mem_type,
1552 tree addr_base, poly_uint64 addr_offset)
1553 {
1554 struct iv_use *use = XCNEW (struct iv_use);
1555
1556 use->id = group->vuses.length ();
1557 use->group_id = group->id;
1558 use->type = type;
1559 use->mem_type = mem_type;
1560 use->iv = iv;
1561 use->stmt = stmt;
1562 use->op_p = use_p;
1563 use->addr_base = addr_base;
1564 use->addr_offset = addr_offset;
1565
1566 group->vuses.safe_push (use);
1567 return use;
1568 }
1569
1570 /* Checks whether OP is a loop-level invariant and if so, records it.
1571 NONLINEAR_USE is true if the invariant is used in a way we do not
1572 handle specially. */
1573
1574 static void
1575 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1576 {
1577 basic_block bb;
1578 struct version_info *info;
1579
1580 if (TREE_CODE (op) != SSA_NAME
1581 || virtual_operand_p (op))
1582 return;
1583
1584 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1585 if (bb
1586 && flow_bb_inside_loop_p (data->current_loop, bb))
1587 return;
1588
1589 info = name_info (data, op);
1590 info->name = op;
1591 info->has_nonlin_use |= nonlinear_use;
1592 if (!info->inv_id)
1593 info->inv_id = ++data->max_inv_var_id;
1594 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1595 }
1596
1597 /* Record a group of TYPE. */
1598
1599 static struct iv_group *
1600 record_group (struct ivopts_data *data, enum use_type type)
1601 {
1602 struct iv_group *group = XCNEW (struct iv_group);
1603
1604 group->id = data->vgroups.length ();
1605 group->type = type;
1606 group->related_cands = BITMAP_ALLOC (NULL);
1607 group->vuses.create (1);
1608
1609 data->vgroups.safe_push (group);
1610 return group;
1611 }
1612
1613 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1614 New group will be created if there is no existing group for the use.
1615 MEM_TYPE is the type of memory being addressed, or NULL if this
1616 isn't an address reference. */
1617
1618 static struct iv_use *
1619 record_group_use (struct ivopts_data *data, tree *use_p,
1620 struct iv *iv, gimple *stmt, enum use_type type,
1621 tree mem_type)
1622 {
1623 tree addr_base = NULL;
1624 struct iv_group *group = NULL;
1625 poly_uint64 addr_offset = 0;
1626
1627 /* Record non address type use in a new group. */
1628 if (address_p (type))
1629 {
1630 unsigned int i;
1631
1632 addr_base = strip_offset (iv->base, &addr_offset);
1633 for (i = 0; i < data->vgroups.length (); i++)
1634 {
1635 struct iv_use *use;
1636
1637 group = data->vgroups[i];
1638 use = group->vuses[0];
1639 if (!address_p (use->type))
1640 continue;
1641
1642 /* Check if it has the same stripped base and step. */
1643 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1644 && operand_equal_p (iv->step, use->iv->step, 0)
1645 && operand_equal_p (addr_base, use->addr_base, 0))
1646 break;
1647 }
1648 if (i == data->vgroups.length ())
1649 group = NULL;
1650 }
1651
1652 if (!group)
1653 group = record_group (data, type);
1654
1655 return record_use (group, use_p, iv, stmt, type, mem_type,
1656 addr_base, addr_offset);
1657 }
1658
1659 /* Checks whether the use OP is interesting and if so, records it. */
1660
1661 static struct iv_use *
1662 find_interesting_uses_op (struct ivopts_data *data, tree op)
1663 {
1664 struct iv *iv;
1665 gimple *stmt;
1666 struct iv_use *use;
1667
1668 if (TREE_CODE (op) != SSA_NAME)
1669 return NULL;
1670
1671 iv = get_iv (data, op);
1672 if (!iv)
1673 return NULL;
1674
1675 if (iv->nonlin_use)
1676 {
1677 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1678 return iv->nonlin_use;
1679 }
1680
1681 if (integer_zerop (iv->step))
1682 {
1683 record_invariant (data, op, true);
1684 return NULL;
1685 }
1686
1687 stmt = SSA_NAME_DEF_STMT (op);
1688 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1689
1690 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1691 iv->nonlin_use = use;
1692 return use;
1693 }
1694
1695 /* Indicate how compare type iv_use can be handled. */
1696 enum comp_iv_rewrite
1697 {
1698 COMP_IV_NA,
1699 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1700 COMP_IV_EXPR,
1701 /* We may rewrite compare type iv_uses on both sides of comparison by
1702 expressing value of each iv_use. */
1703 COMP_IV_EXPR_2,
1704 /* We may rewrite compare type iv_use by expressing value of the iv_use
1705 or by eliminating it with other iv_cand. */
1706 COMP_IV_ELIM
1707 };
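
/* Illustrative examples only: for an exit test like i < n where i is an
   iv and n is loop invariant, the use may be rewritten or eliminated
   against another candidate (COMP_IV_ELIM); if both operands of i < j are
   ivs, both sides may be rewritten (COMP_IV_EXPR_2); if the non-iv operand
   is not recognized as loop invariant, only the iv side may be rewritten
   (COMP_IV_EXPR).  */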
1708
1709 /* Given a condition in statement STMT, checks whether it is a compare
1710 of an induction variable and an invariant.  If this is the case,
1711 CONTROL_VAR is set to location of the iv, BOUND to the location of
1712 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1713 induction variable descriptions, and the kind of rewrite possible is
1714 returned.  If this is not the case, CONTROL_VAR and BOUND are set to
1715 the arguments of the condition and COMP_IV_NA is returned. */
1716
1717 static enum comp_iv_rewrite
1718 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1719 tree **control_var, tree **bound,
1720 struct iv **iv_var, struct iv **iv_bound)
1721 {
1722 /* The objects returned when COND has constant operands. */
1723 static struct iv const_iv;
1724 static tree zero;
1725 tree *op0 = &zero, *op1 = &zero;
1726 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1727 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1728
1729 if (gimple_code (stmt) == GIMPLE_COND)
1730 {
1731 gcond *cond_stmt = as_a <gcond *> (stmt);
1732 op0 = gimple_cond_lhs_ptr (cond_stmt);
1733 op1 = gimple_cond_rhs_ptr (cond_stmt);
1734 }
1735 else
1736 {
1737 op0 = gimple_assign_rhs1_ptr (stmt);
1738 op1 = gimple_assign_rhs2_ptr (stmt);
1739 }
1740
1741 zero = integer_zero_node;
1742 const_iv.step = integer_zero_node;
1743
1744 if (TREE_CODE (*op0) == SSA_NAME)
1745 iv0 = get_iv (data, *op0);
1746 if (TREE_CODE (*op1) == SSA_NAME)
1747 iv1 = get_iv (data, *op1);
1748
1749 /* If both sides of the comparison are IVs, we can express them on both ends. */
1750 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1751 {
1752 rewrite_type = COMP_IV_EXPR_2;
1753 goto end;
1754 }
1755
1756 /* If neither side of the comparison is an IV. */
1757 if ((!iv0 || integer_zerop (iv0->step))
1758 && (!iv1 || integer_zerop (iv1->step)))
1759 goto end;
1760
1761 /* Control variable may be on the other side. */
1762 if (!iv0 || integer_zerop (iv0->step))
1763 {
1764 std::swap (op0, op1);
1765 std::swap (iv0, iv1);
1766 }
1767 /* If one side is IV and the other side isn't loop invariant. */
1768 if (!iv1)
1769 rewrite_type = COMP_IV_EXPR;
1770 /* If one side is IV and the other side is loop invariant. */
1771 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1772 rewrite_type = COMP_IV_ELIM;
1773
1774 end:
1775 if (control_var)
1776 *control_var = op0;
1777 if (iv_var)
1778 *iv_var = iv0;
1779 if (bound)
1780 *bound = op1;
1781 if (iv_bound)
1782 *iv_bound = iv1;
1783
1784 return rewrite_type;
1785 }
1786
1787 /* Checks whether the condition in STMT is interesting and if so,
1788 records it. */
1789
1790 static void
1791 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1792 {
1793 tree *var_p, *bound_p;
1794 struct iv *var_iv, *bound_iv;
1795 enum comp_iv_rewrite ret;
1796
1797 ret = extract_cond_operands (data, stmt,
1798 &var_p, &bound_p, &var_iv, &bound_iv);
1799 if (ret == COMP_IV_NA)
1800 {
1801 find_interesting_uses_op (data, *var_p);
1802 find_interesting_uses_op (data, *bound_p);
1803 return;
1804 }
1805
1806 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1807 /* Record compare type iv_use for iv on the other side of comparison. */
1808 if (ret == COMP_IV_EXPR_2)
1809 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1810 }
1811
1812 /* Returns the outermost loop EXPR is obviously invariant in
1813 relative to the loop LOOP, i.e. if all its operands are defined
1814 outside of the returned loop. Returns NULL if EXPR is not
1815 even obviously invariant in LOOP. */
1816
1817 struct loop *
1818 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1819 {
1820 basic_block def_bb;
1821 unsigned i, len;
1822
1823 if (is_gimple_min_invariant (expr))
1824 return current_loops->tree_root;
1825
1826 if (TREE_CODE (expr) == SSA_NAME)
1827 {
1828 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1829 if (def_bb)
1830 {
1831 if (flow_bb_inside_loop_p (loop, def_bb))
1832 return NULL;
1833 return superloop_at_depth (loop,
1834 loop_depth (def_bb->loop_father) + 1);
1835 }
1836
1837 return current_loops->tree_root;
1838 }
1839
1840 if (!EXPR_P (expr))
1841 return NULL;
1842
1843 unsigned maxdepth = 0;
1844 len = TREE_OPERAND_LENGTH (expr);
1845 for (i = 0; i < len; i++)
1846 {
1847 struct loop *ivloop;
1848 if (!TREE_OPERAND (expr, i))
1849 continue;
1850
1851 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1852 if (!ivloop)
1853 return NULL;
1854 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1855 }
1856
1857 return superloop_at_depth (loop, maxdepth);
1858 }
1859
1860 /* Returns true if expression EXPR is obviously invariant in LOOP,
1861 i.e. if all its operands are defined outside of the LOOP. LOOP
1862 should not be the function body. */
1863
1864 bool
1865 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1866 {
1867 basic_block def_bb;
1868 unsigned i, len;
1869
1870 gcc_assert (loop_depth (loop) > 0);
1871
1872 if (is_gimple_min_invariant (expr))
1873 return true;
1874
1875 if (TREE_CODE (expr) == SSA_NAME)
1876 {
1877 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1878 if (def_bb
1879 && flow_bb_inside_loop_p (loop, def_bb))
1880 return false;
1881
1882 return true;
1883 }
1884
1885 if (!EXPR_P (expr))
1886 return false;
1887
1888 len = TREE_OPERAND_LENGTH (expr);
1889 for (i = 0; i < len; i++)
1890 if (TREE_OPERAND (expr, i)
1891 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1892 return false;
1893
1894 return true;
1895 }
1896
1897 /* Given expression EXPR which computes inductive values with respect
1898 to the loop recorded in DATA, this function returns the biv from which EXPR
1899 is derived, by tracing the definition chains of the SSA names in EXPR. */
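/* For instance (an illustrative sketch, not an exhaustive description): if
   EXPR is an SSA name defined by t_3 = i_2 * 4, where i_2 is a biv of the
   current loop, tracing the definition of t_3 leads back to i_2 and the iv
   record of i_2 is returned.  */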
1900
1901 static struct iv*
1902 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1903 {
1904 struct iv *iv;
1905 unsigned i, n;
1906 tree e2, e1;
1907 enum tree_code code;
1908 gimple *stmt;
1909
1910 if (expr == NULL_TREE)
1911 return NULL;
1912
1913 if (is_gimple_min_invariant (expr))
1914 return NULL;
1915
1916 code = TREE_CODE (expr);
1917 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1918 {
1919 n = TREE_OPERAND_LENGTH (expr);
1920 for (i = 0; i < n; i++)
1921 {
1922 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1923 if (iv)
1924 return iv;
1925 }
1926 }
1927
1928 /* Stop if it's not an SSA name. */
1929 if (code != SSA_NAME)
1930 return NULL;
1931
1932 iv = get_iv (data, expr);
1933 if (!iv || integer_zerop (iv->step))
1934 return NULL;
1935 else if (iv->biv_p)
1936 return iv;
1937
1938 stmt = SSA_NAME_DEF_STMT (expr);
1939 if (gphi *phi = dyn_cast <gphi *> (stmt))
1940 {
1941 ssa_op_iter iter;
1942 use_operand_p use_p;
1943 basic_block phi_bb = gimple_bb (phi);
1944
1945 /* Skip loop header PHI that doesn't define biv. */
1946 if (phi_bb->loop_father == data->current_loop)
1947 return NULL;
1948
1949 if (virtual_operand_p (gimple_phi_result (phi)))
1950 return NULL;
1951
1952 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1953 {
1954 tree use = USE_FROM_PTR (use_p);
1955 iv = find_deriving_biv_for_expr (data, use);
1956 if (iv)
1957 return iv;
1958 }
1959 return NULL;
1960 }
1961 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1962 return NULL;
1963
1964 e1 = gimple_assign_rhs1 (stmt);
1965 code = gimple_assign_rhs_code (stmt);
1966 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1967 return find_deriving_biv_for_expr (data, e1);
1968
1969 switch (code)
1970 {
1971 case MULT_EXPR:
1972 case PLUS_EXPR:
1973 case MINUS_EXPR:
1974 case POINTER_PLUS_EXPR:
1975 /* Increments, decrements and multiplications by a constant
1976 are simple. */
1977 e2 = gimple_assign_rhs2 (stmt);
1978 iv = find_deriving_biv_for_expr (data, e2);
1979 if (iv)
1980 return iv;
1981 gcc_fallthrough ();
1982
1983 CASE_CONVERT:
1984 /* Casts are simple. */
1985 return find_deriving_biv_for_expr (data, e1);
1986
1987 default:
1988 break;
1989 }
1990
1991 return NULL;
1992 }
1993
1994 /* Record that BIV is used in an address type use, and do the same for any
1995 other biv that is its predecessor or successor, i.e. differs from it by one step. */
1996
1997 static void
1998 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1999 {
2000 unsigned i;
2001 tree type, base_1, base_2;
2002 bitmap_iterator bi;
2003
2004 if (!biv || !biv->biv_p || integer_zerop (biv->step)
2005 || biv->have_address_use || !biv->no_overflow)
2006 return;
2007
2008 type = TREE_TYPE (biv->base);
2009 if (!INTEGRAL_TYPE_P (type))
2010 return;
2011
2012 biv->have_address_use = true;
2013 data->bivs_not_used_in_addr--;
2014 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2015 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2016 {
2017 struct iv *iv = ver_info (data, i)->iv;
2018
2019 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2020 || iv->have_address_use || !iv->no_overflow)
2021 continue;
2022
2023 if (type != TREE_TYPE (iv->base)
2024 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2025 continue;
2026
2027 if (!operand_equal_p (biv->step, iv->step, 0))
2028 continue;
2029
2030 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2031 if (operand_equal_p (base_1, iv->base, 0)
2032 || operand_equal_p (base_2, biv->base, 0))
2033 {
2034 iv->have_address_use = true;
2035 data->bivs_not_used_in_addr--;
2036 }
2037 }
2038 }
2039
2040 /* Accumulates the steps of indices into DATA and replaces their values with the
2041 initial ones. Returns false when the value of the index cannot be determined.
2042 Callback for for_each_index. */
2043
2044 struct ifs_ivopts_data
2045 {
2046 struct ivopts_data *ivopts_data;
2047 gimple *stmt;
2048 tree step;
2049 };
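/* In the callback below, DATA is an ifs_ivopts_data instance; its STEP field
   accumulates the total step, in bytes, contributed by all indices of the
   reference being analysed.  */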
2050
2051 static bool
2052 idx_find_step (tree base, tree *idx, void *data)
2053 {
2054 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2055 struct iv *iv;
2056 bool use_overflow_semantics = false;
2057 tree step, iv_base, iv_step, lbound, off;
2058 struct loop *loop = dta->ivopts_data->current_loop;
2059
2060 /* If base is a component ref, require that the offset of the reference
2061 be invariant. */
2062 if (TREE_CODE (base) == COMPONENT_REF)
2063 {
2064 off = component_ref_field_offset (base);
2065 return expr_invariant_in_loop_p (loop, off);
2066 }
2067
2068 /* If base is array, first check whether we will be able to move the
2069 reference out of the loop (in order to take its address in strength
2070 reduction). In order for this to work we need both lower bound
2071 and step to be loop invariants. */
2072 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2073 {
2074 /* Moreover, for a range, the size needs to be invariant as well. */
2075 if (TREE_CODE (base) == ARRAY_RANGE_REF
2076 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2077 return false;
2078
2079 step = array_ref_element_size (base);
2080 lbound = array_ref_low_bound (base);
2081
2082 if (!expr_invariant_in_loop_p (loop, step)
2083 || !expr_invariant_in_loop_p (loop, lbound))
2084 return false;
2085 }
2086
2087 if (TREE_CODE (*idx) != SSA_NAME)
2088 return true;
2089
2090 iv = get_iv (dta->ivopts_data, *idx);
2091 if (!iv)
2092 return false;
2093
2094 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2095 *&x[0], which is not folded and does not trigger the
2096 ARRAY_REF path below. */
2097 *idx = iv->base;
2098
2099 if (integer_zerop (iv->step))
2100 return true;
2101
2102 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2103 {
2104 step = array_ref_element_size (base);
2105
2106 /* We only handle addresses whose step is an integer constant. */
2107 if (TREE_CODE (step) != INTEGER_CST)
2108 return false;
2109 }
2110 else
2111 /* The step for pointer arithmetic is already 1 byte. */
2112 step = size_one_node;
2113
2114 iv_base = iv->base;
2115 iv_step = iv->step;
2116 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2117 use_overflow_semantics = true;
2118
2119 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2120 sizetype, &iv_base, &iv_step, dta->stmt,
2121 use_overflow_semantics))
2122 {
2123 /* The index might wrap. */
2124 return false;
2125 }
2126
2127 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2128 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2129
2130 if (dta->ivopts_data->bivs_not_used_in_addr)
2131 {
2132 if (!iv->biv_p)
2133 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2134
2135 record_biv_for_address_use (dta->ivopts_data, iv);
2136 }
2137 return true;
2138 }
2139
2140 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2141 object is passed to it in DATA. */
2142
2143 static bool
2144 idx_record_use (tree base, tree *idx,
2145 void *vdata)
2146 {
2147 struct ivopts_data *data = (struct ivopts_data *) vdata;
2148 find_interesting_uses_op (data, *idx);
2149 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2150 {
2151 find_interesting_uses_op (data, array_ref_element_size (base));
2152 find_interesting_uses_op (data, array_ref_low_bound (base));
2153 }
2154 return true;
2155 }
2156
2157 /* If we can prove that TOP = cst * BOT for some constant cst,
2158 store cst to MUL and return true. Otherwise return false.
2159 The returned value is always sign-extended, regardless of the
2160 signedness of TOP and BOT. */
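/* A few illustrative examples: for TOP = i_1 * 12 and BOT = i_1 the function
   stores 12 to *MUL; for TOP = 12 and BOT = 4 it stores 3; for TOP = 7 and
   BOT = 3 it returns false, since 7 is not a constant multiple of 3.  */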
2161
2162 static bool
2163 constant_multiple_of (tree top, tree bot, widest_int *mul)
2164 {
2165 tree mby;
2166 enum tree_code code;
2167 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2168 widest_int res, p0, p1;
2169
2170 STRIP_NOPS (top);
2171 STRIP_NOPS (bot);
2172
2173 if (operand_equal_p (top, bot, 0))
2174 {
2175 *mul = 1;
2176 return true;
2177 }
2178
2179 code = TREE_CODE (top);
2180 switch (code)
2181 {
2182 case MULT_EXPR:
2183 mby = TREE_OPERAND (top, 1);
2184 if (TREE_CODE (mby) != INTEGER_CST)
2185 return false;
2186
2187 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2188 return false;
2189
2190 *mul = wi::sext (res * wi::to_widest (mby), precision);
2191 return true;
2192
2193 case PLUS_EXPR:
2194 case MINUS_EXPR:
2195 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2196 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2197 return false;
2198
2199 if (code == MINUS_EXPR)
2200 p1 = -p1;
2201 *mul = wi::sext (p0 + p1, precision);
2202 return true;
2203
2204 case INTEGER_CST:
2205 if (TREE_CODE (bot) != INTEGER_CST)
2206 return false;
2207
2208 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2209 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2210 if (p1 == 0)
2211 return false;
2212 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2213 return res == 0;
2214
2215 default:
2216 if (POLY_INT_CST_P (top)
2217 && POLY_INT_CST_P (bot)
2218 && constant_multiple_p (wi::to_poly_widest (top),
2219 wi::to_poly_widest (bot), mul))
2220 return true;
2221
2222 return false;
2223 }
2224 }
2225
2226 /* Return true if memory reference REF with step STEP may be unaligned. */
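/* Roughly speaking, the reference may be unaligned if its known alignment is
   below what its mode requires, or if STEP may advance the address by a
   stride that is not a multiple of the required alignment (judged from the
   number of trailing zero bits of STEP).  */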
2227
2228 static bool
2229 may_be_unaligned_p (tree ref, tree step)
2230 {
2231 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2232 thus they are not misaligned. */
2233 if (TREE_CODE (ref) == TARGET_MEM_REF)
2234 return false;
2235
2236 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2237 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2238 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2239
2240 unsigned HOST_WIDE_INT bitpos;
2241 unsigned int ref_align;
2242 get_object_alignment_1 (ref, &ref_align, &bitpos);
2243 if (ref_align < align
2244 || (bitpos % align) != 0
2245 || (bitpos % BITS_PER_UNIT) != 0)
2246 return true;
2247
2248 unsigned int trailing_zeros = tree_ctz (step);
2249 if (trailing_zeros < HOST_BITS_PER_INT
2250 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2251 return true;
2252
2253 return false;
2254 }
2255
2256 /* Return true if EXPR may be non-addressable. */
2257
2258 bool
2259 may_be_nonaddressable_p (tree expr)
2260 {
2261 switch (TREE_CODE (expr))
2262 {
2263 case VAR_DECL:
2264 /* Check if it's a register variable. */
2265 return DECL_HARD_REGISTER (expr);
2266
2267 case TARGET_MEM_REF:
2268 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2269 target, thus they are always addressable. */
2270 return false;
2271
2272 case MEM_REF:
2273 /* Likewise for MEM_REFs, modulo the storage order. */
2274 return REF_REVERSE_STORAGE_ORDER (expr);
2275
2276 case BIT_FIELD_REF:
2277 if (REF_REVERSE_STORAGE_ORDER (expr))
2278 return true;
2279 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2280
2281 case COMPONENT_REF:
2282 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2283 return true;
2284 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2285 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2286
2287 case ARRAY_REF:
2288 case ARRAY_RANGE_REF:
2289 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2290 return true;
2291 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2292
2293 case VIEW_CONVERT_EXPR:
2294 /* This kind of view-conversions may wrap non-addressable objects
2295 and make them look addressable. After some processing the
2296 non-addressability may be uncovered again, causing ADDR_EXPRs
2297 of inappropriate objects to be built. */
2298 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2299 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2300 return true;
2301 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2302
2303 CASE_CONVERT:
2304 return true;
2305
2306 default:
2307 break;
2308 }
2309
2310 return false;
2311 }
2312
2313 /* Finds addresses in *OP_P inside STMT. */
2314
2315 static void
2316 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2317 tree *op_p)
2318 {
2319 tree base = *op_p, step = size_zero_node;
2320 struct iv *civ;
2321 struct ifs_ivopts_data ifs_ivopts_data;
2322
2323 /* Do not play with volatile memory references. A bit too conservative,
2324 perhaps, but safe. */
2325 if (gimple_has_volatile_ops (stmt))
2326 goto fail;
2327
2328 /* Ignore bitfields for now. Not really something terribly complicated
2329 to handle. TODO. */
2330 if (TREE_CODE (base) == BIT_FIELD_REF)
2331 goto fail;
2332
2333 base = unshare_expr (base);
2334
2335 if (TREE_CODE (base) == TARGET_MEM_REF)
2336 {
2337 tree type = build_pointer_type (TREE_TYPE (base));
2338 tree astep;
2339
2340 if (TMR_BASE (base)
2341 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2342 {
2343 civ = get_iv (data, TMR_BASE (base));
2344 if (!civ)
2345 goto fail;
2346
2347 TMR_BASE (base) = civ->base;
2348 step = civ->step;
2349 }
2350 if (TMR_INDEX2 (base)
2351 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2352 {
2353 civ = get_iv (data, TMR_INDEX2 (base));
2354 if (!civ)
2355 goto fail;
2356
2357 TMR_INDEX2 (base) = civ->base;
2358 step = civ->step;
2359 }
2360 if (TMR_INDEX (base)
2361 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2362 {
2363 civ = get_iv (data, TMR_INDEX (base));
2364 if (!civ)
2365 goto fail;
2366
2367 TMR_INDEX (base) = civ->base;
2368 astep = civ->step;
2369
2370 if (astep)
2371 {
2372 if (TMR_STEP (base))
2373 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2374
2375 step = fold_build2 (PLUS_EXPR, type, step, astep);
2376 }
2377 }
2378
2379 if (integer_zerop (step))
2380 goto fail;
2381 base = tree_mem_ref_addr (type, base);
2382 }
2383 else
2384 {
2385 ifs_ivopts_data.ivopts_data = data;
2386 ifs_ivopts_data.stmt = stmt;
2387 ifs_ivopts_data.step = size_zero_node;
2388 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2389 || integer_zerop (ifs_ivopts_data.step))
2390 goto fail;
2391 step = ifs_ivopts_data.step;
2392
2393 /* Check that the base expression is addressable. This needs
2394 to be done after substituting bases of IVs into it. */
2395 if (may_be_nonaddressable_p (base))
2396 goto fail;
2397
2398 /* Moreover, on strict alignment platforms, check that it is
2399 sufficiently aligned. */
2400 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2401 goto fail;
2402
2403 base = build_fold_addr_expr (base);
2404
2405 /* Substituting bases of IVs into the base expression might
2406 have caused folding opportunities. */
2407 if (TREE_CODE (base) == ADDR_EXPR)
2408 {
2409 tree *ref = &TREE_OPERAND (base, 0);
2410 while (handled_component_p (*ref))
2411 ref = &TREE_OPERAND (*ref, 0);
2412 if (TREE_CODE (*ref) == MEM_REF)
2413 {
2414 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2415 TREE_OPERAND (*ref, 0),
2416 TREE_OPERAND (*ref, 1));
2417 if (tem)
2418 *ref = tem;
2419 }
2420 }
2421 }
2422
2423 civ = alloc_iv (data, base, step);
2424 /* Fail if base object of this memory reference is unknown. */
2425 if (civ->base_object == NULL_TREE)
2426 goto fail;
2427
2428 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2429 return;
2430
2431 fail:
2432 for_each_index (op_p, idx_record_use, data);
2433 }
2434
2435 /* Finds and records invariants used in STMT. */
2436
2437 static void
2438 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2439 {
2440 ssa_op_iter iter;
2441 use_operand_p use_p;
2442 tree op;
2443
2444 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2445 {
2446 op = USE_FROM_PTR (use_p);
2447 record_invariant (data, op, false);
2448 }
2449 }
2450
2451 /* CALL calls an internal function. If operand *OP_P will become an
2452 address when the call is expanded, return the type of the memory
2453 being addressed, otherwise return null. */
2454
2455 static tree
2456 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2457 {
2458 switch (gimple_call_internal_fn (call))
2459 {
2460 case IFN_MASK_LOAD:
2461 if (op_p == gimple_call_arg_ptr (call, 0))
2462 return TREE_TYPE (gimple_call_lhs (call));
2463 return NULL_TREE;
2464
2465 case IFN_MASK_STORE:
2466 if (op_p == gimple_call_arg_ptr (call, 0))
2467 return TREE_TYPE (gimple_call_arg (call, 3));
2468 return NULL_TREE;
2469
2470 default:
2471 return NULL_TREE;
2472 }
2473 }
2474
2475 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2476 Return true if the operand will become an address when STMT
2477 is expanded and record the associated address use if so. */
2478
2479 static bool
2480 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2481 struct iv *iv)
2482 {
2483 /* Fail if base object of this memory reference is unknown. */
2484 if (iv->base_object == NULL_TREE)
2485 return false;
2486
2487 tree mem_type = NULL_TREE;
2488 if (gcall *call = dyn_cast <gcall *> (stmt))
2489 if (gimple_call_internal_p (call))
2490 mem_type = get_mem_type_for_internal_fn (call, op_p);
2491 if (mem_type)
2492 {
2493 iv = alloc_iv (data, iv->base, iv->step);
2494 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2495 return true;
2496 }
2497 return false;
2498 }
2499
2500 /* Finds interesting uses of induction variables in the statement STMT. */
2501
2502 static void
2503 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2504 {
2505 struct iv *iv;
2506 tree op, *lhs, *rhs;
2507 ssa_op_iter iter;
2508 use_operand_p use_p;
2509 enum tree_code code;
2510
2511 find_invariants_stmt (data, stmt);
2512
2513 if (gimple_code (stmt) == GIMPLE_COND)
2514 {
2515 find_interesting_uses_cond (data, stmt);
2516 return;
2517 }
2518
2519 if (is_gimple_assign (stmt))
2520 {
2521 lhs = gimple_assign_lhs_ptr (stmt);
2522 rhs = gimple_assign_rhs1_ptr (stmt);
2523
2524 if (TREE_CODE (*lhs) == SSA_NAME)
2525 {
2526 /* If the statement defines an induction variable, the uses are not
2527 interesting by themselves. */
2528
2529 iv = get_iv (data, *lhs);
2530
2531 if (iv && !integer_zerop (iv->step))
2532 return;
2533 }
2534
2535 code = gimple_assign_rhs_code (stmt);
2536 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2537 && (REFERENCE_CLASS_P (*rhs)
2538 || is_gimple_val (*rhs)))
2539 {
2540 if (REFERENCE_CLASS_P (*rhs))
2541 find_interesting_uses_address (data, stmt, rhs);
2542 else
2543 find_interesting_uses_op (data, *rhs);
2544
2545 if (REFERENCE_CLASS_P (*lhs))
2546 find_interesting_uses_address (data, stmt, lhs);
2547 return;
2548 }
2549 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2550 {
2551 find_interesting_uses_cond (data, stmt);
2552 return;
2553 }
2554
2555 /* TODO -- we should also handle address uses of type
2556
2557 memory = call (whatever);
2558
2559 and
2560
2561 call (memory). */
2562 }
2563
2564 if (gimple_code (stmt) == GIMPLE_PHI
2565 && gimple_bb (stmt) == data->current_loop->header)
2566 {
2567 iv = get_iv (data, PHI_RESULT (stmt));
2568
2569 if (iv && !integer_zerop (iv->step))
2570 return;
2571 }
2572
2573 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2574 {
2575 op = USE_FROM_PTR (use_p);
2576
2577 if (TREE_CODE (op) != SSA_NAME)
2578 continue;
2579
2580 iv = get_iv (data, op);
2581 if (!iv)
2582 continue;
2583
2584 if (!find_address_like_use (data, stmt, use_p->use, iv))
2585 find_interesting_uses_op (data, op);
2586 }
2587 }
2588
2589 /* Finds interesting uses of induction variables outside of loops
2590 on loop exit edge EXIT. */
2591
2592 static void
2593 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2594 {
2595 gphi *phi;
2596 gphi_iterator psi;
2597 tree def;
2598
2599 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2600 {
2601 phi = psi.phi ();
2602 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2603 if (!virtual_operand_p (def))
2604 find_interesting_uses_op (data, def);
2605 }
2606 }
2607
2608 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2609 mode for memory reference represented by USE. */
2610
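/* Cache of sample (reg + offset) address RTL templates, indexed by address
   space and memory mode, so that each template is constructed only once and
   then reused with different offsets.  */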
2611 static GTY (()) vec<rtx, va_gc> *addr_list;
2612
2613 static bool
2614 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2615 {
2616 rtx reg, addr;
2617 unsigned list_index;
2618 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2619 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2620
2621 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2622 if (list_index >= vec_safe_length (addr_list))
2623 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2624
2625 addr = (*addr_list)[list_index];
2626 if (!addr)
2627 {
2628 addr_mode = targetm.addr_space.address_mode (as);
2629 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2630 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2631 (*addr_list)[list_index] = addr;
2632 }
2633 else
2634 addr_mode = GET_MODE (addr);
2635
2636 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2637 return (memory_address_addr_space_p (mem_mode, addr, as));
2638 }
2639
2640 /* Comparison function to sort a group in ascending order of addr_offset. */
2641
2642 static int
2643 group_compare_offset (const void *a, const void *b)
2644 {
2645 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2646 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2647
2648 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2649 }
2650
2651 /* Check if small groups should be split. Return true if no group
2652 contains more than two uses with distinct addr_offsets. Return
2653 false otherwise. We want to split such groups because:
2654
2655 1) Small groups don't have much benefit and may interfere with
2656 general candidate selection.
2657 2) The problem size with only small groups is usually small, and the
2658 general algorithm can handle it well.
2659 
2660 TODO -- The above claim may not hold when we want to merge memory
2661 accesses with consecutive addresses. */
2662
2663 static bool
2664 split_small_address_groups_p (struct ivopts_data *data)
2665 {
2666 unsigned int i, j, distinct = 1;
2667 struct iv_use *pre;
2668 struct iv_group *group;
2669
2670 for (i = 0; i < data->vgroups.length (); i++)
2671 {
2672 group = data->vgroups[i];
2673 if (group->vuses.length () == 1)
2674 continue;
2675
2676 gcc_assert (address_p (group->type));
2677 if (group->vuses.length () == 2)
2678 {
2679 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2680 group->vuses[1]->addr_offset) > 0)
2681 std::swap (group->vuses[0], group->vuses[1]);
2682 }
2683 else
2684 group->vuses.qsort (group_compare_offset);
2685
2686 if (distinct > 2)
2687 continue;
2688
2689 distinct = 1;
2690 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2691 {
2692 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2693 {
2694 pre = group->vuses[j];
2695 distinct++;
2696 }
2697
2698 if (distinct > 2)
2699 break;
2700 }
2701 }
2702
2703 return (distinct <= 2);
2704 }
2705
2706 /* For each group of address type uses, this function further groups
2707 these uses according to the maximum offset supported by target's
2708 [base + offset] addressing mode. */
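/* For instance (illustrative only): if accesses a[i], a[i + 1] and
   a[i + 100000] initially end up in one group, the first two would normally
   stay together, while the third would be split off on a target whose
   [base + offset] addressing mode cannot encode the large offset.  */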
2709
2710 static void
2711 split_address_groups (struct ivopts_data *data)
2712 {
2713 unsigned int i, j;
2714 /* True if every group should be split into single-offset sub-groups. */
2715 bool split_p = split_small_address_groups_p (data);
2716
2717 for (i = 0; i < data->vgroups.length (); i++)
2718 {
2719 struct iv_group *new_group = NULL;
2720 struct iv_group *group = data->vgroups[i];
2721 struct iv_use *use = group->vuses[0];
2722
2723 use->id = 0;
2724 use->group_id = group->id;
2725 if (group->vuses.length () == 1)
2726 continue;
2727
2728 gcc_assert (address_p (use->type));
2729
2730 for (j = 1; j < group->vuses.length ();)
2731 {
2732 struct iv_use *next = group->vuses[j];
2733 poly_int64 offset = next->addr_offset - use->addr_offset;
2734
2735 /* Split the group if asked to, or if the offset against the first
2736 use can't fit in the offset part of the addressing mode. IV uses
2737 having the same offset are still kept in one group. */
2738 if (maybe_ne (offset, 0)
2739 && (split_p || !addr_offset_valid_p (use, offset)))
2740 {
2741 if (!new_group)
2742 new_group = record_group (data, group->type);
2743 group->vuses.ordered_remove (j);
2744 new_group->vuses.safe_push (next);
2745 continue;
2746 }
2747
2748 next->id = j;
2749 next->group_id = group->id;
2750 j++;
2751 }
2752 }
2753 }
2754
2755 /* Finds uses of the induction variables that are interesting. */
2756
2757 static void
2758 find_interesting_uses (struct ivopts_data *data)
2759 {
2760 basic_block bb;
2761 gimple_stmt_iterator bsi;
2762 basic_block *body = get_loop_body (data->current_loop);
2763 unsigned i;
2764 edge e;
2765
2766 for (i = 0; i < data->current_loop->num_nodes; i++)
2767 {
2768 edge_iterator ei;
2769 bb = body[i];
2770
2771 FOR_EACH_EDGE (e, ei, bb->succs)
2772 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2773 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2774 find_interesting_uses_outside (data, e);
2775
2776 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2777 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2778 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2779 if (!is_gimple_debug (gsi_stmt (bsi)))
2780 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2781 }
2782 free (body);
2783
2784 split_address_groups (data);
2785
2786 if (dump_file && (dump_flags & TDF_DETAILS))
2787 {
2788 fprintf (dump_file, "\n<IV Groups>:\n");
2789 dump_groups (dump_file, data);
2790 fprintf (dump_file, "\n");
2791 }
2792 }
2793
2794 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2795 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2796 we are at the top-level of the processed address. */
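/* For example (illustrative only): for &a[i + 3] with 4-byte array elements,
   this returns an expression equivalent to &a[i] and stores 12 to *OFFSET.  */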
2797
2798 static tree
2799 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2800 poly_int64 *offset)
2801 {
2802 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2803 enum tree_code code;
2804 tree type, orig_type = TREE_TYPE (expr);
2805 poly_int64 off0, off1;
2806 HOST_WIDE_INT st;
2807 tree orig_expr = expr;
2808
2809 STRIP_NOPS (expr);
2810
2811 type = TREE_TYPE (expr);
2812 code = TREE_CODE (expr);
2813 *offset = 0;
2814
2815 switch (code)
2816 {
2817 case POINTER_PLUS_EXPR:
2818 case PLUS_EXPR:
2819 case MINUS_EXPR:
2820 op0 = TREE_OPERAND (expr, 0);
2821 op1 = TREE_OPERAND (expr, 1);
2822
2823 op0 = strip_offset_1 (op0, false, false, &off0);
2824 op1 = strip_offset_1 (op1, false, false, &off1);
2825
2826 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2827 if (op0 == TREE_OPERAND (expr, 0)
2828 && op1 == TREE_OPERAND (expr, 1))
2829 return orig_expr;
2830
2831 if (integer_zerop (op1))
2832 expr = op0;
2833 else if (integer_zerop (op0))
2834 {
2835 if (code == MINUS_EXPR)
2836 expr = fold_build1 (NEGATE_EXPR, type, op1);
2837 else
2838 expr = op1;
2839 }
2840 else
2841 expr = fold_build2 (code, type, op0, op1);
2842
2843 return fold_convert (orig_type, expr);
2844
2845 case MULT_EXPR:
2846 op1 = TREE_OPERAND (expr, 1);
2847 if (!cst_and_fits_in_hwi (op1))
2848 return orig_expr;
2849
2850 op0 = TREE_OPERAND (expr, 0);
2851 op0 = strip_offset_1 (op0, false, false, &off0);
2852 if (op0 == TREE_OPERAND (expr, 0))
2853 return orig_expr;
2854
2855 *offset = off0 * int_cst_value (op1);
2856 if (integer_zerop (op0))
2857 expr = op0;
2858 else
2859 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2860
2861 return fold_convert (orig_type, expr);
2862
2863 case ARRAY_REF:
2864 case ARRAY_RANGE_REF:
2865 if (!inside_addr)
2866 return orig_expr;
2867
2868 step = array_ref_element_size (expr);
2869 if (!cst_and_fits_in_hwi (step))
2870 break;
2871
2872 st = int_cst_value (step);
2873 op1 = TREE_OPERAND (expr, 1);
2874 op1 = strip_offset_1 (op1, false, false, &off1);
2875 *offset = off1 * st;
2876
2877 if (top_compref
2878 && integer_zerop (op1))
2879 {
2880 /* Strip the component reference completely. */
2881 op0 = TREE_OPERAND (expr, 0);
2882 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2883 *offset += off0;
2884 return op0;
2885 }
2886 break;
2887
2888 case COMPONENT_REF:
2889 {
2890 tree field;
2891
2892 if (!inside_addr)
2893 return orig_expr;
2894
2895 tmp = component_ref_field_offset (expr);
2896 field = TREE_OPERAND (expr, 1);
2897 if (top_compref
2898 && cst_and_fits_in_hwi (tmp)
2899 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2900 {
2901 HOST_WIDE_INT boffset, abs_off;
2902
2903 /* Strip the component reference completely. */
2904 op0 = TREE_OPERAND (expr, 0);
2905 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2906 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2907 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2908 if (boffset < 0)
2909 abs_off = -abs_off;
2910
2911 *offset = off0 + int_cst_value (tmp) + abs_off;
2912 return op0;
2913 }
2914 }
2915 break;
2916
2917 case ADDR_EXPR:
2918 op0 = TREE_OPERAND (expr, 0);
2919 op0 = strip_offset_1 (op0, true, true, &off0);
2920 *offset += off0;
2921
2922 if (op0 == TREE_OPERAND (expr, 0))
2923 return orig_expr;
2924
2925 expr = build_fold_addr_expr (op0);
2926 return fold_convert (orig_type, expr);
2927
2928 case MEM_REF:
2929 /* ??? Offset operand? */
2930 inside_addr = false;
2931 break;
2932
2933 default:
2934 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2935 return build_int_cst (orig_type, 0);
2936 return orig_expr;
2937 }
2938
2939 /* Default handling of expressions for which we want to recurse into
2940 the first operand. */
2941 op0 = TREE_OPERAND (expr, 0);
2942 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2943 *offset += off0;
2944
2945 if (op0 == TREE_OPERAND (expr, 0)
2946 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2947 return orig_expr;
2948
2949 expr = copy_node (expr);
2950 TREE_OPERAND (expr, 0) = op0;
2951 if (op1)
2952 TREE_OPERAND (expr, 1) = op1;
2953
2954 /* Inside address, we might strip the top level component references,
2955 thus changing type of the expression. Handling of ADDR_EXPR
2956 will fix that. */
2957 expr = fold_convert (orig_type, expr);
2958
2959 return expr;
2960 }
2961
2962 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2963
2964 tree
2965 strip_offset (tree expr, poly_uint64_pod *offset)
2966 {
2967 poly_int64 off;
2968 tree core = strip_offset_1 (expr, false, false, &off);
2969 *offset = off;
2970 return core;
2971 }
2972
2973 /* Returns a variant of TYPE that can be used as a base for different uses.
2974 We return an unsigned type with the same precision, which avoids problems
2975 with overflows. */
2976
2977 static tree
2978 generic_type_for (tree type)
2979 {
2980 if (POINTER_TYPE_P (type))
2981 return unsigned_type_for (type);
2982
2983 if (TYPE_UNSIGNED (type))
2984 return type;
2985
2986 return unsigned_type_for (type);
2987 }
2988
2989 /* Private data for walk_tree. */
2990
2991 struct walk_tree_data
2992 {
2993 bitmap *inv_vars;
2994 struct ivopts_data *idata;
2995 };
2996
2997 /* Callback function for walk_tree; it records invariants and symbol
2998 references found in *EXPR_P. DATA is the structure storing the result info. */
2999
3000 static tree
3001 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
3002 {
3003 tree op = *expr_p;
3004 struct version_info *info;
3005 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3006
3007 if (TREE_CODE (op) != SSA_NAME)
3008 return NULL_TREE;
3009
3010 info = name_info (wdata->idata, op);
3011 /* Because we expand simple operations when finding IVs, a loop-invariant
3012 variable that isn't referred to by the original loop could be used now.
3013 Record such invariant variables here. */
3014 if (!info->iv)
3015 {
3016 struct ivopts_data *idata = wdata->idata;
3017 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3018
3019 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3020 {
3021 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3022 record_invariant (idata, op, false);
3023 }
3024 }
3025 if (!info->inv_id || info->has_nonlin_use)
3026 return NULL_TREE;
3027
3028 if (!*wdata->inv_vars)
3029 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3030 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3031
3032 return NULL_TREE;
3033 }
3034
3035 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3036 store them. */
3037
3038 static inline void
3039 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3040 {
3041 struct walk_tree_data wdata;
3042
3043 if (!inv_vars)
3044 return;
3045
3046 wdata.idata = data;
3047 wdata.inv_vars = inv_vars;
3048 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3049 }
3050
3051 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3052 will be recorded if it doesn't exist yet. Given below two exprs:
3053 inv_expr + cst1, inv_expr + cst2
3054 It's hard to decide whether the constant part should be stripped
3055 or not. We choose not to strip it, based on the following facts:
3056 1) We need to count the ADD cost for the constant part if it's stripped,
3057 which isn't always trivial where this function is called.
3058 2) Stripping the constant away may conflict with the following loop
3059 invariant hoisting pass.
3060 3) Not stripping the constant away results in more invariant exprs,
3061 which usually leads to decisions preferring lower reg pressure. */
3062
3063 static iv_inv_expr_ent *
3064 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3065 {
3066 STRIP_NOPS (inv_expr);
3067
3068 if (poly_int_tree_p (inv_expr)
3069 || TREE_CODE (inv_expr) == SSA_NAME)
3070 return NULL;
3071
3072 /* Don't strip constant part away as we used to. */
3073
3074 /* Stores EXPR in DATA->inv_expr_tab and returns a pointer to its iv_inv_expr_ent. */
3075 struct iv_inv_expr_ent ent;
3076 ent.expr = inv_expr;
3077 ent.hash = iterative_hash_expr (inv_expr, 0);
3078 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3079
3080 if (!*slot)
3081 {
3082 *slot = XNEW (struct iv_inv_expr_ent);
3083 (*slot)->expr = inv_expr;
3084 (*slot)->hash = ent.hash;
3085 (*slot)->id = ++data->max_inv_expr_id;
3086 }
3087
3088 return *slot;
3089 }
3090
3091 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3092 position to POS. If USE is not NULL, the candidate is set as related to
3093 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3094 replacement of the final value of the iv by a direct computation. */
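/* Note that if an equivalent candidate (same base, step, precision, position
   and increment point) has already been added, that candidate is reused
   instead of creating a duplicate.  */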
3095
3096 static struct iv_cand *
3097 add_candidate_1 (struct ivopts_data *data,
3098 tree base, tree step, bool important, enum iv_position pos,
3099 struct iv_use *use, gimple *incremented_at,
3100 struct iv *orig_iv = NULL)
3101 {
3102 unsigned i;
3103 struct iv_cand *cand = NULL;
3104 tree type, orig_type;
3105
3106 gcc_assert (base && step);
3107
3108 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3109 live, but the ivopts code may replace a real pointer with one
3110 pointing before or after the memory block that is then adjusted
3111 into the memory block during the loop. FIXME: It would likely be
3112 better to actually force the pointer live and still use ivopts;
3113 for example, it would be enough to write the pointer into memory
3114 and keep it there until after the loop. */
3115 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3116 return NULL;
3117
3118 /* For non-original variables, make sure their values are computed in a type
3119 that does not invoke undefined behavior on overflows (since in general,
3120 we cannot prove that these induction variables are non-wrapping). */
3121 if (pos != IP_ORIGINAL)
3122 {
3123 orig_type = TREE_TYPE (base);
3124 type = generic_type_for (orig_type);
3125 if (type != orig_type)
3126 {
3127 base = fold_convert (type, base);
3128 step = fold_convert (type, step);
3129 }
3130 }
3131
3132 for (i = 0; i < data->vcands.length (); i++)
3133 {
3134 cand = data->vcands[i];
3135
3136 if (cand->pos != pos)
3137 continue;
3138
3139 if (cand->incremented_at != incremented_at
3140 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3141 && cand->ainc_use != use))
3142 continue;
3143
3144 if (operand_equal_p (base, cand->iv->base, 0)
3145 && operand_equal_p (step, cand->iv->step, 0)
3146 && (TYPE_PRECISION (TREE_TYPE (base))
3147 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3148 break;
3149 }
3150
3151 if (i == data->vcands.length ())
3152 {
3153 cand = XCNEW (struct iv_cand);
3154 cand->id = i;
3155 cand->iv = alloc_iv (data, base, step);
3156 cand->pos = pos;
3157 if (pos != IP_ORIGINAL)
3158 {
3159 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3160 cand->var_after = cand->var_before;
3161 }
3162 cand->important = important;
3163 cand->incremented_at = incremented_at;
3164 data->vcands.safe_push (cand);
3165
3166 if (!poly_int_tree_p (step))
3167 {
3168 find_inv_vars (data, &step, &cand->inv_vars);
3169
3170 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3171 /* Share bitmap between inv_vars and inv_exprs for cand. */
3172 if (inv_expr != NULL)
3173 {
3174 cand->inv_exprs = cand->inv_vars;
3175 cand->inv_vars = NULL;
3176 if (cand->inv_exprs)
3177 bitmap_clear (cand->inv_exprs);
3178 else
3179 cand->inv_exprs = BITMAP_ALLOC (NULL);
3180
3181 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3182 }
3183 }
3184
3185 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3186 cand->ainc_use = use;
3187 else
3188 cand->ainc_use = NULL;
3189
3190 cand->orig_iv = orig_iv;
3191 if (dump_file && (dump_flags & TDF_DETAILS))
3192 dump_cand (dump_file, cand);
3193 }
3194
3195 cand->important |= important;
3196
3197 /* Relate candidate to the group for which it is added. */
3198 if (use)
3199 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3200
3201 return cand;
3202 }
3203
3204 /* Returns true if incrementing the induction variable at the end of the LOOP
3205 is allowed.
3206
3207 The purpose is to avoid splitting the latch edge with a biv increment, thus
3208 creating a jump, possibly confusing other optimization passes and leaving
3209 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3210 available (so we do not have a better alternative), or if the latch edge
3211 is already nonempty. */
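/* Informally, IP_NORMAL places the increment just before the exit test at
   the end of the loop body, while IP_END places it at the end of the latch;
   see ip_normal_pos and ip_end_pos for the precise definitions.  */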
3212
3213 static bool
3214 allow_ip_end_pos_p (struct loop *loop)
3215 {
3216 if (!ip_normal_pos (loop))
3217 return true;
3218
3219 if (!empty_block_p (ip_end_pos (loop)))
3220 return true;
3221
3222 return false;
3223 }
3224
3225 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3226 Important field is set to IMPORTANT. */
3227
3228 static void
3229 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3230 bool important, struct iv_use *use)
3231 {
3232 basic_block use_bb = gimple_bb (use->stmt);
3233 machine_mode mem_mode;
3234 unsigned HOST_WIDE_INT cstepi;
3235
3236 /* If we insert the increment in any position other than the standard
3237 ones, we must ensure that it is incremented once per iteration.
3238 It must not be in an inner nested loop, or one side of an if
3239 statement. */
3240 if (use_bb->loop_father != data->current_loop
3241 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3242 || stmt_can_throw_internal (cfun, use->stmt)
3243 || !cst_and_fits_in_hwi (step))
3244 return;
3245
3246 cstepi = int_cst_value (step);
3247
3248 mem_mode = TYPE_MODE (use->mem_type);
3249 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3250 || USE_STORE_PRE_INCREMENT (mem_mode))
3251 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3252 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3253 || USE_STORE_PRE_DECREMENT (mem_mode))
3254 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3255 {
3256 enum tree_code code = MINUS_EXPR;
3257 tree new_base;
3258 tree new_step = step;
3259
3260 if (POINTER_TYPE_P (TREE_TYPE (base)))
3261 {
3262 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3263 code = POINTER_PLUS_EXPR;
3264 }
3265 else
3266 new_step = fold_convert (TREE_TYPE (base), new_step);
3267 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3268 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3269 use->stmt);
3270 }
3271 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3272 || USE_STORE_POST_INCREMENT (mem_mode))
3273 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3274 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3275 || USE_STORE_POST_DECREMENT (mem_mode))
3276 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3277 {
3278 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3279 use->stmt);
3280 }
3281 }
3282
3283 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
3284 If USE is not NULL, the candidate is set as related to
3285 it. The candidate computation is scheduled before the exit condition and at
3286 the end of the loop. */
3287
3288 static void
3289 add_candidate (struct ivopts_data *data,
3290 tree base, tree step, bool important, struct iv_use *use,
3291 struct iv *orig_iv = NULL)
3292 {
3293 if (ip_normal_pos (data->current_loop))
3294 add_candidate_1 (data, base, step, important,
3295 IP_NORMAL, use, NULL, orig_iv);
3296 if (ip_end_pos (data->current_loop)
3297 && allow_ip_end_pos_p (data->current_loop))
3298 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3299 }
3300
3301 /* Adds standard iv candidates. */
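/* Informally, these are the canonical counters 0, 1, 2, ... in int, and also
   in long and long long when those are wider yet still at most one word.  */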
3302
3303 static void
3304 add_standard_iv_candidates (struct ivopts_data *data)
3305 {
3306 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3307
3308 /* The same for a double-integer type if it is still fast enough. */
3309 if (TYPE_PRECISION
3310 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3311 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3312 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3313 build_int_cst (long_integer_type_node, 1), true, NULL);
3314
3315 /* The same for a double-integer type if it is still fast enough. */
3316 if (TYPE_PRECISION
3317 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3318 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3319 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3320 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3321 }
3322
3323
3324 /* Adds candidates based on the old induction variable IV. */
3325
3326 static void
3327 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3328 {
3329 gimple *phi;
3330 tree def;
3331 struct iv_cand *cand;
3332
3333 /* Check if this biv is used in address type use. */
3334 if (iv->no_overflow && iv->have_address_use
3335 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3336 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3337 {
3338 tree base = fold_convert (sizetype, iv->base);
3339 tree step = fold_convert (sizetype, iv->step);
3340
3341 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3342 add_candidate (data, base, step, true, NULL, iv);
3343 /* Add iv cand of the original type only if it has nonlinear use. */
3344 if (iv->nonlin_use)
3345 add_candidate (data, iv->base, iv->step, true, NULL);
3346 }
3347 else
3348 add_candidate (data, iv->base, iv->step, true, NULL);
3349
3350 /* The same, but with initial value zero. */
3351 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3352 add_candidate (data, size_int (0), iv->step, true, NULL);
3353 else
3354 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3355 iv->step, true, NULL);
3356
3357 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3358 if (gimple_code (phi) == GIMPLE_PHI)
3359 {
3360 /* Additionally record the possibility of leaving the original iv
3361 untouched. */
3362 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3363 /* Don't add candidate if it's from another PHI node because
3364 it's an affine iv appearing in the form of PEELED_CHREC. */
3365 phi = SSA_NAME_DEF_STMT (def);
3366 if (gimple_code (phi) != GIMPLE_PHI)
3367 {
3368 cand = add_candidate_1 (data,
3369 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3370 SSA_NAME_DEF_STMT (def));
3371 if (cand)
3372 {
3373 cand->var_before = iv->ssa_name;
3374 cand->var_after = def;
3375 }
3376 }
3377 else
3378 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3379 }
3380 }
3381
3382 /* Adds candidates based on the old induction variables. */
3383
3384 static void
3385 add_iv_candidate_for_bivs (struct ivopts_data *data)
3386 {
3387 unsigned i;
3388 struct iv *iv;
3389 bitmap_iterator bi;
3390
3391 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3392 {
3393 iv = ver_info (data, i)->iv;
3394 if (iv && iv->biv_p && !integer_zerop (iv->step))
3395 add_iv_candidate_for_biv (data, iv);
3396 }
3397 }
3398
3399 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3400
3401 static void
3402 record_common_cand (struct ivopts_data *data, tree base,
3403 tree step, struct iv_use *use)
3404 {
3405 struct iv_common_cand ent;
3406 struct iv_common_cand **slot;
3407
3408 ent.base = base;
3409 ent.step = step;
3410 ent.hash = iterative_hash_expr (base, 0);
3411 ent.hash = iterative_hash_expr (step, ent.hash);
3412
3413 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3414 if (*slot == NULL)
3415 {
3416 *slot = new iv_common_cand ();
3417 (*slot)->base = base;
3418 (*slot)->step = step;
3419 (*slot)->uses.create (8);
3420 (*slot)->hash = ent.hash;
3421 data->iv_common_cands.safe_push ((*slot));
3422 }
3423
3424 gcc_assert (use != NULL);
3425 (*slot)->uses.safe_push (use);
3426 return;
3427 }
3428
3429 /* Comparison function used to sort common candidates. */
3430
3431 static int
3432 common_cand_cmp (const void *p1, const void *p2)
3433 {
3434 unsigned n1, n2;
3435 const struct iv_common_cand *const *const ccand1
3436 = (const struct iv_common_cand *const *)p1;
3437 const struct iv_common_cand *const *const ccand2
3438 = (const struct iv_common_cand *const *)p2;
3439
3440 n1 = (*ccand1)->uses.length ();
3441 n2 = (*ccand2)->uses.length ();
3442 return n2 - n1;
3443 }
3444
3445 /* Adds IV candidates based on the common candidates recorded. */
3446
3447 static void
3448 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3449 {
3450 unsigned i, j;
3451 struct iv_cand *cand_1, *cand_2;
3452
3453 data->iv_common_cands.qsort (common_cand_cmp);
3454 for (i = 0; i < data->iv_common_cands.length (); i++)
3455 {
3456 struct iv_common_cand *ptr = data->iv_common_cands[i];
3457
3458 /* Only add IV candidate if it's derived from multiple uses. */
3459 if (ptr->uses.length () <= 1)
3460 break;
3461
3462 cand_1 = NULL;
3463 cand_2 = NULL;
3464 if (ip_normal_pos (data->current_loop))
3465 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3466 false, IP_NORMAL, NULL, NULL);
3467
3468 if (ip_end_pos (data->current_loop)
3469 && allow_ip_end_pos_p (data->current_loop))
3470 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3471 false, IP_END, NULL, NULL);
3472
3473 /* Bind deriving uses and the new candidates. */
3474 for (j = 0; j < ptr->uses.length (); j++)
3475 {
3476 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3477 if (cand_1)
3478 bitmap_set_bit (group->related_cands, cand_1->id);
3479 if (cand_2)
3480 bitmap_set_bit (group->related_cands, cand_2->id);
3481 }
3482 }
3483
3484 /* Release data since it is useless from this point. */
3485 data->iv_common_cand_tab->empty ();
3486 data->iv_common_cands.truncate (0);
3487 }
3488
3489 /* Adds candidates based on the value of USE's iv. */
3490
3491 static void
3492 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3493 {
3494 poly_uint64 offset;
3495 tree base;
3496 struct iv *iv = use->iv;
3497 tree basetype = TREE_TYPE (iv->base);
3498
3499 /* Don't add a candidate for an iv_use whose type is neither integer nor
3500 pointer, or doesn't have mode precision; instead, add a candidate for the
3501 corresponding scev in an unsigned type with the same precision. See PR93674 for more info. */
3502 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3503 || !type_has_mode_precision_p (basetype))
3504 {
3505 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3506 TYPE_UNSIGNED (basetype));
3507 add_candidate (data, fold_convert (basetype, iv->base),
3508 fold_convert (basetype, iv->step), false, NULL);
3509 return;
3510 }
3511
3512 add_candidate (data, iv->base, iv->step, false, use);
3513
3514 /* Record common candidate for use in case it can be shared by others. */
3515 record_common_cand (data, iv->base, iv->step, use);
3516
3517 /* Record common candidate with initial value zero. */
3518 basetype = TREE_TYPE (iv->base);
3519 if (POINTER_TYPE_P (basetype))
3520 basetype = sizetype;
3521 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3522
3523 /* Record common candidate with the constant offset stripped from the base.
3524 As for the use itself, we also add a candidate for it directly. */
3525 base = strip_offset (iv->base, &offset);
3526 if (maybe_ne (offset, 0U) || base != iv->base)
3527 {
3528 record_common_cand (data, base, iv->step, use);
3529 add_candidate (data, base, iv->step, false, use);
3530 }
3531
3532 /* Record common candidate with base_object removed in base. */
3533 base = iv->base;
3534 STRIP_NOPS (base);
3535 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3536 {
3537 tree step = iv->step;
3538
3539 STRIP_NOPS (step);
3540 base = TREE_OPERAND (base, 1);
3541 step = fold_convert (sizetype, step);
3542 record_common_cand (data, base, step, use);
3543 /* Also record common candidate with offset stripped. */
3544 base = strip_offset (base, &offset);
3545 if (maybe_ne (offset, 0U))
3546 record_common_cand (data, base, step, use);
3547 }
3548
3549 /* Finally, add auto-increment candidates. Make such variables
3550 important since other iv uses with the same base object may be based
3551 on them. */
3552 if (use != NULL && address_p (use->type))
3553 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3554 }
3555
3556 /* Adds candidates based on the uses. */
3557
3558 static void
3559 add_iv_candidate_for_groups (struct ivopts_data *data)
3560 {
3561 unsigned i;
3562
3563 /* Only add candidate for the first use in group. */
3564 for (i = 0; i < data->vgroups.length (); i++)
3565 {
3566 struct iv_group *group = data->vgroups[i];
3567
3568 gcc_assert (group->vuses[0] != NULL);
3569 add_iv_candidate_for_use (data, group->vuses[0]);
3570 }
3571 add_iv_candidate_derived_from_uses (data);
3572 }
3573
3574 /* Record important candidates and add them to related_cands bitmaps. */
3575
3576 static void
3577 record_important_candidates (struct ivopts_data *data)
3578 {
3579 unsigned i;
3580 struct iv_group *group;
3581
3582 for (i = 0; i < data->vcands.length (); i++)
3583 {
3584 struct iv_cand *cand = data->vcands[i];
3585
3586 if (cand->important)
3587 bitmap_set_bit (data->important_candidates, i);
3588 }
3589
3590 data->consider_all_candidates = (data->vcands.length ()
3591 <= CONSIDER_ALL_CANDIDATES_BOUND);
3592
3593 /* Add important candidates to groups' related_cands bitmaps. */
3594 for (i = 0; i < data->vgroups.length (); i++)
3595 {
3596 group = data->vgroups[i];
3597 bitmap_ior_into (group->related_cands, data->important_candidates);
3598 }
3599 }
3600
3601 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3602 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3603 we allocate a simple list for every use. */
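/* When not all candidates are considered, each group's table acts as a small
   open-addressed hash: its size is rounded up to a power of two and entries
   are probed starting from the candidate id modulo that size (see
   set_group_iv_cost and get_group_iv_cost below).  */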
3604
3605 static void
3606 alloc_use_cost_map (struct ivopts_data *data)
3607 {
3608 unsigned i, size, s;
3609
3610 for (i = 0; i < data->vgroups.length (); i++)
3611 {
3612 struct iv_group *group = data->vgroups[i];
3613
3614 if (data->consider_all_candidates)
3615 size = data->vcands.length ();
3616 else
3617 {
3618 s = bitmap_count_bits (group->related_cands);
3619
3620 /* Round up to a power of two, so that taking the modulo by it is fast. */
3621 size = s ? (1 << ceil_log2 (s)) : 1;
3622 }
3623
3624 group->n_map_members = size;
3625 group->cost_map = XCNEWVEC (struct cost_pair, size);
3626 }
3627 }
3628
3629 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3630 on the invariants INV_VARS, that the value used in expressing it is
3631 VALUE, and that in case of iv elimination the comparison operator is COMP. */
3632
3633 static void
3634 set_group_iv_cost (struct ivopts_data *data,
3635 struct iv_group *group, struct iv_cand *cand,
3636 comp_cost cost, bitmap inv_vars, tree value,
3637 enum tree_code comp, bitmap inv_exprs)
3638 {
3639 unsigned i, s;
3640
3641 if (cost.infinite_cost_p ())
3642 {
3643 BITMAP_FREE (inv_vars);
3644 BITMAP_FREE (inv_exprs);
3645 return;
3646 }
3647
3648 if (data->consider_all_candidates)
3649 {
3650 group->cost_map[cand->id].cand = cand;
3651 group->cost_map[cand->id].cost = cost;
3652 group->cost_map[cand->id].inv_vars = inv_vars;
3653 group->cost_map[cand->id].inv_exprs = inv_exprs;
3654 group->cost_map[cand->id].value = value;
3655 group->cost_map[cand->id].comp = comp;
3656 return;
3657 }
3658
3659 /* n_map_members is a power of two, so this computes modulo. */
3660 s = cand->id & (group->n_map_members - 1);
3661 for (i = s; i < group->n_map_members; i++)
3662 if (!group->cost_map[i].cand)
3663 goto found;
3664 for (i = 0; i < s; i++)
3665 if (!group->cost_map[i].cand)
3666 goto found;
3667
3668 gcc_unreachable ();
3669
3670 found:
3671 group->cost_map[i].cand = cand;
3672 group->cost_map[i].cost = cost;
3673 group->cost_map[i].inv_vars = inv_vars;
3674 group->cost_map[i].inv_exprs = inv_exprs;
3675 group->cost_map[i].value = value;
3676 group->cost_map[i].comp = comp;
3677 }
3678
3679 /* Gets cost of (GROUP, CAND) pair. */
3680
3681 static struct cost_pair *
3682 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3683 struct iv_cand *cand)
3684 {
3685 unsigned i, s;
3686 struct cost_pair *ret;
3687
3688 if (!cand)
3689 return NULL;
3690
3691 if (data->consider_all_candidates)
3692 {
3693 ret = group->cost_map + cand->id;
3694 if (!ret->cand)
3695 return NULL;
3696
3697 return ret;
3698 }
3699
3700 /* n_map_members is a power of two, so this computes modulo. */
3701 s = cand->id & (group->n_map_members - 1);
3702 for (i = s; i < group->n_map_members; i++)
3703 if (group->cost_map[i].cand == cand)
3704 return group->cost_map + i;
3705 else if (group->cost_map[i].cand == NULL)
3706 return NULL;
3707 for (i = 0; i < s; i++)
3708 if (group->cost_map[i].cand == cand)
3709 return group->cost_map + i;
3710 else if (group->cost_map[i].cand == NULL)
3711 return NULL;
3712
3713 return NULL;
3714 }
3715
3716 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3717 static rtx
produce_memory_decl_rtl (tree obj, int *regno)
3719 {
3720 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3721 machine_mode address_mode = targetm.addr_space.address_mode (as);
3722 rtx x;
3723
3724 gcc_assert (obj);
3725 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3726 {
3727 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3728 x = gen_rtx_SYMBOL_REF (address_mode, name);
3729 SET_SYMBOL_REF_DECL (x, obj);
3730 x = gen_rtx_MEM (DECL_MODE (obj), x);
3731 set_mem_addr_space (x, as);
3732 targetm.encode_section_info (obj, x, true);
3733 }
3734 else
3735 {
3736 x = gen_raw_REG (address_mode, (*regno)++);
3737 x = gen_rtx_MEM (DECL_MODE (obj), x);
3738 set_mem_addr_space (x, as);
3739 }
3740
3741 return x;
3742 }
3743
/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */
3746
3747 static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3749 {
3750 tree obj = NULL_TREE;
3751 rtx x = NULL_RTX;
3752 int *regno = (int *) data;
3753
3754 switch (TREE_CODE (*expr_p))
3755 {
3756 case ADDR_EXPR:
3757 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3758 handled_component_p (*expr_p);
3759 expr_p = &TREE_OPERAND (*expr_p, 0))
3760 continue;
3761 obj = *expr_p;
3762 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3763 x = produce_memory_decl_rtl (obj, regno);
3764 break;
3765
3766 case SSA_NAME:
3767 *ws = 0;
3768 obj = SSA_NAME_VAR (*expr_p);
3769 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3770 if (!obj)
3771 return NULL_TREE;
3772 if (!DECL_RTL_SET_P (obj))
3773 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3774 break;
3775
3776 case VAR_DECL:
3777 case PARM_DECL:
3778 case RESULT_DECL:
3779 *ws = 0;
3780 obj = *expr_p;
3781
3782 if (DECL_RTL_SET_P (obj))
3783 break;
3784
3785 if (DECL_MODE (obj) == BLKmode)
3786 x = produce_memory_decl_rtl (obj, regno);
3787 else
3788 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3789
3790 break;
3791
3792 default:
3793 break;
3794 }
3795
3796 if (x)
3797 {
3798 decl_rtl_to_reset.safe_push (obj);
3799 SET_DECL_RTL (obj, x);
3800 }
3801
3802 return NULL_TREE;
3803 }
3804
3805 /* Determines cost of the computation of EXPR. */
3806
3807 static unsigned
computation_cost (tree expr, bool speed)
3809 {
3810 rtx_insn *seq;
3811 rtx rslt;
3812 tree type = TREE_TYPE (expr);
3813 unsigned cost;
3814 /* Avoid using hard regs in ways which may be unsupported. */
3815 int regno = LAST_VIRTUAL_REGISTER + 1;
3816 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3817 enum node_frequency real_frequency = node->frequency;
3818
3819 node->frequency = NODE_FREQUENCY_NORMAL;
3820 crtl->maybe_hot_insn_p = speed;
walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3822 start_sequence ();
3823 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3824 seq = get_insns ();
3825 end_sequence ();
3826 default_rtl_profile ();
3827 node->frequency = real_frequency;
3828
3829 cost = seq_cost (seq, speed);
3830 if (MEM_P (rslt))
3831 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3832 TYPE_ADDR_SPACE (type), speed);
3833 else if (!REG_P (rslt))
3834 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3835
3836 return cost;
3837 }
3838
3839 /* Returns variable containing the value of candidate CAND at statement AT. */
3840
3841 static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3843 {
3844 if (stmt_after_increment (loop, cand, stmt))
3845 return cand->var_after;
3846 else
3847 return cand->var_before;
3848 }
3849
3850 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3851 same precision that is at least as wide as the precision of TYPE, stores
3852 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3853 type of A and B. */
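/* For instance, if *A is (unsigned int) al and *B is (unsigned int) bl,
   where AL and BL are both of a 64-bit unsigned type, the conversions
   are stripped and that 64-bit type is returned.  */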
3854
3855 static tree
determine_common_wider_type (tree *a, tree *b)
3857 {
3858 tree wider_type = NULL;
3859 tree suba, subb;
3860 tree atype = TREE_TYPE (*a);
3861
3862 if (CONVERT_EXPR_P (*a))
3863 {
3864 suba = TREE_OPERAND (*a, 0);
3865 wider_type = TREE_TYPE (suba);
3866 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3867 return atype;
3868 }
3869 else
3870 return atype;
3871
3872 if (CONVERT_EXPR_P (*b))
3873 {
3874 subb = TREE_OPERAND (*b, 0);
3875 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3876 return atype;
3877 }
3878 else
3879 return atype;
3880
3881 *a = suba;
3882 *b = subb;
3883 return wider_type;
3884 }
3885
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in two parts in a
   decomposed form: the invariant part in AFF_INV and the variant part in
   AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if it is
   non-null.  Returns false if USE cannot be expressed using CAND.  */
3891
3892 static bool
3893 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3894 struct iv_cand *cand, struct aff_tree *aff_inv,
3895 struct aff_tree *aff_var, widest_int *prat = NULL)
3896 {
3897 tree ubase = use->iv->base, ustep = use->iv->step;
3898 tree cbase = cand->iv->base, cstep = cand->iv->step;
3899 tree common_type, uutype, var, cstep_common;
3900 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3901 aff_tree aff_cbase;
3902 widest_int rat;
3903
3904 /* We must have a precision to express the values of use. */
3905 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3906 return false;
3907
3908 var = var_at_stmt (loop, cand, at);
3909 uutype = unsigned_type_for (utype);
3910
3911 /* If the conversion is not noop, perform it. */
3912 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3913 {
3914 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3915 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3916 {
3917 tree inner_base, inner_step, inner_type;
3918 inner_base = TREE_OPERAND (cbase, 0);
3919 if (CONVERT_EXPR_P (cstep))
3920 inner_step = TREE_OPERAND (cstep, 0);
3921 else
3922 inner_step = cstep;
3923
3924 inner_type = TREE_TYPE (inner_base);
/* If the candidate is added from a biv whose type is smaller than
   ctype, we know that neither the candidate nor the biv overflows.
   In this case, it's safe to skip the conversion in the candidate.
   As an example, (unsigned short)((unsigned long)A) equals
   (unsigned short)A, if A has a type no larger than short.  */
3930 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3931 {
3932 cbase = inner_base;
3933 cstep = inner_step;
3934 }
3935 }
3936 cbase = fold_convert (uutype, cbase);
3937 cstep = fold_convert (uutype, cstep);
3938 var = fold_convert (uutype, var);
3939 }
3940
3941 /* Ratio is 1 when computing the value of biv cand by itself.
3942 We can't rely on constant_multiple_of in this case because the
3943 use is created after the original biv is selected. The call
3944 could fail because of inconsistent fold behavior. See PR68021
3945 for more information. */
3946 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3947 {
3948 gcc_assert (is_gimple_assign (use->stmt));
3949 gcc_assert (use->iv->ssa_name == cand->var_after);
3950 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3951 rat = 1;
3952 }
3953 else if (!constant_multiple_of (ustep, cstep, &rat))
3954 return false;
3955
3956 if (prat)
3957 *prat = rat;
3958
/* In case both UBASE and CBASE are shortened to UUTYPE from some common
   type, we achieve better folding by computing their difference in this
   wider type, and cast the result to UUTYPE.  We do not need to worry about
   overflows, as all the arithmetic will in the end be performed in UUTYPE
   anyway.  */
3964 common_type = determine_common_wider_type (&ubase, &cbase);
3965
3966 /* use = ubase - ratio * cbase + ratio * var. */
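/* This holds because use = ubase + i * ustep and, before the increment,
   var = cbase + i * cstep; with ustep = rat * cstep this gives
   use = ubase + rat * (var - cbase).  The after-increment case is
   handled by shifting cbase by cstep below.  */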
3967 tree_to_aff_combination (ubase, common_type, aff_inv);
3968 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3969 tree_to_aff_combination (var, uutype, aff_var);
3970
3971 /* We need to shift the value if we are after the increment. */
3972 if (stmt_after_increment (loop, cand, at))
3973 {
3974 aff_tree cstep_aff;
3975
3976 if (common_type != uutype)
3977 cstep_common = fold_convert (common_type, cstep);
3978 else
3979 cstep_common = cstep;
3980
3981 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3982 aff_combination_add (&aff_cbase, &cstep_aff);
3983 }
3984
3985 aff_combination_scale (&aff_cbase, -rat);
3986 aff_combination_add (aff_inv, &aff_cbase);
3987 if (common_type != uutype)
3988 aff_combination_convert (aff_inv, uutype);
3989
3990 aff_combination_scale (aff_var, rat);
3991 return true;
3992 }
3993
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3997
3998 static bool
get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
                     struct iv_cand *cand, struct aff_tree *aff)
4001 {
4002 aff_tree aff_var;
4003
4004 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4005 return false;
4006
4007 aff_combination_add (aff, &aff_var);
4008 return true;
4009 }
4010
4011 /* Return the type of USE. */
4012
4013 static tree
get_use_type (struct iv_use *use)
4015 {
4016 tree base_type = TREE_TYPE (use->iv->base);
4017 tree type;
4018
4019 if (use->type == USE_REF_ADDRESS)
4020 {
4021 /* The base_type may be a void pointer. Create a pointer type based on
4022 the mem_ref instead. */
4023 type = build_pointer_type (TREE_TYPE (*use->op_p));
4024 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4025 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4026 }
4027 else
4028 type = base_type;
4029
4030 return type;
4031 }
4032
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */
4035
4036 static tree
get_computation_at (struct loop *loop, gimple *at,
                    struct iv_use *use, struct iv_cand *cand)
4039 {
4040 aff_tree aff;
4041 tree type = get_use_type (use);
4042
4043 if (!get_computation_aff (loop, at, use, cand, &aff))
4044 return NULL_TREE;
4045 unshare_aff_combination (&aff);
4046 return fold_convert (type, aff_combination_to_tree (&aff));
4047 }
4048
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the average number of
   iterations.  If ROUND_UP_P is true, the result is rounded up rather
   than truncated toward zero when optimizing for speed.  */
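/* For example, a setup cost of 10 in a loop expected to run 4 times
   contributes 10/4 = 2 to the per-iteration cost, or 3 with ROUND_UP_P.  */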
4054 static unsigned
4055 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4056 bool round_up_p = false)
4057 {
4058 if (cost == INFTY)
4059 return cost;
4060 else if (optimize_loop_for_speed_p (data->current_loop))
4061 {
4062 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4063 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4064 }
4065 else
4066 return cost;
4067 }
4068
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   operand of EXPR holding the multiplication that is implemented as a shift.
   COST0 and COST1 are the costs for calculating the operands of EXPR.
   Returns true if successful, and returns the cost in COST.  */
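/* For instance, for EXPR a + b * 8 with MULT being b * 8, m is 3 and we
   compare the cost of a single shift-and-add instruction against a shift
   followed by a separate add.  */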
4073
4074 static bool
get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4077 {
4078 comp_cost res;
4079 tree op1 = TREE_OPERAND (expr, 1);
4080 tree cst = TREE_OPERAND (mult, 1);
4081 tree multop = TREE_OPERAND (mult, 0);
4082 int m = exact_log2 (int_cst_value (cst));
4083 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4084 int as_cost, sa_cost;
4085 bool mult_in_op1;
4086
4087 if (!(m >= 0 && m < maxm))
4088 return false;
4089
4090 STRIP_NOPS (op1);
4091 mult_in_op1 = operand_equal_p (op1, mult, 0);
4092
4093 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4094
4095 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4096 use that in preference to a shift insn followed by an add insn. */
4097 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4098 ? shiftadd_cost (speed, mode, m)
4099 : (mult_in_op1
4100 ? shiftsub1_cost (speed, mode, m)
4101 : shiftsub0_cost (speed, mode, m)));
4102
4103 res = comp_cost (MIN (as_cost, sa_cost), 0);
4104 res += (mult_in_op1 ? cost0 : cost1);
4105
4106 STRIP_NOPS (multop);
4107 if (!is_gimple_val (multop))
4108 res += force_expr_to_var_cost (multop, speed);
4109
4110 *cost = res;
4111 return true;
4112 }
4113
4114 /* Estimates cost of forcing expression EXPR into a variable. */
4115
4116 static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
4118 {
4119 static bool costs_initialized = false;
4120 static unsigned integer_cost [2];
4121 static unsigned symbol_cost [2];
4122 static unsigned address_cost [2];
4123 tree op0, op1;
4124 comp_cost cost0, cost1, cost;
4125 machine_mode mode;
4126 scalar_int_mode int_mode;
4127
4128 if (!costs_initialized)
4129 {
4130 tree type = build_pointer_type (integer_type_node);
4131 tree var, addr;
4132 rtx x;
4133 int i;
4134
4135 var = create_tmp_var_raw (integer_type_node, "test_var");
4136 TREE_STATIC (var) = 1;
4137 x = produce_memory_decl_rtl (var, NULL);
4138 SET_DECL_RTL (var, x);
4139
4140 addr = build1 (ADDR_EXPR, type, var);
4141
4142
4143 for (i = 0; i < 2; i++)
4144 {
4145 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4146 2000), i);
4147
4148 symbol_cost[i] = computation_cost (addr, i) + 1;
4149
4150 address_cost[i]
4151 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4152 if (dump_file && (dump_flags & TDF_DETAILS))
4153 {
4154 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4155 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4156 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4157 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4158 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4159 fprintf (dump_file, "\n");
4160 }
4161 }
4162
4163 costs_initialized = true;
4164 }
4165
4166 STRIP_NOPS (expr);
4167
4168 if (SSA_VAR_P (expr))
4169 return no_cost;
4170
4171 if (is_gimple_min_invariant (expr))
4172 {
4173 if (poly_int_tree_p (expr))
4174 return comp_cost (integer_cost [speed], 0);
4175
4176 if (TREE_CODE (expr) == ADDR_EXPR)
4177 {
4178 tree obj = TREE_OPERAND (expr, 0);
4179
4180 if (VAR_P (obj)
4181 || TREE_CODE (obj) == PARM_DECL
4182 || TREE_CODE (obj) == RESULT_DECL)
4183 return comp_cost (symbol_cost [speed], 0);
4184 }
4185
4186 return comp_cost (address_cost [speed], 0);
4187 }
4188
4189 switch (TREE_CODE (expr))
4190 {
4191 case POINTER_PLUS_EXPR:
4192 case PLUS_EXPR:
4193 case MINUS_EXPR:
4194 case MULT_EXPR:
4195 case TRUNC_DIV_EXPR:
4196 case BIT_AND_EXPR:
4197 case BIT_IOR_EXPR:
4198 case LSHIFT_EXPR:
4199 case RSHIFT_EXPR:
4200 op0 = TREE_OPERAND (expr, 0);
4201 op1 = TREE_OPERAND (expr, 1);
4202 STRIP_NOPS (op0);
4203 STRIP_NOPS (op1);
4204 break;
4205
4206 CASE_CONVERT:
4207 case NEGATE_EXPR:
4208 case BIT_NOT_EXPR:
4209 op0 = TREE_OPERAND (expr, 0);
4210 STRIP_NOPS (op0);
4211 op1 = NULL_TREE;
4212 break;
4213
4214 default:
4215 /* Just an arbitrary value, FIXME. */
4216 return comp_cost (target_spill_cost[speed], 0);
4217 }
4218
4219 if (op0 == NULL_TREE
4220 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4221 cost0 = no_cost;
4222 else
4223 cost0 = force_expr_to_var_cost (op0, speed);
4224
4225 if (op1 == NULL_TREE
4226 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4227 cost1 = no_cost;
4228 else
4229 cost1 = force_expr_to_var_cost (op1, speed);
4230
4231 mode = TYPE_MODE (TREE_TYPE (expr));
4232 switch (TREE_CODE (expr))
4233 {
4234 case POINTER_PLUS_EXPR:
4235 case PLUS_EXPR:
4236 case MINUS_EXPR:
4237 case NEGATE_EXPR:
4238 cost = comp_cost (add_cost (speed, mode), 0);
4239 if (TREE_CODE (expr) != NEGATE_EXPR)
4240 {
4241 tree mult = NULL_TREE;
4242 comp_cost sa_cost;
4243 if (TREE_CODE (op1) == MULT_EXPR)
4244 mult = op1;
4245 else if (TREE_CODE (op0) == MULT_EXPR)
4246 mult = op0;
4247
4248 if (mult != NULL_TREE
4249 && is_a <scalar_int_mode> (mode, &int_mode)
4250 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4251 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4252 speed, &sa_cost))
4253 return sa_cost;
4254 }
4255 break;
4256
4257 CASE_CONVERT:
4258 {
4259 tree inner_mode, outer_mode;
4260 outer_mode = TREE_TYPE (expr);
4261 inner_mode = TREE_TYPE (op0);
4262 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4263 TYPE_MODE (inner_mode), speed), 0);
4264 }
4265 break;
4266
4267 case MULT_EXPR:
4268 if (cst_and_fits_in_hwi (op0))
4269 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4270 mode, speed), 0);
4271 else if (cst_and_fits_in_hwi (op1))
4272 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4273 mode, speed), 0);
4274 else
4275 return comp_cost (target_spill_cost [speed], 0);
4276 break;
4277
4278 case TRUNC_DIV_EXPR:
4279 /* Division by power of two is usually cheap, so we allow it. Forbid
4280 anything else. */
4281 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4282 cost = comp_cost (add_cost (speed, mode), 0);
4283 else
4284 cost = comp_cost (target_spill_cost[speed], 0);
4285 break;
4286
4287 case BIT_AND_EXPR:
4288 case BIT_IOR_EXPR:
4289 case BIT_NOT_EXPR:
4290 case LSHIFT_EXPR:
4291 case RSHIFT_EXPR:
4292 cost = comp_cost (add_cost (speed, mode), 0);
4293 break;
4294
4295 default:
4296 gcc_unreachable ();
4297 }
4298
4299 cost += cost0;
4300 cost += cost1;
4301 return cost;
4302 }
4303
4304 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4305 invariants the computation depends on. */
4306
4307 static comp_cost
force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4309 {
4310 if (!expr)
4311 return no_cost;
4312
4313 find_inv_vars (data, &expr, inv_vars);
4314 return force_expr_to_var_cost (expr, data->speed);
4315 }
4316
/* Returns the cost of an auto-modifying address expression of the form
   base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
   is the offset of the address expression.  The address expression has
   ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED
   indicates whether we are optimizing for speed or size.  */
4322
4323 enum ainc_type
4324 {
4325 AINC_PRE_INC, /* Pre increment. */
4326 AINC_PRE_DEC, /* Pre decrement. */
4327 AINC_POST_INC, /* Post increment. */
4328 AINC_POST_DEC, /* Post decrement. */
4329 AINC_NONE /* Also the number of auto increment types. */
4330 };
4331
4332 struct ainc_cost_data
4333 {
4334 unsigned costs[AINC_NONE];
4335 };
4336
4337 static comp_cost
get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
                       machine_mode addr_mode, machine_mode mem_mode,
                       addr_space_t as, bool speed)
4341 {
4342 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4343 && !USE_STORE_PRE_DECREMENT (mem_mode)
4344 && !USE_LOAD_POST_DECREMENT (mem_mode)
4345 && !USE_STORE_POST_DECREMENT (mem_mode)
4346 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4347 && !USE_STORE_PRE_INCREMENT (mem_mode)
4348 && !USE_LOAD_POST_INCREMENT (mem_mode)
4349 && !USE_STORE_POST_INCREMENT (mem_mode))
4350 return infinite_cost;
4351
4352 static vec<ainc_cost_data *> ainc_cost_data_list;
4353 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4354 if (idx >= ainc_cost_data_list.length ())
4355 {
unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4357
4358 gcc_assert (nsize > idx);
4359 ainc_cost_data_list.safe_grow_cleared (nsize);
4360 }
4361
4362 ainc_cost_data *data = ainc_cost_data_list[idx];
4363 if (data == NULL)
4364 {
4365 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4366
4367 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4368 data->costs[AINC_PRE_DEC] = INFTY;
4369 data->costs[AINC_POST_DEC] = INFTY;
4370 data->costs[AINC_PRE_INC] = INFTY;
4371 data->costs[AINC_POST_INC] = INFTY;
4372 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4373 || USE_STORE_PRE_DECREMENT (mem_mode))
4374 {
4375 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4376
4377 if (memory_address_addr_space_p (mem_mode, addr, as))
4378 data->costs[AINC_PRE_DEC]
4379 = address_cost (addr, mem_mode, as, speed);
4380 }
4381 if (USE_LOAD_POST_DECREMENT (mem_mode)
4382 || USE_STORE_POST_DECREMENT (mem_mode))
4383 {
4384 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4385
4386 if (memory_address_addr_space_p (mem_mode, addr, as))
4387 data->costs[AINC_POST_DEC]
4388 = address_cost (addr, mem_mode, as, speed);
4389 }
4390 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4391 || USE_STORE_PRE_INCREMENT (mem_mode))
4392 {
4393 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4394
4395 if (memory_address_addr_space_p (mem_mode, addr, as))
4396 data->costs[AINC_PRE_INC]
4397 = address_cost (addr, mem_mode, as, speed);
4398 }
4399 if (USE_LOAD_POST_INCREMENT (mem_mode)
4400 || USE_STORE_POST_INCREMENT (mem_mode))
4401 {
4402 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4403
4404 if (memory_address_addr_space_p (mem_mode, addr, as))
4405 data->costs[AINC_POST_INC]
4406 = address_cost (addr, mem_mode, as, speed);
4407 }
4408 ainc_cost_data_list[idx] = data;
4409 }
4410
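/* Match AINC_OFFSET and AINC_STEP against the four auto-modify forms:
   a zero offset corresponds to post-increment/decrement, an offset of
   +/- the access size to pre-increment/decrement, with the sign of the
   step telling increment from decrement.  */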
4411 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4412 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4413 return comp_cost (data->costs[AINC_POST_INC], 0);
4414 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4415 return comp_cost (data->costs[AINC_POST_DEC], 0);
4416 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4417 return comp_cost (data->costs[AINC_PRE_INC], 0);
4418 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4419 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4420
4421 return infinite_cost;
4422 }
4423
/* Return the cost of computing USE's address expression by using CAND.
   AFF_INV and AFF_VAR represent the invariant and variant parts of the
   address expression, respectively.  If AFF_INV is simple, store the loop
   invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
   handle it as a new invariant expression and record it in INV_EXPR.  RATIO
   is the ratio between the steps of USE and CAND.  If CAN_AUTOINC is
   non-NULL, store a boolean value in it indicating whether this is an
   auto-increment address.  */
4432
4433 static comp_cost
get_address_cost (struct ivopts_data *data, struct iv_use *use,
                  struct iv_cand *cand, aff_tree *aff_inv,
                  aff_tree *aff_var, HOST_WIDE_INT ratio,
                  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
                  bool *can_autoinc, bool speed)
4439 {
4440 rtx addr;
4441 bool simple_inv = true;
4442 tree comp_inv = NULL_TREE, type = aff_var->type;
4443 comp_cost var_cost = no_cost, cost = no_cost;
4444 struct mem_address parts = {NULL_TREE, integer_one_node,
4445 NULL_TREE, NULL_TREE, NULL_TREE};
4446 machine_mode addr_mode = TYPE_MODE (type);
4447 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4448 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4449 /* Only true if ratio != 1. */
4450 bool ok_with_ratio_p = false;
4451 bool ok_without_ratio_p = false;
4452
4453 if (!aff_combination_const_p (aff_inv))
4454 {
4455 parts.index = integer_one_node;
4456 /* Addressing mode "base + index". */
4457 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4458 if (ratio != 1)
4459 {
4460 parts.step = wide_int_to_tree (type, ratio);
4461 /* Addressing mode "base + index << scale". */
4462 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4463 if (!ok_with_ratio_p)
4464 parts.step = NULL_TREE;
4465 }
4466 if (ok_with_ratio_p || ok_without_ratio_p)
4467 {
4468 if (maybe_ne (aff_inv->offset, 0))
4469 {
4470 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4471 /* Addressing mode "base + index [<< scale] + offset". */
4472 if (!valid_mem_ref_p (mem_mode, as, &parts))
4473 parts.offset = NULL_TREE;
4474 else
4475 aff_inv->offset = 0;
4476 }
4477
4478 move_fixed_address_to_symbol (&parts, aff_inv);
4479 /* Base is fixed address and is moved to symbol part. */
4480 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4481 parts.base = NULL_TREE;
4482
4483 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4484 if (parts.symbol != NULL_TREE
4485 && !valid_mem_ref_p (mem_mode, as, &parts))
4486 {
4487 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4488 parts.symbol = NULL_TREE;
4489 /* Reset SIMPLE_INV since symbol address needs to be computed
4490 outside of address expression in this case. */
4491 simple_inv = false;
4492 /* Symbol part is moved back to base part, it can't be NULL. */
4493 parts.base = integer_one_node;
4494 }
4495 }
4496 else
4497 parts.index = NULL_TREE;
4498 }
4499 else
4500 {
4501 poly_int64 ainc_step;
4502 if (can_autoinc
4503 && ratio == 1
4504 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4505 {
4506 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4507
4508 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4509 ainc_offset += ainc_step;
4510 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4511 addr_mode, mem_mode, as, speed);
4512 if (!cost.infinite_cost_p ())
4513 {
4514 *can_autoinc = true;
4515 return cost;
4516 }
4517 cost = no_cost;
4518 }
4519 if (!aff_combination_zero_p (aff_inv))
4520 {
4521 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4522 /* Addressing mode "base + offset". */
4523 if (!valid_mem_ref_p (mem_mode, as, &parts))
4524 parts.offset = NULL_TREE;
4525 else
4526 aff_inv->offset = 0;
4527 }
4528 }
4529
4530 if (simple_inv)
4531 simple_inv = (aff_inv == NULL
4532 || aff_combination_const_p (aff_inv)
4533 || aff_combination_singleton_var_p (aff_inv));
4534 if (!aff_combination_zero_p (aff_inv))
4535 comp_inv = aff_combination_to_tree (aff_inv);
4536 if (comp_inv != NULL_TREE)
4537 cost = force_var_cost (data, comp_inv, inv_vars);
4538 if (ratio != 1 && parts.step == NULL_TREE)
4539 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4540 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4541 var_cost += add_cost (speed, addr_mode);
4542
4543 if (comp_inv && inv_expr && !simple_inv)
4544 {
4545 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4546 /* Clear depends on. */
4547 if (*inv_expr != NULL && inv_vars && *inv_vars)
4548 bitmap_clear (*inv_vars);
4549
/* The cost of a small invariant expression, once adjusted against the
   loop niters, is usually zero, which makes it hard to distinguish
   from a candidate based on loop invariant variables.  Also, the
   generated invariant expression may not be hoisted out of the loop
   by a following pass.  We penalize the cost by rounding up in order
   to neutralize such effects.  */
4556 cost.cost = adjust_setup_cost (data, cost.cost, true);
4557 cost.scratch = cost.cost;
4558 }
4559
4560 cost += var_cost;
4561 addr = addr_for_mem_ref (&parts, as, false);
4562 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4563 cost += address_cost (addr, mem_mode, as, speed);
4564
4565 if (parts.symbol != NULL_TREE)
4566 cost.complexity += 1;
4567 /* Don't increase the complexity of adding a scaled index if it's
4568 the only kind of index that the target allows. */
4569 if (parts.step != NULL_TREE && ok_without_ratio_p)
4570 cost.complexity += 1;
4571 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4572 cost.complexity += 1;
4573 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4574 cost.complexity += 1;
4575
4576 return cost;
4577 }
4578
/* Scale (multiply) the computed COST (except the scratch part that should
   be hoisted out of the loop) by AT->frequency / header->frequency, which
   makes the expected cost more accurate.  */
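/* For example, a use in a block executed half as often as the loop
   header has the non-scratch part of its cost halved.  */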
4582
4583 static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4585 {
4586 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4587 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4588 if (loop_freq != 0)
4589 {
4590 gcc_assert (cost.scratch <= cost.cost);
4591 int scaled_cost
4592 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4593
4594 if (dump_file && (dump_flags & TDF_DETAILS))
4595 fprintf (dump_file, "Scaling cost based on bb prob "
4596 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4597 1.0f * bb_freq / loop_freq, cost.cost,
4598 cost.scratch, scaled_cost, bb_freq, loop_freq);
4599
4600 cost.cost = scaled_cost;
4601 }
4602
4603 return cost;
4604 }
4605
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in INV_VARS.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  If INV_EXPR is nonnull, record the invariant
   expr entry in it.  */
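/* For non-address uses the resulting cost is roughly the (amortized) cost
   of computing the invariant part, plus any needed type conversion, plus
   the cost of multiplying the variant part by RATIO and the final
   addition, all scaled by the relative frequency of AT's basic block.
   Address uses are costed by get_address_cost instead.  */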
4613
4614 static comp_cost
get_computation_cost (struct ivopts_data *data, struct iv_use *use,
                      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
                      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4618 {
4619 gimple *at = use->stmt;
4620 tree ubase = use->iv->base, cbase = cand->iv->base;
4621 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4622 tree comp_inv = NULL_TREE;
4623 HOST_WIDE_INT ratio, aratio;
4624 comp_cost cost;
4625 widest_int rat;
4626 aff_tree aff_inv, aff_var;
4627 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4628
4629 if (inv_vars)
4630 *inv_vars = NULL;
4631 if (can_autoinc)
4632 *can_autoinc = false;
4633 if (inv_expr)
4634 *inv_expr = NULL;
4635
4636 /* Check if we have enough precision to express the values of use. */
4637 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4638 return infinite_cost;
4639
4640 if (address_p
4641 || (use->iv->base_object
4642 && cand->iv->base_object
4643 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4644 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4645 {
/* Do not try to express the address of an object with a computation based
   on the address of a different object.  This may cause problems in RTL
   level alias analysis (which does not expect this to happen, as this is
   illegal in C), and would be unlikely to be useful anyway.  */
4651 if (use->iv->base_object
4652 && cand->iv->base_object
4653 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4654 return infinite_cost;
4655 }
4656
4657 if (!get_computation_aff_1 (data->current_loop, at, use,
4658 cand, &aff_inv, &aff_var, &rat)
4659 || !wi::fits_shwi_p (rat))
4660 return infinite_cost;
4661
4662 ratio = rat.to_shwi ();
4663 if (address_p)
4664 {
4665 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4666 inv_vars, inv_expr, can_autoinc, speed);
4667 return get_scaled_computation_cost_at (data, at, cost);
4668 }
4669
4670 bool simple_inv = (aff_combination_const_p (&aff_inv)
4671 || aff_combination_singleton_var_p (&aff_inv));
4672 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4673 aff_combination_convert (&aff_inv, signed_type);
4674 if (!aff_combination_zero_p (&aff_inv))
4675 comp_inv = aff_combination_to_tree (&aff_inv);
4676
4677 cost = force_var_cost (data, comp_inv, inv_vars);
4678 if (comp_inv && inv_expr && !simple_inv)
4679 {
4680 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4681 /* Clear depends on. */
4682 if (*inv_expr != NULL && inv_vars && *inv_vars)
4683 bitmap_clear (*inv_vars);
4684
4685 cost.cost = adjust_setup_cost (data, cost.cost);
4686 /* Record setup cost in scratch field. */
4687 cost.scratch = cost.cost;
4688 }
/* The cost of a constant integer can be covered when adding the invariant
   part to the variant part.  */
4691 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4692 cost = no_cost;
4693
4694 /* Need type narrowing to represent use with cand. */
4695 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4696 {
4697 machine_mode outer_mode = TYPE_MODE (utype);
4698 machine_mode inner_mode = TYPE_MODE (ctype);
4699 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4700 }
4701
4702 /* Turn a + i * (-c) into a - i * c. */
4703 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4704 aratio = -ratio;
4705 else
4706 aratio = ratio;
4707
4708 if (ratio != 1)
4709 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4710
4711 /* TODO: We may also need to check if we can compute a + i * 4 in one
4712 instruction. */
4713 /* Need to add up the invariant and variant parts. */
4714 if (comp_inv && !integer_zerop (comp_inv))
4715 cost += add_cost (speed, TYPE_MODE (utype));
4716
4717 return get_scaled_computation_cost_at (data, at, cost);
4718 }
4719
4720 /* Determines cost of computing the use in GROUP with CAND in a generic
4721 expression. */
4722
4723 static bool
determine_group_iv_cost_generic (struct ivopts_data *data,
                                 struct iv_group *group, struct iv_cand *cand)
4726 {
4727 comp_cost cost;
4728 iv_inv_expr_ent *inv_expr = NULL;
4729 bitmap inv_vars = NULL, inv_exprs = NULL;
4730 struct iv_use *use = group->vuses[0];
4731
4732 /* The simple case first -- if we need to express value of the preserved
4733 original biv, the cost is 0. This also prevents us from counting the
4734 cost of increment twice -- once at this use and once in the cost of
4735 the candidate. */
4736 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4737 cost = no_cost;
4738 else
4739 cost = get_computation_cost (data, use, cand, false,
4740 &inv_vars, NULL, &inv_expr);
4741
4742 if (inv_expr)
4743 {
4744 inv_exprs = BITMAP_ALLOC (NULL);
4745 bitmap_set_bit (inv_exprs, inv_expr->id);
4746 }
4747 set_group_iv_cost (data, group, cand, cost, inv_vars,
4748 NULL_TREE, ERROR_MARK, inv_exprs);
4749 return !cost.infinite_cost_p ();
4750 }
4751
4752 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4753
4754 static bool
determine_group_iv_cost_address (struct ivopts_data *data,
                                 struct iv_group *group, struct iv_cand *cand)
4757 {
4758 unsigned i;
4759 bitmap inv_vars = NULL, inv_exprs = NULL;
4760 bool can_autoinc;
4761 iv_inv_expr_ent *inv_expr = NULL;
4762 struct iv_use *use = group->vuses[0];
4763 comp_cost sum_cost = no_cost, cost;
4764
4765 cost = get_computation_cost (data, use, cand, true,
4766 &inv_vars, &can_autoinc, &inv_expr);
4767
4768 if (inv_expr)
4769 {
4770 inv_exprs = BITMAP_ALLOC (NULL);
4771 bitmap_set_bit (inv_exprs, inv_expr->id);
4772 }
4773 sum_cost = cost;
4774 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4775 {
4776 if (can_autoinc)
4777 sum_cost -= cand->cost_step;
4778 /* If we generated the candidate solely for exploiting autoincrement
4779 opportunities, and it turns out it can't be used, set the cost to
4780 infinity to make sure we ignore it. */
4781 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4782 sum_cost = infinite_cost;
4783 }
4784
4785 /* Uses in a group can share setup code, so only add setup cost once. */
4786 cost -= cost.scratch;
/* Compute and add the costs for the remaining uses of this group.  */
4788 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4789 {
4790 struct iv_use *next = group->vuses[i];
4791
4792 /* TODO: We could skip computing cost for sub iv_use when it has the
4793 same cost as the first iv_use, but the cost really depends on the
4794 offset and where the iv_use is. */
4795 cost = get_computation_cost (data, next, cand, true,
4796 NULL, &can_autoinc, &inv_expr);
4797 if (inv_expr)
4798 {
4799 if (!inv_exprs)
4800 inv_exprs = BITMAP_ALLOC (NULL);
4801
4802 bitmap_set_bit (inv_exprs, inv_expr->id);
4803 }
4804 sum_cost += cost;
4805 }
4806 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4807 NULL_TREE, ERROR_MARK, inv_exprs);
4808
4809 return !sum_cost.infinite_cost_p ();
4810 }
4811
4812 /* Computes value of candidate CAND at position AT in iteration NITER, and
4813 stores it to VAL. */
4814
4815 static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
               aff_tree *val)
4818 {
4819 aff_tree step, delta, nit;
4820 struct iv *iv = cand->iv;
4821 tree type = TREE_TYPE (iv->base);
4822 tree steptype;
4823 if (POINTER_TYPE_P (type))
4824 steptype = sizetype;
4825 else
4826 steptype = unsigned_type_for (type);
4827
4828 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4829 aff_combination_convert (&step, steptype);
4830 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4831 aff_combination_convert (&nit, steptype);
4832 aff_combination_mult (&nit, &step, &delta);
4833 if (stmt_after_increment (loop, cand, at))
4834 aff_combination_add (&delta, &step);
4835
4836 tree_to_aff_combination (iv->base, type, val);
4837 if (!POINTER_TYPE_P (type))
4838 aff_combination_convert (val, steptype);
4839 aff_combination_add (val, &delta);
4840 }
4841
4842 /* Returns period of induction variable iv. */
4843
4844 static tree
iv_period (struct iv *iv)
4846 {
4847 tree step = iv->step, period, type;
4848 tree pow2div;
4849
4850 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4851
4852 type = unsigned_type_for (TREE_TYPE (step));
/* The period of the iv is lcm (step, type_range)/step - 1,
   i.e., N*type_range/step - 1.  Since the type range is a power
   of two, N == (step >> num_of_ending_zeros_binary (step)),
   so the final result is

     (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
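/* For instance, a 32-bit unsigned iv with step 12 has two trailing zero
   bits in the step, so its period is 2^30 - 1.  */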
4861 pow2div = num_ending_zeros (step);
4862
4863 period = build_low_bits_mask (type,
4864 (TYPE_PRECISION (type)
4865 - tree_to_uhwi (pow2div)));
4866
4867 return period;
4868 }
4869
4870 /* Returns the comparison operator used when eliminating the iv USE. */
4871
4872 static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4874 {
4875 struct loop *loop = data->current_loop;
4876 basic_block ex_bb;
4877 edge exit;
4878
4879 ex_bb = gimple_bb (use->stmt);
4880 exit = EDGE_SUCC (ex_bb, 0);
4881 if (flow_bb_inside_loop_p (loop, exit->dest))
4882 exit = EDGE_SUCC (ex_bb, 1);
4883
4884 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4885 }
4886
4887 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4888 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4889 calculation is performed in non-wrapping type.
4890
4891 TODO: More generally, we could test for the situation that
4892 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4893 This would require knowing the sign of OFFSET. */
4894
4895 static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4897 {
4898 enum tree_code code;
4899 tree e1, e2;
4900 aff_tree aff_e1, aff_e2, aff_offset;
4901
4902 if (!nowrap_type_p (TREE_TYPE (base)))
4903 return false;
4904
4905 base = expand_simple_operations (base);
4906
4907 if (TREE_CODE (base) == SSA_NAME)
4908 {
4909 gimple *stmt = SSA_NAME_DEF_STMT (base);
4910
4911 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4912 return false;
4913
4914 code = gimple_assign_rhs_code (stmt);
4915 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4916 return false;
4917
4918 e1 = gimple_assign_rhs1 (stmt);
4919 e2 = gimple_assign_rhs2 (stmt);
4920 }
4921 else
4922 {
4923 code = TREE_CODE (base);
4924 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4925 return false;
4926 e1 = TREE_OPERAND (base, 0);
4927 e2 = TREE_OPERAND (base, 1);
4928 }
4929
4930 /* Use affine expansion as deeper inspection to prove the equality. */
4931 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4932 &aff_e2, &data->name_expansion_cache);
4933 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4934 &aff_offset, &data->name_expansion_cache);
4935 aff_combination_scale (&aff_offset, -1);
4936 switch (code)
4937 {
4938 case PLUS_EXPR:
4939 aff_combination_add (&aff_e2, &aff_offset);
4940 if (aff_combination_zero_p (&aff_e2))
4941 return true;
4942
4943 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4944 &aff_e1, &data->name_expansion_cache);
4945 aff_combination_add (&aff_e1, &aff_offset);
4946 return aff_combination_zero_p (&aff_e1);
4947
4948 case POINTER_PLUS_EXPR:
4949 aff_combination_add (&aff_e2, &aff_offset);
4950 return aff_combination_zero_p (&aff_e2);
4951
4952 default:
4953 return false;
4954 }
4955 }
4956
4957 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4958 comparison with CAND. NITER describes the number of iterations of
4959 the loops. If successful, the comparison in COMP_P is altered accordingly.
4960
4961 We aim to handle the following situation:
4962
4963 sometype *base, *p;
4964 int a, b, i;
4965
4966 i = a;
4967 p = p_0 = base + a;
4968
4969 do
4970 {
4971 bla (*p);
4972 p++;
4973 i++;
4974 }
4975 while (i < b);
4976
4977 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4978 We aim to optimize this to
4979
4980 p = p_0 = base + a;
4981 do
4982 {
4983 bla (*p);
4984 p++;
4985 }
4986 while (p < p_0 - a + b);
4987
This preserves correctness, since the pointer arithmetic does not
overflow.  More precisely:
4990
4991 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4992 overflow in computing it or the values of p.
4993 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4994 overflow. To prove this, we use the fact that p_0 = base + a. */
4995
4996 static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
5000 {
5001 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5002 struct aff_tree nit, tmpa, tmpb;
5003 enum tree_code comp;
5004 HOST_WIDE_INT step;
5005
5006 /* We need to know that the candidate induction variable does not overflow.
5007 While more complex analysis may be used to prove this, for now just
5008 check that the variable appears in the original program and that it
5009 is computed in a type that guarantees no overflows. */
5010 cand_type = TREE_TYPE (cand->iv->base);
5011 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5012 return false;
5013
5014 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5015 the calculation of the BOUND could overflow, making the comparison
5016 invalid. */
5017 if (!data->loop_single_exit_p)
5018 return false;
5019
5020 /* We need to be able to decide whether candidate is increasing or decreasing
5021 in order to choose the right comparison operator. */
5022 if (!cst_and_fits_in_hwi (cand->iv->step))
5023 return false;
5024 step = int_cst_value (cand->iv->step);
5025
5026 /* Check that the number of iterations matches the expected pattern:
5027 a + 1 > b ? 0 : b - a - 1. */
5028 mbz = niter->may_be_zero;
5029 if (TREE_CODE (mbz) == GT_EXPR)
5030 {
5031 /* Handle a + 1 > b. */
5032 tree op0 = TREE_OPERAND (mbz, 0);
5033 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5034 {
5035 a = TREE_OPERAND (op0, 0);
5036 b = TREE_OPERAND (mbz, 1);
5037 }
5038 else
5039 return false;
5040 }
5041 else if (TREE_CODE (mbz) == LT_EXPR)
5042 {
5043 tree op1 = TREE_OPERAND (mbz, 1);
5044
5045 /* Handle b < a + 1. */
5046 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5047 {
5048 a = TREE_OPERAND (op1, 0);
5049 b = TREE_OPERAND (mbz, 0);
5050 }
5051 else
5052 return false;
5053 }
5054 else
5055 return false;
5056
5057 /* Expected number of iterations is B - A - 1. Check that it matches
5058 the actual number, i.e., that B - A - NITER = 1. */
5059 tree_to_aff_combination (niter->niter, nit_type, &nit);
5060 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5061 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5062 aff_combination_scale (&nit, -1);
5063 aff_combination_scale (&tmpa, -1);
5064 aff_combination_add (&tmpb, &tmpa);
5065 aff_combination_add (&tmpb, &nit);
5066 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5067 return false;
5068
5069 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5070 overflow. */
5071 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5072 cand->iv->step,
5073 fold_convert (TREE_TYPE (cand->iv->step), a));
5074 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5075 return false;
5076
5077 /* Determine the new comparison operator. */
5078 comp = step < 0 ? GT_EXPR : LT_EXPR;
5079 if (*comp_p == NE_EXPR)
5080 *comp_p = comp;
5081 else if (*comp_p == EQ_EXPR)
5082 *comp_p = invert_tree_comparison (comp, false);
5083 else
5084 gcc_unreachable ();
5085
5086 return true;
5087 }
5088
5089 /* Check whether it is possible to express the condition in USE by comparison
5090 of candidate CAND. If so, store the value compared with to BOUND, and the
5091 comparison operator to COMP. */
5092
5093 static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
5097 {
5098 basic_block ex_bb;
5099 edge exit;
5100 tree period;
5101 struct loop *loop = data->current_loop;
5102 aff_tree bnd;
5103 struct tree_niter_desc *desc = NULL;
5104
5105 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5106 return false;
5107
/* For now this works only for exits that dominate the loop latch.
   TODO: extend to other conditions inside the loop body.  */
5110 ex_bb = gimple_bb (use->stmt);
5111 if (use->stmt != last_stmt (ex_bb)
5112 || gimple_code (use->stmt) != GIMPLE_COND
5113 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5114 return false;
5115
5116 exit = EDGE_SUCC (ex_bb, 0);
5117 if (flow_bb_inside_loop_p (loop, exit->dest))
5118 exit = EDGE_SUCC (ex_bb, 1);
5119 if (flow_bb_inside_loop_p (loop, exit->dest))
5120 return false;
5121
5122 desc = niter_for_exit (data, exit);
5123 if (!desc)
5124 return false;
5125
5126 /* Determine whether we can use the variable to test the exit condition.
5127 This is the case iff the period of the induction variable is greater
5128 than the number of iterations for which the exit condition is true. */
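/* For instance, an 8-bit unsigned candidate with step 1 has period 255
   and could not reliably test an exit that is only reached after 300
   iterations.  */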
5129 period = iv_period (cand->iv);
5130
5131 /* If the number of iterations is constant, compare against it directly. */
5132 if (TREE_CODE (desc->niter) == INTEGER_CST)
5133 {
5134 /* See cand_value_at. */
5135 if (stmt_after_increment (loop, cand, use->stmt))
5136 {
5137 if (!tree_int_cst_lt (desc->niter, period))
5138 return false;
5139 }
5140 else
5141 {
5142 if (tree_int_cst_lt (period, desc->niter))
5143 return false;
5144 }
5145 }
5146
5147 /* If not, and if this is the only possible exit of the loop, see whether
5148 we can get a conservative estimate on the number of iterations of the
5149 entire loop and compare against that instead. */
5150 else
5151 {
5152 widest_int period_value, max_niter;
5153
5154 max_niter = desc->max;
5155 if (stmt_after_increment (loop, cand, use->stmt))
5156 max_niter += 1;
5157 period_value = wi::to_widest (period);
5158 if (wi::gtu_p (max_niter, period_value))
5159 {
5160 /* See if we can take advantage of inferred loop bound
5161 information. */
5162 if (data->loop_single_exit_p)
5163 {
5164 if (!max_loop_iterations (loop, &max_niter))
5165 return false;
5166 /* The loop bound is already adjusted by adding 1. */
5167 if (wi::gtu_p (max_niter, period_value))
5168 return false;
5169 }
5170 else
5171 return false;
5172 }
5173 }
5174
5175 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5176
5177 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5178 aff_combination_to_tree (&bnd));
5179 *comp = iv_elimination_compare (data, use);
5180
5181 /* It is unlikely that computing the number of iterations using division
5182 would be more profitable than keeping the original induction variable. */
5183 if (expression_expensive_p (*bound))
5184 return false;
5185
/* Sometimes it is possible to handle the situation where the number of
   iterations may be zero unless additional assumptions hold, by using <
   instead of != in the exit condition.

   TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
   base the exit condition on it.  However, that is often too
   expensive.  */
5193 if (!integer_zerop (desc->may_be_zero))
5194 return iv_elimination_compare_lt (data, cand, comp, desc);
5195
5196 return true;
5197 }
5198
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied if it is used in the loop body and DATA->body_includes_call
   is set.  */
5201
5202 static int
parm_decl_cost (struct ivopts_data *data, tree bound)
5204 {
5205 tree sbound = bound;
5206 STRIP_NOPS (sbound);
5207
5208 if (TREE_CODE (sbound) == SSA_NAME
5209 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5210 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5211 && data->body_includes_call)
5212 return COSTS_N_INSNS (1);
5213
5214 return 0;
5215 }
5216
5217 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5218
5219 static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
                              struct iv_group *group, struct iv_cand *cand)
5222 {
5223 tree bound = NULL_TREE;
5224 struct iv *cmp_iv;
5225 bitmap inv_exprs = NULL;
5226 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5227 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5228 enum comp_iv_rewrite rewrite_type;
5229 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5230 tree *control_var, *bound_cst;
5231 enum tree_code comp = ERROR_MARK;
5232 struct iv_use *use = group->vuses[0];
5233
5234 /* Extract condition operands. */
5235 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5236 &bound_cst, NULL, &cmp_iv);
5237 gcc_assert (rewrite_type != COMP_IV_NA);
5238
5239 /* Try iv elimination. */
5240 if (rewrite_type == COMP_IV_ELIM
5241 && may_eliminate_iv (data, use, cand, &bound, &comp))
5242 {
5243 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5244 if (elim_cost.cost == 0)
5245 elim_cost.cost = parm_decl_cost (data, bound);
5246 else if (TREE_CODE (bound) == INTEGER_CST)
5247 elim_cost.cost = 0;
5248 /* If we replace a loop condition 'i < n' with 'p < base + n',
5249 inv_vars_elim will have 'base' and 'n' set, which implies that both
5250 'base' and 'n' will be live during the loop. More likely,
5251 'base + n' will be loop invariant, resulting in only one live value
5252 during the loop. So in that case we clear inv_vars_elim and set
5253 inv_expr_elim instead. */
5254 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5255 {
5256 inv_expr_elim = get_loop_invariant_expr (data, bound);
5257 bitmap_clear (inv_vars_elim);
5258 }
/* The bound is a loop invariant, so it will be computed only
   once.  */
5261 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5262 }
5263
5264 /* When the condition is a comparison of the candidate IV against
5265 zero, prefer this IV.
5266
5267 TODO: The constant that we're subtracting from the cost should
5268 be target-dependent. This information should be added to the
5269 target costs for each backend. */
5270 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5271 && integer_zerop (*bound_cst)
5272 && (operand_equal_p (*control_var, cand->var_after, 0)
5273 || operand_equal_p (*control_var, cand->var_before, 0)))
5274 elim_cost -= 1;
5275
5276 express_cost = get_computation_cost (data, use, cand, false,
5277 &inv_vars_express, NULL,
5278 &inv_expr_express);
5279 if (cmp_iv != NULL)
5280 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5281
5282 /* Count the cost of the original bound as well. */
5283 bound_cost = force_var_cost (data, *bound_cst, NULL);
5284 if (bound_cost.cost == 0)
5285 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5286 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5287 bound_cost.cost = 0;
5288 express_cost += bound_cost;
5289
5290 /* Choose the better approach, preferring the eliminated IV. */
5291 if (elim_cost <= express_cost)
5292 {
5293 cost = elim_cost;
5294 inv_vars = inv_vars_elim;
5295 inv_vars_elim = NULL;
5296 inv_expr = inv_expr_elim;
5297 }
5298 else
5299 {
5300 cost = express_cost;
5301 inv_vars = inv_vars_express;
5302 inv_vars_express = NULL;
5303 bound = NULL_TREE;
5304 comp = ERROR_MARK;
5305 inv_expr = inv_expr_express;
5306 }
5307
5308 if (inv_expr)
5309 {
5310 inv_exprs = BITMAP_ALLOC (NULL);
5311 bitmap_set_bit (inv_exprs, inv_expr->id);
5312 }
5313 set_group_iv_cost (data, group, cand, cost,
5314 inv_vars, bound, comp, inv_exprs);
5315
5316 if (inv_vars_elim)
5317 BITMAP_FREE (inv_vars_elim);
5318 if (inv_vars_express)
5319 BITMAP_FREE (inv_vars_express);
5320
5321 return !cost.infinite_cost_p ();
5322 }
5323
/* Determines the cost of computing the uses in GROUP with CAND.  Returns
   false if the uses in GROUP cannot be represented by CAND.  */
5326
5327 static bool
determine_group_iv_cost (struct ivopts_data *data,
                         struct iv_group *group, struct iv_cand *cand)
5330 {
5331 switch (group->type)
5332 {
5333 case USE_NONLINEAR_EXPR:
5334 return determine_group_iv_cost_generic (data, group, cand);
5335
5336 case USE_REF_ADDRESS:
5337 case USE_PTR_ADDRESS:
5338 return determine_group_iv_cost_address (data, group, cand);
5339
5340 case USE_COMPARE:
5341 return determine_group_iv_cost_cond (data, group, cand);
5342
5343 default:
5344 gcc_unreachable ();
5345 }
5346 }
5347
5348 /* Return true if get_computation_cost indicates that autoincrement is
5349 a possibility for the pair of USE and CAND, false otherwise. */
5350
5351 static bool
5352 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5353 struct iv_cand *cand)
5354 {
5355 if (!address_p (use->type))
5356 return false;
5357
5358 bool can_autoinc = false;
5359 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5360 return can_autoinc;
5361 }
5362
5363 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5364 use that allows autoincrement, and set their AINC_USE if possible. */
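/* For instance (a sketch assuming a target with post-increment addressing):

     # p_1 = PHI <p_0(preheader), p_2(latch)>
     ... = *p_1;            <-- closest use before the increment
     p_2 = p_1 + 4;         <-- IP_ORIGINAL candidate incremented here

   If get_computation_cost reports that the pair supports auto-increment,
   the use is recorded in the candidate's AINC_USE.  */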
5365
5366 static void
5367 set_autoinc_for_original_candidates (struct ivopts_data *data)
5368 {
5369 unsigned i, j;
5370
5371 for (i = 0; i < data->vcands.length (); i++)
5372 {
5373 struct iv_cand *cand = data->vcands[i];
5374 struct iv_use *closest_before = NULL;
5375 struct iv_use *closest_after = NULL;
5376 if (cand->pos != IP_ORIGINAL)
5377 continue;
5378
5379 for (j = 0; j < data->vgroups.length (); j++)
5380 {
5381 struct iv_group *group = data->vgroups[j];
5382 struct iv_use *use = group->vuses[0];
5383 unsigned uid = gimple_uid (use->stmt);
5384
5385 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5386 continue;
5387
5388 if (uid < gimple_uid (cand->incremented_at)
5389 && (closest_before == NULL
5390 || uid > gimple_uid (closest_before->stmt)))
5391 closest_before = use;
5392
5393 if (uid > gimple_uid (cand->incremented_at)
5394 && (closest_after == NULL
5395 || uid < gimple_uid (closest_after->stmt)))
5396 closest_after = use;
5397 }
5398
5399 if (closest_before != NULL
5400 && autoinc_possible_for_pair (data, closest_before, cand))
5401 cand->ainc_use = closest_before;
5402 else if (closest_after != NULL
5403 && autoinc_possible_for_pair (data, closest_after, cand))
5404 cand->ainc_use = closest_after;
5405 }
5406 }
5407
5408 /* Relate compare use with all candidates. */
5409
5410 static void
5411 relate_compare_use_with_all_cands (struct ivopts_data *data)
5412 {
5413 unsigned i, count = data->vcands.length ();
5414 for (i = 0; i < data->vgroups.length (); i++)
5415 {
5416 struct iv_group *group = data->vgroups[i];
5417
5418 if (group->type == USE_COMPARE)
5419 bitmap_set_range (group->related_cands, 0, count);
5420 }
5421 }
5422
5423 /* Finds the candidates for the induction variables. */
5424
5425 static void
5426 find_iv_candidates (struct ivopts_data *data)
5427 {
5428 /* Add commonly used ivs. */
5429 add_standard_iv_candidates (data);
5430
5431 /* Add old induction variables. */
5432 add_iv_candidate_for_bivs (data);
5433
5434 /* Add induction variables derived from uses. */
5435 add_iv_candidate_for_groups (data);
5436
5437 set_autoinc_for_original_candidates (data);
5438
5439 /* Record the important candidates. */
5440 record_important_candidates (data);
5441
5442 /* Relate compare iv_use with all candidates. */
5443 if (!data->consider_all_candidates)
5444 relate_compare_use_with_all_cands (data);
5445
5446 if (dump_file && (dump_flags & TDF_DETAILS))
5447 {
5448 unsigned i;
5449
5450 fprintf (dump_file, "\n<Important Candidates>:\t");
5451 for (i = 0; i < data->vcands.length (); i++)
5452 if (data->vcands[i]->important)
5453 fprintf (dump_file, " %d,", data->vcands[i]->id);
5454 fprintf (dump_file, "\n");
5455
5456 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5457 for (i = 0; i < data->vgroups.length (); i++)
5458 {
5459 struct iv_group *group = data->vgroups[i];
5460
5461 if (group->related_cands)
5462 {
5463 fprintf (dump_file, " Group %d:\t", group->id);
5464 dump_bitmap (dump_file, group->related_cands);
5465 }
5466 }
5467 fprintf (dump_file, "\n");
5468 }
5469 }
5470
5471 /* Determines the cost of computing each group of uses with each iv candidate. */
5472
5473 static void
5474 determine_group_iv_costs (struct ivopts_data *data)
5475 {
5476 unsigned i, j;
5477 struct iv_cand *cand;
5478 struct iv_group *group;
5479 bitmap to_clear = BITMAP_ALLOC (NULL);
5480
5481 alloc_use_cost_map (data);
5482
5483 for (i = 0; i < data->vgroups.length (); i++)
5484 {
5485 group = data->vgroups[i];
5486
5487 if (data->consider_all_candidates)
5488 {
5489 for (j = 0; j < data->vcands.length (); j++)
5490 {
5491 cand = data->vcands[j];
5492 determine_group_iv_cost (data, group, cand);
5493 }
5494 }
5495 else
5496 {
5497 bitmap_iterator bi;
5498
5499 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5500 {
5501 cand = data->vcands[j];
5502 if (!determine_group_iv_cost (data, group, cand))
5503 bitmap_set_bit (to_clear, j);
5504 }
5505
5506 /* Remove the candidates for which the cost is infinite from
5507 the list of related candidates. */
5508 bitmap_and_compl_into (group->related_cands, to_clear);
5509 bitmap_clear (to_clear);
5510 }
5511 }
5512
5513 BITMAP_FREE (to_clear);
5514
5515 if (dump_file && (dump_flags & TDF_DETAILS))
5516 {
5517 bitmap_iterator bi;
5518
5519 /* Dump invariant variables. */
5520 fprintf (dump_file, "\n<Invariant Vars>:\n");
5521 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5522 {
5523 struct version_info *info = ver_info (data, i);
5524 if (info->inv_id)
5525 {
5526 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5527 print_generic_expr (dump_file, info->name, TDF_SLIM);
5528 fprintf (dump_file, "%s\n",
5529 info->has_nonlin_use ? "" : "\t(eliminable)");
5530 }
5531 }
5532
5533 /* Dump invariant expressions. */
5534 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5535 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5536
5537 for (hash_table<iv_inv_expr_hasher>::iterator it
5538 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5539 ++it)
5540 list.safe_push (*it);
5541
5542 list.qsort (sort_iv_inv_expr_ent);
5543
5544 for (i = 0; i < list.length (); ++i)
5545 {
5546 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5547 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5548 fprintf (dump_file, "\n");
5549 }
5550
5551 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5552
5553 for (i = 0; i < data->vgroups.length (); i++)
5554 {
5555 group = data->vgroups[i];
5556
5557 fprintf (dump_file, "Group %d:\n", i);
5558 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5559 for (j = 0; j < group->n_map_members; j++)
5560 {
5561 if (!group->cost_map[j].cand
5562 || group->cost_map[j].cost.infinite_cost_p ())
5563 continue;
5564
5565 fprintf (dump_file, " %d\t%d\t%d\t",
5566 group->cost_map[j].cand->id,
5567 group->cost_map[j].cost.cost,
5568 group->cost_map[j].cost.complexity);
5569 if (!group->cost_map[j].inv_exprs
5570 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5571 fprintf (dump_file, "NIL;\t");
5572 else
5573 bitmap_print (dump_file,
5574 group->cost_map[j].inv_exprs, "", ";\t");
5575 if (!group->cost_map[j].inv_vars
5576 || bitmap_empty_p (group->cost_map[j].inv_vars))
5577 fprintf (dump_file, "NIL;\n");
5578 else
5579 bitmap_print (dump_file,
5580 group->cost_map[j].inv_vars, "", "\n");
5581 }
5582
5583 fprintf (dump_file, "\n");
5584 }
5585 fprintf (dump_file, "\n");
5586 }
5587 }
5588
5589 /* Determines cost of the candidate CAND. */
5590
5591 static void
5592 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5593 {
5594 comp_cost cost_base;
5595 unsigned cost, cost_step;
5596 tree base;
5597
5598 gcc_assert (cand->iv != NULL);
5599
5600 /* There are two costs associated with the candidate -- its increment
5601 and its initialization. The latter is almost negligible for any loop
5602 that rolls enough, so we only take it into account a little. */
5603
5604 base = cand->iv->base;
5605 cost_base = force_var_cost (data, base, NULL);
5606 /* It will be exceptional that the iv register happens to be initialized with
5607 the proper value at no cost. In general, there will at least be a regcopy
5608 or a const set. */
5609 if (cost_base.cost == 0)
5610 cost_base.cost = COSTS_N_INSNS (1);
5611 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5612
5613 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
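  /* For illustration (made-up numbers): with an addition cost of 4, a base
     that needs one setup instruction (COSTS_N_INSNS (1) == 4) and an average
     trip count of 10, this is roughly 4 + 4/10 -- the per-iteration increment
     dominates, as intended for loops that roll enough.  */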
5614
5615 /* Prefer the original ivs unless we may gain something by replacing them.
5616 The reason is to make debugging simpler; this is not relevant for
5617 artificial ivs created by other optimization passes. */
5618 if (cand->pos != IP_ORIGINAL
5619 || !SSA_NAME_VAR (cand->var_before)
5620 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5621 cost++;
5622
5623 /* Prefer not to insert statements into latch unless there are some
5624 already (so that we do not create unnecessary jumps). */
5625 if (cand->pos == IP_END
5626 && empty_block_p (ip_end_pos (data->current_loop)))
5627 cost++;
5628
5629 cand->cost = cost;
5630 cand->cost_step = cost_step;
5631 }
5632
5633 /* Determines costs of computation of the candidates. */
5634
5635 static void
5636 determine_iv_costs (struct ivopts_data *data)
5637 {
5638 unsigned i;
5639
5640 if (dump_file && (dump_flags & TDF_DETAILS))
5641 {
5642 fprintf (dump_file, "<Candidate Costs>:\n");
5643 fprintf (dump_file, " cand\tcost\n");
5644 }
5645
5646 for (i = 0; i < data->vcands.length (); i++)
5647 {
5648 struct iv_cand *cand = data->vcands[i];
5649
5650 determine_iv_cost (data, cand);
5651
5652 if (dump_file && (dump_flags & TDF_DETAILS))
5653 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5654 }
5655
5656 if (dump_file && (dump_flags & TDF_DETAILS))
5657 fprintf (dump_file, "\n");
5658 }
5659
5660 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5661 induction variables. Note N_INVS includes both invariant variables and
5662 invariant expressions. */
5663
5664 static unsigned
5665 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5666 unsigned n_cands)
5667 {
5668 unsigned cost;
5669 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5670 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5671 bool speed = data->speed;
5672
5673 /* If there is a call in the loop body, the call-clobbered registers
5674 are not available for loop invariants. */
5675 if (data->body_includes_call)
5676 available_regs = available_regs - target_clobbered_regs;
5677
5678 /* If we have enough registers. */
5679 if (regs_needed + target_res_regs < available_regs)
5680 cost = n_new;
5681 /* If close to running out of registers, try to preserve them. */
5682 else if (regs_needed <= available_regs)
5683 cost = target_reg_cost [speed] * regs_needed;
5684 /* If the registers needed exceed those available, but the number of
5685 candidates does not, penalize the extra registers using target_spill_cost. */
5686 else if (n_cands <= available_regs)
5687 cost = target_reg_cost [speed] * available_regs
5688 + target_spill_cost [speed] * (regs_needed - available_regs);
5689 /* If even the number of candidates exceeds the available registers, penalize
5690 the extra candidate registers using target_spill_cost * 2, because it is
5691 more expensive to spill an induction variable than an invariant. */
5692 else
5693 cost = target_reg_cost [speed] * available_regs
5694 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5695 + target_spill_cost [speed] * (regs_needed - n_cands);
5696
5697 /* Finally, add the number of candidates, so that we prefer eliminating
5698 induction variables if possible. */
5699 return cost + n_cands;
5700 }
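/* A worked example of the formula above (all numbers are made up): with 10
   available registers, 3 reserve registers, 2 registers already used in the
   loop body, 4 candidates and 2 invariants, regs_needed = 2 + (4 + 2) = 8.
   Since 8 + 3 >= 10 but 8 <= 10, the second case applies and the estimate is
   target_reg_cost * 8, plus the final 4 for the n_cands term.  */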
5701
5702 /* For each size of the induction variable set determine the penalty. */
5703
5704 static void
5705 determine_set_costs (struct ivopts_data *data)
5706 {
5707 unsigned j, n;
5708 gphi *phi;
5709 gphi_iterator psi;
5710 tree op;
5711 struct loop *loop = data->current_loop;
5712 bitmap_iterator bi;
5713
5714 if (dump_file && (dump_flags & TDF_DETAILS))
5715 {
5716 fprintf (dump_file, "<Global Costs>:\n");
5717 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5718 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5719 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5720 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5721 }
5722
5723 n = 0;
5724 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5725 {
5726 phi = psi.phi ();
5727 op = PHI_RESULT (phi);
5728
5729 if (virtual_operand_p (op))
5730 continue;
5731
5732 if (get_iv (data, op))
5733 continue;
5734
5735 if (!POINTER_TYPE_P (TREE_TYPE (op))
5736 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5737 continue;
5738
5739 n++;
5740 }
5741
5742 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5743 {
5744 struct version_info *info = ver_info (data, j);
5745
5746 if (info->inv_id && info->has_nonlin_use)
5747 n++;
5748 }
5749
5750 data->regs_used = n;
5751 if (dump_file && (dump_flags & TDF_DETAILS))
5752 fprintf (dump_file, " regs_used %d\n", n);
5753
5754 if (dump_file && (dump_flags & TDF_DETAILS))
5755 {
5756 fprintf (dump_file, " cost for size:\n");
5757 fprintf (dump_file, " ivs\tcost\n");
5758 for (j = 0; j <= 2 * target_avail_regs; j++)
5759 fprintf (dump_file, " %d\t%d\n", j,
5760 ivopts_estimate_reg_pressure (data, 0, j));
5761 fprintf (dump_file, "\n");
5762 }
5763 }
5764
5765 /* Returns true if A is a cheaper cost pair than B. */
5766
5767 static bool
5768 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5769 {
5770 if (!a)
5771 return false;
5772
5773 if (!b)
5774 return true;
5775
5776 if (a->cost < b->cost)
5777 return true;
5778
5779 if (b->cost < a->cost)
5780 return false;
5781
5782 /* In case the costs are the same, prefer the cheaper candidate. */
5783 if (a->cand->cost < b->cand->cost)
5784 return true;
5785
5786 return false;
5787 }
5788
5789 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5790 for more expensive, equal and cheaper respectively. */
5791
5792 static int
5793 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5794 {
5795 if (cheaper_cost_pair (a, b))
5796 return -1;
5797 if (cheaper_cost_pair (b, a))
5798 return 1;
5799
5800 return 0;
5801 }
5802
5803 /* Returns the cost pair through which GROUP is expressed in IVS. */
5804
5805 static struct cost_pair *
5806 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5807 {
5808 return ivs->cand_for_group[group->id];
5809 }
5810
5811 /* Computes the cost field of IVS structure. */
5812
5813 static void
5814 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5815 {
5816 comp_cost cost = ivs->cand_use_cost;
5817
5818 cost += ivs->cand_cost;
5819 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5820 ivs->cost = cost;
5821 }
5822
5823 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5824 and IVS. */
5825
5826 static void
5827 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5828 {
5829 bitmap_iterator bi;
5830 unsigned iid;
5831
5832 if (!invs)
5833 return;
5834
5835 gcc_assert (n_inv_uses != NULL);
5836 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5837 {
5838 n_inv_uses[iid]--;
5839 if (n_inv_uses[iid] == 0)
5840 ivs->n_invs--;
5841 }
5842 }
5843
5844 /* Set GROUP not to be expressed by any candidate in IVS. */
5845
5846 static void
5847 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5848 struct iv_group *group)
5849 {
5850 unsigned gid = group->id, cid;
5851 struct cost_pair *cp;
5852
5853 cp = ivs->cand_for_group[gid];
5854 if (!cp)
5855 return;
5856 cid = cp->cand->id;
5857
5858 ivs->bad_groups++;
5859 ivs->cand_for_group[gid] = NULL;
5860 ivs->n_cand_uses[cid]--;
5861
5862 if (ivs->n_cand_uses[cid] == 0)
5863 {
5864 bitmap_clear_bit (ivs->cands, cid);
5865 ivs->n_cands--;
5866 ivs->cand_cost -= cp->cand->cost;
5867 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5868 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5869 }
5870
5871 ivs->cand_use_cost -= cp->cost;
5872 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5873 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5874 iv_ca_recount_cost (data, ivs);
5875 }
5876
5877 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5878 IVS. */
5879
5880 static void
5881 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5882 {
5883 bitmap_iterator bi;
5884 unsigned iid;
5885
5886 if (!invs)
5887 return;
5888
5889 gcc_assert (n_inv_uses != NULL);
5890 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5891 {
5892 n_inv_uses[iid]++;
5893 if (n_inv_uses[iid] == 1)
5894 ivs->n_invs++;
5895 }
5896 }
5897
5898 /* Set cost pair for GROUP in set IVS to CP. */
5899
5900 static void
5901 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5902 struct iv_group *group, struct cost_pair *cp)
5903 {
5904 unsigned gid = group->id, cid;
5905
5906 if (ivs->cand_for_group[gid] == cp)
5907 return;
5908
5909 if (ivs->cand_for_group[gid])
5910 iv_ca_set_no_cp (data, ivs, group);
5911
5912 if (cp)
5913 {
5914 cid = cp->cand->id;
5915
5916 ivs->bad_groups--;
5917 ivs->cand_for_group[gid] = cp;
5918 ivs->n_cand_uses[cid]++;
5919 if (ivs->n_cand_uses[cid] == 1)
5920 {
5921 bitmap_set_bit (ivs->cands, cid);
5922 ivs->n_cands++;
5923 ivs->cand_cost += cp->cand->cost;
5924 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5925 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5926 }
5927
5928 ivs->cand_use_cost += cp->cost;
5929 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5930 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5931 iv_ca_recount_cost (data, ivs);
5932 }
5933 }
5934
5935 /* Extend set IVS by expressing GROUP by some of the candidates in it
5936 if possible. Consider all important candidates if the candidates in
5937 set IVS don't give any result. */
5938
5939 static void
5940 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5941 struct iv_group *group)
5942 {
5943 struct cost_pair *best_cp = NULL, *cp;
5944 bitmap_iterator bi;
5945 unsigned i;
5946 struct iv_cand *cand;
5947
5948 gcc_assert (ivs->upto >= group->id);
5949 ivs->upto++;
5950 ivs->bad_groups++;
5951
5952 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5953 {
5954 cand = data->vcands[i];
5955 cp = get_group_iv_cost (data, group, cand);
5956 if (cheaper_cost_pair (cp, best_cp))
5957 best_cp = cp;
5958 }
5959
5960 if (best_cp == NULL)
5961 {
5962 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5963 {
5964 cand = data->vcands[i];
5965 cp = get_group_iv_cost (data, group, cand);
5966 if (cheaper_cost_pair (cp, best_cp))
5967 best_cp = cp;
5968 }
5969 }
5970
5971 iv_ca_set_cp (data, ivs, group, best_cp);
5972 }
5973
5974 /* Get cost for assignment IVS. */
5975
5976 static comp_cost
5977 iv_ca_cost (struct iv_ca *ivs)
5978 {
5979 /* This was a conditional expression but it triggered a bug in
5980 Sun C 5.5. */
5981 if (ivs->bad_groups)
5982 return infinite_cost;
5983 else
5984 return ivs->cost;
5985 }
5986
5987 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5988 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5989 respectively. */
5990
5991 static int
5992 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5993 struct iv_group *group, struct cost_pair *old_cp,
5994 struct cost_pair *new_cp)
5995 {
5996 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5997 unsigned old_n_invs = ivs->n_invs;
5998 iv_ca_set_cp (data, ivs, group, new_cp);
5999 unsigned new_n_invs = ivs->n_invs;
6000 iv_ca_set_cp (data, ivs, group, old_cp);
6001
6002 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6003 }
6004
6005 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6006 it before NEXT. */
6007
6008 static struct iv_ca_delta *
6009 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6010 struct cost_pair *new_cp, struct iv_ca_delta *next)
6011 {
6012 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6013
6014 change->group = group;
6015 change->old_cp = old_cp;
6016 change->new_cp = new_cp;
6017 change->next = next;
6018
6019 return change;
6020 }
6021
6022 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6023 are rewritten. */
6024
6025 static struct iv_ca_delta *
6026 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6027 {
6028 struct iv_ca_delta *last;
6029
6030 if (!l2)
6031 return l1;
6032
6033 if (!l1)
6034 return l2;
6035
6036 for (last = l1; last->next; last = last->next)
6037 continue;
6038 last->next = l2;
6039
6040 return l1;
6041 }
6042
6043 /* Reverse the list of changes DELTA, forming the inverse to it. */
6044
6045 static struct iv_ca_delta *
6046 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6047 {
6048 struct iv_ca_delta *act, *next, *prev = NULL;
6049
6050 for (act = delta; act; act = next)
6051 {
6052 next = act->next;
6053 act->next = prev;
6054 prev = act;
6055
6056 std::swap (act->old_cp, act->new_cp);
6057 }
6058
6059 return prev;
6060 }
6061
6062 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6063 reverted instead. */
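/* A typical usage pattern in the callers below (a sketch):

     iv_ca_delta_commit (data, ivs, delta, true);    <-- apply tentatively
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);   <-- roll back

   so a tentative change can be costed without copying the whole set.  */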
6064
6065 static void
6066 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6067 struct iv_ca_delta *delta, bool forward)
6068 {
6069 struct cost_pair *from, *to;
6070 struct iv_ca_delta *act;
6071
6072 if (!forward)
6073 delta = iv_ca_delta_reverse (delta);
6074
6075 for (act = delta; act; act = act->next)
6076 {
6077 from = act->old_cp;
6078 to = act->new_cp;
6079 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6080 iv_ca_set_cp (data, ivs, act->group, to);
6081 }
6082
6083 if (!forward)
6084 iv_ca_delta_reverse (delta);
6085 }
6086
6087 /* Returns true if CAND is used in IVS. */
6088
6089 static bool
6090 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6091 {
6092 return ivs->n_cand_uses[cand->id] > 0;
6093 }
6094
6095 /* Returns number of induction variable candidates in the set IVS. */
6096
6097 static unsigned
6098 iv_ca_n_cands (struct iv_ca *ivs)
6099 {
6100 return ivs->n_cands;
6101 }
6102
6103 /* Free the list of changes DELTA. */
6104
6105 static void
6106 iv_ca_delta_free (struct iv_ca_delta **delta)
6107 {
6108 struct iv_ca_delta *act, *next;
6109
6110 for (act = *delta; act; act = next)
6111 {
6112 next = act->next;
6113 free (act);
6114 }
6115
6116 *delta = NULL;
6117 }
6118
6119 /* Allocates new iv candidates assignment. */
6120
6121 static struct iv_ca *
6122 iv_ca_new (struct ivopts_data *data)
6123 {
6124 struct iv_ca *nw = XNEW (struct iv_ca);
6125
6126 nw->upto = 0;
6127 nw->bad_groups = 0;
6128 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6129 data->vgroups.length ());
6130 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6131 nw->cands = BITMAP_ALLOC (NULL);
6132 nw->n_cands = 0;
6133 nw->n_invs = 0;
6134 nw->cand_use_cost = no_cost;
6135 nw->cand_cost = 0;
6136 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6137 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6138 nw->cost = no_cost;
6139
6140 return nw;
6141 }
6142
6143 /* Free memory occupied by the set IVS. */
6144
6145 static void
6146 iv_ca_free (struct iv_ca **ivs)
6147 {
6148 free ((*ivs)->cand_for_group);
6149 free ((*ivs)->n_cand_uses);
6150 BITMAP_FREE ((*ivs)->cands);
6151 free ((*ivs)->n_inv_var_uses);
6152 free ((*ivs)->n_inv_expr_uses);
6153 free (*ivs);
6154 *ivs = NULL;
6155 }
6156
6157 /* Dumps IVS to FILE. */
6158
6159 static void
6160 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6161 {
6162 unsigned i;
6163 comp_cost cost = iv_ca_cost (ivs);
6164
6165 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6166 cost.complexity);
6167 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6168 ivs->cand_cost, ivs->cand_use_cost.cost,
6169 ivs->cand_use_cost.complexity);
6170 bitmap_print (file, ivs->cands, " candidates: ","\n");
6171
6172 for (i = 0; i < ivs->upto; i++)
6173 {
6174 struct iv_group *group = data->vgroups[i];
6175 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6176 if (cp)
6177 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6178 group->id, cp->cand->id, cp->cost.cost,
6179 cp->cost.complexity);
6180 else
6181 fprintf (file, " group:%d --> ??\n", group->id);
6182 }
6183
6184 const char *pref = "";
6185 fprintf (file, " invariant variables: ");
6186 for (i = 1; i <= data->max_inv_var_id; i++)
6187 if (ivs->n_inv_var_uses[i])
6188 {
6189 fprintf (file, "%s%d", pref, i);
6190 pref = ", ";
6191 }
6192
6193 pref = "";
6194 fprintf (file, "\n invariant expressions: ");
6195 for (i = 1; i <= data->max_inv_expr_id; i++)
6196 if (ivs->n_inv_expr_uses[i])
6197 {
6198 fprintf (file, "%s%d", pref, i);
6199 pref = ", ";
6200 }
6201
6202 fprintf (file, "\n\n");
6203 }
6204
6205 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6206 of the new set, and store the differences in DELTA. The number of induction
6207 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is
6208 true, the function tries to find a solution with a minimal number of ivs. */
6209
6210 static comp_cost
6211 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6212 struct iv_cand *cand, struct iv_ca_delta **delta,
6213 unsigned *n_ivs, bool min_ncand)
6214 {
6215 unsigned i;
6216 comp_cost cost;
6217 struct iv_group *group;
6218 struct cost_pair *old_cp, *new_cp;
6219
6220 *delta = NULL;
6221 for (i = 0; i < ivs->upto; i++)
6222 {
6223 group = data->vgroups[i];
6224 old_cp = iv_ca_cand_for_group (ivs, group);
6225
6226 if (old_cp
6227 && old_cp->cand == cand)
6228 continue;
6229
6230 new_cp = get_group_iv_cost (data, group, cand);
6231 if (!new_cp)
6232 continue;
6233
6234 if (!min_ncand)
6235 {
6236 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6237 /* Skip if new_cp depends on more invariants. */
6238 if (cmp_invs > 0)
6239 continue;
6240
6241 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6242 /* Skip if new_cp is not cheaper. */
6243 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6244 continue;
6245 }
6246
6247 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6248 }
6249
6250 iv_ca_delta_commit (data, ivs, *delta, true);
6251 cost = iv_ca_cost (ivs);
6252 if (n_ivs)
6253 *n_ivs = iv_ca_n_cands (ivs);
6254 iv_ca_delta_commit (data, ivs, *delta, false);
6255
6256 return cost;
6257 }
6258
6259 /* Try narrowing set IVS by removing CAND. Return the cost of
6260 the new set and store the differences in DELTA. START is
6261 the candidate with which we start narrowing. */
6262
6263 static comp_cost
6264 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6265 struct iv_cand *cand, struct iv_cand *start,
6266 struct iv_ca_delta **delta)
6267 {
6268 unsigned i, ci;
6269 struct iv_group *group;
6270 struct cost_pair *old_cp, *new_cp, *cp;
6271 bitmap_iterator bi;
6272 struct iv_cand *cnd;
6273 comp_cost cost, best_cost, acost;
6274
6275 *delta = NULL;
6276 for (i = 0; i < data->vgroups.length (); i++)
6277 {
6278 group = data->vgroups[i];
6279
6280 old_cp = iv_ca_cand_for_group (ivs, group);
6281 if (old_cp->cand != cand)
6282 continue;
6283
6284 best_cost = iv_ca_cost (ivs);
6285 /* Start narrowing with START. */
6286 new_cp = get_group_iv_cost (data, group, start);
6287
6288 if (data->consider_all_candidates)
6289 {
6290 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6291 {
6292 if (ci == cand->id || (start && ci == start->id))
6293 continue;
6294
6295 cnd = data->vcands[ci];
6296
6297 cp = get_group_iv_cost (data, group, cnd);
6298 if (!cp)
6299 continue;
6300
6301 iv_ca_set_cp (data, ivs, group, cp);
6302 acost = iv_ca_cost (ivs);
6303
6304 if (acost < best_cost)
6305 {
6306 best_cost = acost;
6307 new_cp = cp;
6308 }
6309 }
6310 }
6311 else
6312 {
6313 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6314 {
6315 if (ci == cand->id || (start && ci == start->id))
6316 continue;
6317
6318 cnd = data->vcands[ci];
6319
6320 cp = get_group_iv_cost (data, group, cnd);
6321 if (!cp)
6322 continue;
6323
6324 iv_ca_set_cp (data, ivs, group, cp);
6325 acost = iv_ca_cost (ivs);
6326
6327 if (acost < best_cost)
6328 {
6329 best_cost = acost;
6330 new_cp = cp;
6331 }
6332 }
6333 }
6334 /* Restore to old cp for use. */
6335 iv_ca_set_cp (data, ivs, group, old_cp);
6336
6337 if (!new_cp)
6338 {
6339 iv_ca_delta_free (delta);
6340 return infinite_cost;
6341 }
6342
6343 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6344 }
6345
6346 iv_ca_delta_commit (data, ivs, *delta, true);
6347 cost = iv_ca_cost (ivs);
6348 iv_ca_delta_commit (data, ivs, *delta, false);
6349
6350 return cost;
6351 }
6352
6353 /* Try optimizing the set of candidates IVS by removing candidates other
6354 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6355 differences in DELTA. */
6356
6357 static comp_cost
6358 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6359 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6360 {
6361 bitmap_iterator bi;
6362 struct iv_ca_delta *act_delta, *best_delta;
6363 unsigned i;
6364 comp_cost best_cost, acost;
6365 struct iv_cand *cand;
6366
6367 best_delta = NULL;
6368 best_cost = iv_ca_cost (ivs);
6369
6370 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6371 {
6372 cand = data->vcands[i];
6373
6374 if (cand == except_cand)
6375 continue;
6376
6377 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6378
6379 if (acost < best_cost)
6380 {
6381 best_cost = acost;
6382 iv_ca_delta_free (&best_delta);
6383 best_delta = act_delta;
6384 }
6385 else
6386 iv_ca_delta_free (&act_delta);
6387 }
6388
6389 if (!best_delta)
6390 {
6391 *delta = NULL;
6392 return best_cost;
6393 }
6394
6395 /* Recurse to possibly remove other unnecessary ivs. */
6396 iv_ca_delta_commit (data, ivs, best_delta, true);
6397 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6398 iv_ca_delta_commit (data, ivs, best_delta, false);
6399 *delta = iv_ca_delta_join (best_delta, *delta);
6400 return best_cost;
6401 }
6402
6403 /* Check if the candidate with index CAND_IDX is other than OLD_CAND and has
6404 a cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6405 the corresponding cost_pair; otherwise just return BEST_CP. */
6406
6407 static struct cost_pair*
6408 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6409 unsigned int cand_idx, struct iv_cand *old_cand,
6410 struct cost_pair *best_cp)
6411 {
6412 struct iv_cand *cand;
6413 struct cost_pair *cp;
6414
6415 gcc_assert (old_cand != NULL && best_cp != NULL);
6416 if (cand_idx == old_cand->id)
6417 return best_cp;
6418
6419 cand = data->vcands[cand_idx];
6420 cp = get_group_iv_cost (data, group, cand);
6421 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6422 return cp;
6423
6424 return best_cp;
6425 }
6426
6427 /* Try breaking the locally optimal fixed point for IVS by replacing
6428 candidates that are used by more than one iv use. For each such candidate,
6429 this function tries to represent the iv uses under that candidate using
6430 other candidates with lower local cost, then tries to prune the new set.
6431 If the new set has lower cost, it returns the new cost after recording
6432 the candidate replacements in the list DELTA. */
6433
6434 static comp_cost
6435 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6436 struct iv_ca_delta **delta)
6437 {
6438 bitmap_iterator bi, bj;
6439 unsigned int i, j, k;
6440 struct iv_cand *cand;
6441 comp_cost orig_cost, acost;
6442 struct iv_ca_delta *act_delta, *tmp_delta;
6443 struct cost_pair *old_cp, *best_cp = NULL;
6444
6445 *delta = NULL;
6446 orig_cost = iv_ca_cost (ivs);
6447
6448 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6449 {
6450 if (ivs->n_cand_uses[i] == 1
6451 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6452 continue;
6453
6454 cand = data->vcands[i];
6455
6456 act_delta = NULL;
6457 /* Represent uses under current candidate using other ones with
6458 lower local cost. */
6459 for (j = 0; j < ivs->upto; j++)
6460 {
6461 struct iv_group *group = data->vgroups[j];
6462 old_cp = iv_ca_cand_for_group (ivs, group);
6463
6464 if (old_cp->cand != cand)
6465 continue;
6466
6467 best_cp = old_cp;
6468 if (data->consider_all_candidates)
6469 for (k = 0; k < data->vcands.length (); k++)
6470 best_cp = cheaper_cost_with_cand (data, group, k,
6471 old_cp->cand, best_cp);
6472 else
6473 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6474 best_cp = cheaper_cost_with_cand (data, group, k,
6475 old_cp->cand, best_cp);
6476
6477 if (best_cp == old_cp)
6478 continue;
6479
6480 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6481 }
6482 /* No need for further pruning. */
6483 if (!act_delta)
6484 continue;
6485
6486 /* Prune the new candidate set. */
6487 iv_ca_delta_commit (data, ivs, act_delta, true);
6488 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6489 iv_ca_delta_commit (data, ivs, act_delta, false);
6490 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6491
6492 if (acost < orig_cost)
6493 {
6494 *delta = act_delta;
6495 return acost;
6496 }
6497 else
6498 iv_ca_delta_free (&act_delta);
6499 }
6500
6501 return orig_cost;
6502 }
6503
6504 /* Tries to extend the set IVS in the best possible way in order to
6505 express GROUP. If ORIGINALP is true, prefer candidates from
6506 the original set of IVs, otherwise favor important candidates not
6507 based on any memory object. */
6508
6509 static bool
6510 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6511 struct iv_group *group, bool originalp)
6512 {
6513 comp_cost best_cost, act_cost;
6514 unsigned i;
6515 bitmap_iterator bi;
6516 struct iv_cand *cand;
6517 struct iv_ca_delta *best_delta = NULL, *act_delta;
6518 struct cost_pair *cp;
6519
6520 iv_ca_add_group (data, ivs, group);
6521 best_cost = iv_ca_cost (ivs);
6522 cp = iv_ca_cand_for_group (ivs, group);
6523 if (cp)
6524 {
6525 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6526 iv_ca_set_no_cp (data, ivs, group);
6527 }
6528
6529 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6530 first try important candidates not based on any memory object. Only if
6531 this fails, try the specific ones. Rationale -- in loops with many
6532 variables the best choice often is to use just one generic biv. If we
6533 added here many ivs specific to the uses, the optimization algorithm later
6534 would be likely to get stuck in a local minimum, thus causing us to create
6535 too many ivs. The approach from few ivs to more seems more likely to be
6536 successful -- starting from few ivs, replacing an expensive use by a
6537 specific iv should always be a win. */
6538 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6539 {
6540 cand = data->vcands[i];
6541
6542 if (originalp && cand->pos != IP_ORIGINAL)
6543 continue;
6544
6545 if (!originalp && cand->iv->base_object != NULL_TREE)
6546 continue;
6547
6548 if (iv_ca_cand_used_p (ivs, cand))
6549 continue;
6550
6551 cp = get_group_iv_cost (data, group, cand);
6552 if (!cp)
6553 continue;
6554
6555 iv_ca_set_cp (data, ivs, group, cp);
6556 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6557 true);
6558 iv_ca_set_no_cp (data, ivs, group);
6559 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6560
6561 if (act_cost < best_cost)
6562 {
6563 best_cost = act_cost;
6564
6565 iv_ca_delta_free (&best_delta);
6566 best_delta = act_delta;
6567 }
6568 else
6569 iv_ca_delta_free (&act_delta);
6570 }
6571
6572 if (best_cost.infinite_cost_p ())
6573 {
6574 for (i = 0; i < group->n_map_members; i++)
6575 {
6576 cp = group->cost_map + i;
6577 cand = cp->cand;
6578 if (!cand)
6579 continue;
6580
6581 /* Already tried this. */
6582 if (cand->important)
6583 {
6584 if (originalp && cand->pos == IP_ORIGINAL)
6585 continue;
6586 if (!originalp && cand->iv->base_object == NULL_TREE)
6587 continue;
6588 }
6589
6590 if (iv_ca_cand_used_p (ivs, cand))
6591 continue;
6592
6593 act_delta = NULL;
6594 iv_ca_set_cp (data, ivs, group, cp);
6595 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6596 iv_ca_set_no_cp (data, ivs, group);
6597 act_delta = iv_ca_delta_add (group,
6598 iv_ca_cand_for_group (ivs, group),
6599 cp, act_delta);
6600
6601 if (act_cost < best_cost)
6602 {
6603 best_cost = act_cost;
6604
6605 if (best_delta)
6606 iv_ca_delta_free (&best_delta);
6607 best_delta = act_delta;
6608 }
6609 else
6610 iv_ca_delta_free (&act_delta);
6611 }
6612 }
6613
6614 iv_ca_delta_commit (data, ivs, best_delta, true);
6615 iv_ca_delta_free (&best_delta);
6616
6617 return !best_cost.infinite_cost_p ();
6618 }
6619
6620 /* Finds an initial assignment of candidates to uses. */
6621
6622 static struct iv_ca *
6623 get_initial_solution (struct ivopts_data *data, bool originalp)
6624 {
6625 unsigned i;
6626 struct iv_ca *ivs = iv_ca_new (data);
6627
6628 for (i = 0; i < data->vgroups.length (); i++)
6629 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6630 {
6631 iv_ca_free (&ivs);
6632 return NULL;
6633 }
6634
6635 return ivs;
6636 }
6637
6638 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6639 points to a bool variable; if it is true, this function tries to break
6640 the locally optimal fixed point by replacing candidates in IVS. */
6641
6642 static bool
6643 try_improve_iv_set (struct ivopts_data *data,
6644 struct iv_ca *ivs, bool *try_replace_p)
6645 {
6646 unsigned i, n_ivs;
6647 comp_cost acost, best_cost = iv_ca_cost (ivs);
6648 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6649 struct iv_cand *cand;
6650
6651 /* Try extending the set of induction variables by one. */
6652 for (i = 0; i < data->vcands.length (); i++)
6653 {
6654 cand = data->vcands[i];
6655
6656 if (iv_ca_cand_used_p (ivs, cand))
6657 continue;
6658
6659 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6660 if (!act_delta)
6661 continue;
6662
6663 /* If we successfully added the candidate and the set is small enough,
6664 try optimizing it by removing other candidates. */
6665 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6666 {
6667 iv_ca_delta_commit (data, ivs, act_delta, true);
6668 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6669 iv_ca_delta_commit (data, ivs, act_delta, false);
6670 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6671 }
6672
6673 if (acost < best_cost)
6674 {
6675 best_cost = acost;
6676 iv_ca_delta_free (&best_delta);
6677 best_delta = act_delta;
6678 }
6679 else
6680 iv_ca_delta_free (&act_delta);
6681 }
6682
6683 if (!best_delta)
6684 {
6685 /* Try removing the candidates from the set instead. */
6686 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6687
6688 if (!best_delta && *try_replace_p)
6689 {
6690 *try_replace_p = false;
6691 /* So far the candidate selection algorithm tends to choose fewer IVs
6692 so that it can handle loops with many variables in which the best
6693 choice is often to use only one general biv. One weakness is that it
6694 can't handle the opposite case, in which different candidates should
6695 be chosen with respect to each use. To solve the problem, we replace
6696 candidates in the manner described in the comments of iv_ca_replace,
6697 thus giving the general algorithm a chance to break the locally
6698 optimal fixed point in these cases. */
6699 best_cost = iv_ca_replace (data, ivs, &best_delta);
6700 }
6701
6702 if (!best_delta)
6703 return false;
6704 }
6705
6706 iv_ca_delta_commit (data, ivs, best_delta, true);
6707 gcc_assert (best_cost == iv_ca_cost (ivs));
6708 iv_ca_delta_free (&best_delta);
6709 return true;
6710 }
6711
6712 /* Attempts to find the optimal set of induction variables. We use a simple
6713 greedy heuristic -- we try to replace at most one candidate in the selected
6714 solution and remove the unused ivs while this improves the cost. */
6715
6716 static struct iv_ca *
6717 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6718 {
6719 struct iv_ca *set;
6720 bool try_replace_p = true;
6721
6722 /* Get the initial solution. */
6723 set = get_initial_solution (data, originalp);
6724 if (!set)
6725 {
6726 if (dump_file && (dump_flags & TDF_DETAILS))
6727 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6728 return NULL;
6729 }
6730
6731 if (dump_file && (dump_flags & TDF_DETAILS))
6732 {
6733 fprintf (dump_file, "Initial set of candidates:\n");
6734 iv_ca_dump (data, dump_file, set);
6735 }
6736
6737 while (try_improve_iv_set (data, set, &try_replace_p))
6738 {
6739 if (dump_file && (dump_flags & TDF_DETAILS))
6740 {
6741 fprintf (dump_file, "Improved to:\n");
6742 iv_ca_dump (data, dump_file, set);
6743 }
6744 }
6745
6746 return set;
6747 }
6748
6749 static struct iv_ca *
6750 find_optimal_iv_set (struct ivopts_data *data)
6751 {
6752 unsigned i;
6753 comp_cost cost, origcost;
6754 struct iv_ca *set, *origset;
6755
6756 /* Determine the cost based on a strategy that starts with original IVs,
6757 and try again using a strategy that prefers candidates not based
6758 on any memory object. */
6759 origset = find_optimal_iv_set_1 (data, true);
6760 set = find_optimal_iv_set_1 (data, false);
6761
6762 if (!origset && !set)
6763 return NULL;
6764
6765 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6766 cost = set ? iv_ca_cost (set) : infinite_cost;
6767
6768 if (dump_file && (dump_flags & TDF_DETAILS))
6769 {
6770 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6771 origcost.cost, origcost.complexity);
6772 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6773 cost.cost, cost.complexity);
6774 }
6775
6776 /* Choose the one with the best cost. */
6777 if (origcost <= cost)
6778 {
6779 if (set)
6780 iv_ca_free (&set);
6781 set = origset;
6782 }
6783 else if (origset)
6784 iv_ca_free (&origset);
6785
6786 for (i = 0; i < data->vgroups.length (); i++)
6787 {
6788 struct iv_group *group = data->vgroups[i];
6789 group->selected = iv_ca_cand_for_group (set, group)->cand;
6790 }
6791
6792 return set;
6793 }
6794
6795 /* Creates a new induction variable corresponding to CAND. */
6796
6797 static void
6798 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6799 {
6800 gimple_stmt_iterator incr_pos;
6801 tree base;
6802 struct iv_use *use;
6803 struct iv_group *group;
6804 bool after = false;
6805
6806 gcc_assert (cand->iv != NULL);
6807
6808 switch (cand->pos)
6809 {
6810 case IP_NORMAL:
6811 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6812 break;
6813
6814 case IP_END:
6815 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6816 after = true;
6817 break;
6818
6819 case IP_AFTER_USE:
6820 after = true;
6821 /* fall through */
6822 case IP_BEFORE_USE:
6823 incr_pos = gsi_for_stmt (cand->incremented_at);
6824 break;
6825
6826 case IP_ORIGINAL:
6827 /* Mark that the iv is preserved. */
6828 name_info (data, cand->var_before)->preserve_biv = true;
6829 name_info (data, cand->var_after)->preserve_biv = true;
6830
6831 /* Rewrite the increment so that it uses var_before directly. */
6832 use = find_interesting_uses_op (data, cand->var_after);
6833 group = data->vgroups[use->group_id];
6834 group->selected = cand;
6835 return;
6836 }
6837
6838 gimple_add_tmp_var (cand->var_before);
6839
6840 base = unshare_expr (cand->iv->base);
6841
6842 create_iv (base, unshare_expr (cand->iv->step),
6843 cand->var_before, data->current_loop,
6844 &incr_pos, after, &cand->var_before, &cand->var_after);
6845 }
6846
6847 /* Creates new induction variables described in SET. */
6848
6849 static void
6850 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6851 {
6852 unsigned i;
6853 struct iv_cand *cand;
6854 bitmap_iterator bi;
6855
6856 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6857 {
6858 cand = data->vcands[i];
6859 create_new_iv (data, cand);
6860 }
6861
6862 if (dump_file && (dump_flags & TDF_DETAILS))
6863 {
6864 fprintf (dump_file, "Selected IV set for loop %d",
6865 data->current_loop->num);
6866 if (data->loop_loc != UNKNOWN_LOCATION)
6867 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6868 LOCATION_LINE (data->loop_loc));
6869 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6870 avg_loop_niter (data->current_loop));
6871 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6872 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6873 {
6874 cand = data->vcands[i];
6875 dump_cand (dump_file, cand);
6876 }
6877 fprintf (dump_file, "\n");
6878 }
6879 }
6880
6881 /* Rewrites USE (definition of iv used in a nonlinear expression)
6882 using candidate CAND. */
6883
6884 static void
6885 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6886 struct iv_use *use, struct iv_cand *cand)
6887 {
6888 gassign *ass;
6889 gimple_stmt_iterator bsi;
6890 tree comp, type = get_use_type (use), tgt;
6891
6892 /* An important special case -- if we are asked to express the value of
6893 the original iv by itself, just exit; there is no need to
6894 introduce a new computation (that might also need casting the
6895 variable to unsigned and back). */
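  /* E.g. (a sketch): when the use is the increment statement of the
     IP_ORIGINAL candidate itself,

	 i_14 = i_4 + 1;

     the statement is left as it is instead of being rewritten as a
     computation based on var_before.  */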
6896 if (cand->pos == IP_ORIGINAL
6897 && cand->incremented_at == use->stmt)
6898 {
6899 tree op = NULL_TREE;
6900 enum tree_code stmt_code;
6901
6902 gcc_assert (is_gimple_assign (use->stmt));
6903 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6904
6905 /* Check whether we may leave the computation unchanged.
6906 This is the case only if it does not rely on other
6907 computations in the loop -- otherwise, the computation
6908 we rely upon may be removed in remove_unused_ivs,
6909 thus leading to ICE. */
6910 stmt_code = gimple_assign_rhs_code (use->stmt);
6911 if (stmt_code == PLUS_EXPR
6912 || stmt_code == MINUS_EXPR
6913 || stmt_code == POINTER_PLUS_EXPR)
6914 {
6915 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6916 op = gimple_assign_rhs2 (use->stmt);
6917 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6918 op = gimple_assign_rhs1 (use->stmt);
6919 }
6920
6921 if (op != NULL_TREE)
6922 {
6923 if (expr_invariant_in_loop_p (data->current_loop, op))
6924 return;
6925 if (TREE_CODE (op) == SSA_NAME)
6926 {
6927 struct iv *iv = get_iv (data, op);
6928 if (iv != NULL && integer_zerop (iv->step))
6929 return;
6930 }
6931 }
6932 }
6933
6934 switch (gimple_code (use->stmt))
6935 {
6936 case GIMPLE_PHI:
6937 tgt = PHI_RESULT (use->stmt);
6938
6939 /* If we should keep the biv, do not replace it. */
6940 if (name_info (data, tgt)->preserve_biv)
6941 return;
6942
6943 bsi = gsi_after_labels (gimple_bb (use->stmt));
6944 break;
6945
6946 case GIMPLE_ASSIGN:
6947 tgt = gimple_assign_lhs (use->stmt);
6948 bsi = gsi_for_stmt (use->stmt);
6949 break;
6950
6951 default:
6952 gcc_unreachable ();
6953 }
6954
6955 aff_tree aff_inv, aff_var;
6956 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6957 use, cand, &aff_inv, &aff_var))
6958 gcc_unreachable ();
6959
6960 unshare_aff_combination (&aff_inv);
6961 unshare_aff_combination (&aff_var);
6962 /* Prefer the CSE opportunity over the loop invariant by adding the offset
6963 last, so that iv_uses with different offsets can be CSEed. */
6964 poly_widest_int offset = aff_inv.offset;
6965 aff_inv.offset = 0;
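  /* For example (a sketch): if two uses in the group are a[i + 1] and
     a[i + 2] and both are expressed by the same candidate, emitting
     "tmp = invariant + iv; use = tmp + CST" allows "invariant + iv" to be
     CSEd between the uses, whereas folding each constant offset into the
     invariant part would yield two distinct expressions.  */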
6966
6967 gimple_seq stmt_list = NULL, seq = NULL;
6968 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6969 tree comp_op2 = aff_combination_to_tree (&aff_var);
6970 gcc_assert (comp_op1 && comp_op2);
6971
6972 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6973 gimple_seq_add_seq (&stmt_list, seq);
6974 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6975 gimple_seq_add_seq (&stmt_list, seq);
6976
6977 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6978 std::swap (comp_op1, comp_op2);
6979
6980 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6981 {
6982 comp = fold_build_pointer_plus (comp_op1,
6983 fold_convert (sizetype, comp_op2));
6984 comp = fold_build_pointer_plus (comp,
6985 wide_int_to_tree (sizetype, offset));
6986 }
6987 else
6988 {
6989 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6990 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6991 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6992 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6993 }
6994
6995 comp = fold_convert (type, comp);
6996 if (!valid_gimple_rhs_p (comp)
6997 || (gimple_code (use->stmt) != GIMPLE_PHI
6998 /* We can't allow re-allocating the stmt as it might be pointed
6999 to still. */
7000 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7001 >= gimple_num_ops (gsi_stmt (bsi)))))
7002 {
7003 comp = force_gimple_operand (comp, &seq, true, NULL);
7004 gimple_seq_add_seq (&stmt_list, seq);
7005 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7006 {
7007 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7008 /* As this isn't a plain copy we have to reset alignment
7009 information. */
7010 if (SSA_NAME_PTR_INFO (comp))
7011 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7012 }
7013 }
7014
7015 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7016 if (gimple_code (use->stmt) == GIMPLE_PHI)
7017 {
7018 ass = gimple_build_assign (tgt, comp);
7019 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7020
7021 bsi = gsi_for_stmt (use->stmt);
7022 remove_phi_node (&bsi, false);
7023 }
7024 else
7025 {
7026 gimple_assign_set_rhs_from_tree (&bsi, comp);
7027 use->stmt = gsi_stmt (bsi);
7028 }
7029 }
7030
7031 /* Performs a peephole optimization to reorder the iv update statement with
7032 a mem ref to enable instruction combining in later phases. The mem ref uses
7033 the iv value before the update, so the reordering transformation requires
7034 adjustment of the offset. CAND is the selected IV_CAND.
7035
7036 Example:
7037
7038 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7039 iv2 = iv1 + 1;
7040
7041 if (t < val) (1)
7042 goto L;
7043 goto Head;
7044
7045
7046 Directly propagating t over to (1) will introduce an overlapping live
7047 range, thus increasing register pressure. This peephole transforms it into:
7048
7049
7050 iv2 = iv1 + 1;
7051 t = MEM_REF (base, iv2, 8, 8);
7052 if (t < val)
7053 goto L;
7054 goto Head;
7055 */
7056
7057 static void
7058 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7059 {
7060 tree var_after;
7061 gimple *iv_update, *stmt;
7062 basic_block bb;
7063 gimple_stmt_iterator gsi, gsi_iv;
7064
7065 if (cand->pos != IP_NORMAL)
7066 return;
7067
7068 var_after = cand->var_after;
7069 iv_update = SSA_NAME_DEF_STMT (var_after);
7070
7071 bb = gimple_bb (iv_update);
7072 gsi = gsi_last_nondebug_bb (bb);
7073 stmt = gsi_stmt (gsi);
7074
7075 /* Only handle conditional statement for now. */
7076 if (gimple_code (stmt) != GIMPLE_COND)
7077 return;
7078
7079 gsi_prev_nondebug (&gsi);
7080 stmt = gsi_stmt (gsi);
7081 if (stmt != iv_update)
7082 return;
7083
7084 gsi_prev_nondebug (&gsi);
7085 if (gsi_end_p (gsi))
7086 return;
7087
7088 stmt = gsi_stmt (gsi);
7089 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7090 return;
7091
7092 if (stmt != use->stmt)
7093 return;
7094
7095 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7096 return;
7097
7098 if (dump_file && (dump_flags & TDF_DETAILS))
7099 {
7100 fprintf (dump_file, "Reordering \n");
7101 print_gimple_stmt (dump_file, iv_update, 0);
7102 print_gimple_stmt (dump_file, use->stmt, 0);
7103 fprintf (dump_file, "\n");
7104 }
7105
7106 gsi = gsi_for_stmt (use->stmt);
7107 gsi_iv = gsi_for_stmt (iv_update);
7108 gsi_move_before (&gsi_iv, &gsi);
7109
7110 cand->pos = IP_BEFORE_USE;
7111 cand->incremented_at = use->stmt;
7112 }
7113
7114 /* Return the alias pointer type that should be used for a MEM_REF
7115 associated with USE, which has type USE_PTR_ADDRESS. */
7116
7117 static tree
7118 get_alias_ptr_type_for_ptr_address (iv_use *use)
7119 {
7120 gcall *call = as_a <gcall *> (use->stmt);
7121 switch (gimple_call_internal_fn (call))
7122 {
7123 case IFN_MASK_LOAD:
7124 case IFN_MASK_STORE:
7125 /* The second argument contains the correct alias type. */
7126 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7127 return TREE_TYPE (gimple_call_arg (call, 1));
7128
7129 default:
7130 gcc_unreachable ();
7131 }
7132 }
7133
7134
7135 /* Rewrites USE (address that is an iv) using candidate CAND. */
7136
7137 static void
7138 rewrite_use_address (struct ivopts_data *data,
7139 struct iv_use *use, struct iv_cand *cand)
7140 {
7141 aff_tree aff;
7142 bool ok;
7143
7144 adjust_iv_update_pos (cand, use);
7145 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7146 gcc_assert (ok);
7147 unshare_aff_combination (&aff);
7148
7149 /* To avoid undefined overflow problems, all IV candidates use unsigned
7150 integer types. The drawback is that this makes it impossible for
7151 create_mem_ref to distinguish an IV that is based on a memory object
7152 from one that represents simply an offset.
7153
7154 To work around this problem, we pass a hint to create_mem_ref that
7155 indicates which variable (if any) in aff is an IV based on a memory
7156 object. Note that we only consider the candidate. If this is not
7157 based on an object, the base of the reference is in some subexpression
7158 of the use -- but these will use pointer types, so they are recognized
7159 by the create_mem_ref heuristics anyway. */
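  /* E.g. (a hypothetical case): for a use *(a_5 + ivtmp_7) where the selected
     candidate is a pointer iv based on the object 'a', BASE_HINT is the
     candidate's variable at this statement, so create_mem_ref can use it as
     the object-based base of the MEM_REF; a plain unsigned offset candidate
     would provide no hint.  */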
7160 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7161 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7162 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7163 tree type = use->mem_type;
7164 tree alias_ptr_type;
7165 if (use->type == USE_PTR_ADDRESS)
7166 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7167 else
7168 {
7169 gcc_assert (type == TREE_TYPE (*use->op_p));
7170 unsigned int align = get_object_alignment (*use->op_p);
7171 if (align != TYPE_ALIGN (type))
7172 type = build_aligned_type (type, align);
7173 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7174 }
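  /* Build the new memory reference at the position of the use.  */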
  tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
                             iv, base_hint, data->speed);

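  /* For a USE_PTR_ADDRESS use the operand is a pointer passed to an internal
     call (such as a masked load or store), so take the address of the new
     reference and convert it to the type of the original operand.  */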
  if (use->type == USE_PTR_ADDRESS)
    {
      ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
      ref = fold_convert (get_use_type (use), ref);
      ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
                                      true, GSI_SAME_STMT);
    }
  else
    copy_ref_info (ref, *use->op_p);

  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct iv_group *group = data->vgroups[use->group_id];
  struct cost_pair *cp = get_group_iv_cost (data, group, cand);

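  /* A nonnull bound means this use can be eliminated: the exit test is
     replaced by a comparison of the candidate against the bound recorded
     in the cost pair.  */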
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
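      /* Any statements needed to compute the bound are loop-invariant,
         so emit them on the preheader edge.  */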
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation_at (data->current_loop, use->stmt, use, cand);
  gcc_assert (comp != NULL_TREE);
  gcc_assert (use->op_p != NULL);
  *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
                                         SSA_NAME_VAR (*use->op_p),
                                         true, GSI_SAME_STMT);
}

/* Rewrite the groups using the selected induction variables.  */

static void
rewrite_groups (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_cand *cand = group->selected;

      gcc_assert (cand);

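      /* Every use in the group is rewritten in terms of the group's selected
         candidate; dispatch on the kind of use.  */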
      if (group->type == USE_NONLINEAR_EXPR)
        {
          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
      else if (address_p (group->type))
        {
          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_address (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
      else
        {
          gcc_assert (group->type == USE_COMPARE);

          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_compare (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
{
  unsigned j;
  bitmap_iterator bi;

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->nonlin_use
          && !info->preserve_biv)
        {
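          /* This IV is not needed after rewriting; queue its definition
             for removal.  */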
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple *stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
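              /* Among the selected candidates, pick the one that most
                 resembles the IV being removed: an identical step scores 4,
                 a base of the same machine mode scores 2, and a constant
                 base scores 1.  Only the first 64 groups are inspected
                 (a cap that presumably bounds compile time).  */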
              for (i = 0; i < data->vgroups.length () && i < 64; i++)
                {
                  cand = data->vgroups[i]->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              SSA_NAME_DEF_STMT (def),
                                              &dummy_use, best_cand);
              if (!comp)
                continue;

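              /* If the value is needed in more than one debug bind or as
                 part of a larger expression, bind it to a debug temporary
                 so the binds can all refer to that single computation.  */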
              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
                  else
                    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
                  gdebug *def_temp
                    = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

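  /* Release the niter descriptors cached for this loop's exit edges.  */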
  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
        free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
        {
          if (group->cost_map[j].inv_vars)
            BITMAP_FREE (group->cost_map[j].inv_vars);
          if (group->cost_map[j].inv_exprs)
            BITMAP_FREE (group->cost_map[j].inv_exprs);
        }

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
        BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
        BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

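  /* If rewriting created new SSA names, grow version_info so the next loop
     sees a zero-initialized entry for every name.  */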
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  if (data->base_object_map)
    delete data->base_object_map;
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

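  /* Calls to internal functions and inexpensive builtins do not count as
     calls for this purpose.  */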
  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !gimple_call_internal_p (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
                           bitmap toremove)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop).get_location_t ();
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

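  /* Record whether the single dominating exit (if there is one) is the only
     exit of the loop; the exit-test elimination logic checks this flag.  */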
  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each SSA name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data, toremove);

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  auto_bitmap toremove;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop, toremove);
    }

  /* Remove eliminated IV defs.  */
  release_defs_bitset (toremove);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset_htab ();
  /* Likewise niter and control-IV information.  */
  free_numbers_of_iterations_estimates (cfun);

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"