1 /* Straight-line strength reduction.
2    Copyright (C) 2012-2021 Free Software Foundation, Inc.
3    Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* There are many algorithms for performing strength reduction on
22    loops.  This is not one of them.  IVOPTS handles strength reduction
23    of induction variables just fine.  This pass is intended to pick
24    up the crumbs it leaves behind, by considering opportunities for
25    strength reduction along dominator paths.
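
   For example, in straight-line code such as

     a = x * 8;
     ...
     b = (x + 3) * 8;

   the second multiply can be replaced by b = a + 24, provided the
   statement computing a dominates the one computing b.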
26 
27    Strength reduction addresses explicit multiplies, and certain
28    multiplies implicit in addressing expressions.  It would also be
29    possible to apply strength reduction to divisions and modulos,
30    but such opportunities are relatively uncommon.
31 
32    Strength reduction is also currently restricted to integer operations.
33    If desired, it could be extended to floating-point operations under
34    control of something like -funsafe-math-optimizations.  */
35 
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "backend.h"
40 #include "rtl.h"
41 #include "tree.h"
42 #include "gimple.h"
43 #include "cfghooks.h"
44 #include "tree-pass.h"
45 #include "ssa.h"
46 #include "expmed.h"
47 #include "gimple-pretty-print.h"
48 #include "fold-const.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "cfgloop.h"
53 #include "tree-cfg.h"
54 #include "domwalk.h"
55 #include "tree-ssa-address.h"
56 #include "tree-affine.h"
57 #include "tree-eh.h"
58 #include "builtins.h"
59 
60 /* Information about a strength reduction candidate.  Each statement
61    in the candidate table represents an expression of one of the
62    following forms (the special case of CAND_REF will be described
63    later):
64 
65    (CAND_MULT)  S1:  X = (B + i) * S
66    (CAND_ADD)   S1:  X = B + (i * S)
67 
68    Here X and B are SSA names, i is an integer constant, and S is
69    either an SSA name or a constant.  We call B the "base," i the
70    "index", and S the "stride."
71 
72    Any statement S0 that dominates S1 and is of the form:
73 
74    (CAND_MULT)  S0:  Y = (B + i') * S
75    (CAND_ADD)   S0:  Y = B + (i' * S)
76 
77    is called a "basis" for S1.  In both cases, S1 may be replaced by
78 
79                 S1':  X = Y + (i - i') * S,
80 
81    where (i - i') * S is folded to the extent possible.
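
   For example, with i' = 2, i = 5, and S = 4, S0 computes Y = (B + 2) * 4
   and S1 computes X = (B + 5) * 4, so S1 may be replaced by X = Y + 12.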
82 
83    All gimple statements are visited in dominator order, and each
84    statement that may contribute to one of the forms of S1 above is
85    given at least one entry in the candidate table.  Such statements
86    include addition, pointer addition, subtraction, multiplication,
87    negation, copies, and nontrivial type casts.  If a statement may
88    represent more than one expression of the forms of S1 above,
89    multiple "interpretations" are stored in the table and chained
90    together.  Examples:
91 
92    * An add of two SSA names may treat either operand as the base.
93    * A multiply of two SSA names, likewise.
94    * A copy or cast may be thought of as either a CAND_MULT with
95      i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
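
   For instance, an addition c = a + b is recorded once as a CAND_ADD
   with base a and stride b, and again as a CAND_ADD with base b and
   stride a; the two records are chained as interpretations of the
   same statement.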
96 
97    Candidate records are allocated from an obstack.  They are addressed
98    both from a hash table keyed on S1, and from a vector of candidate
99    pointers arranged in predominator order.
100 
101    Opportunity note
102    ----------------
103    Currently we don't recognize:
104 
105      S0: Y = (S * i') - B
106      S1: X = (S * i) - B
107 
108    as a strength reduction opportunity, even though this S1 would
109    also be replaceable by the S1' above.  This can be added if it
110    comes up in practice.
111 
112    Strength reduction in addressing
113    --------------------------------
114    There is another kind of candidate known as CAND_REF.  A CAND_REF
115    describes a statement containing a memory reference having
116    complex addressing that might benefit from strength reduction.
117    Specifically, we are interested in references for which
118    get_inner_reference returns a base address, offset, and bitpos as
119    follows:
120 
121      base:    MEM_REF (T1, C1)
122      offset:  MULT_EXPR (PLUS_EXPR (T2, C2), C3)
123      bitpos:  C4 * BITS_PER_UNIT
124 
125    Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
126    arbitrary integer constants.  Note that C2 may be zero, in which
127    case the offset will be MULT_EXPR (T2, C3).
128 
129    When this pattern is recognized, the original memory reference
130    can be replaced with:
131 
132      MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
133               C1 + (C2 * C3) + C4)
134 
135    which distributes the multiply to allow constant folding.  When
136    two or more addressing expressions can be represented by MEM_REFs
137    of this form, differing only in the constants C1, C2, and C4,
138    making this substitution produces more efficient addressing during
139    the RTL phases.  When there are not at least two expressions with
140    the same values of T1, T2, and C3, there is nothing to be gained
141    by the replacement.
142 
143    Strength reduction of CAND_REFs uses the same infrastructure as
144    that used by CAND_MULTs and CAND_ADDs.  We record T1 in the base (B)
145    field, MULT_EXPR (T2, C3) in the stride (S) field, and
146    C1 + (C2 * C3) + C4 in the index (i) field.  A basis for a CAND_REF
147    is thus another CAND_REF with the same B and S values.  When at
148    least two CAND_REFs are chained together using the basis relation,
149    each of them is replaced as above, resulting in improved code
150    generation for addressing.
151 
152    Conditional candidates
153    ======================
154 
155    Conditional candidates are best illustrated with an example.
156    Consider the code sequence:
157 
158    (1)  x_0 = ...;
159    (2)  a_0 = x_0 * 5;          MULT (B: x_0; i: 0; S: 5)
160         if (...)
161    (3)    x_1 = x_0 + 1;        ADD  (B: x_0, i: 1; S: 1)
162    (4)  x_2 = PHI <x_0, x_1>;   PHI  (B: x_0, i: 0, S: 1)
163    (5)  x_3 = x_2 + 1;          ADD  (B: x_2, i: 1, S: 1)
164    (6)  a_1 = x_3 * 5;          MULT (B: x_2, i: 1; S: 5)
165 
166    Here strength reduction is complicated by the uncertain value of x_2.
167    A legitimate transformation is:
168 
169    (1)  x_0 = ...;
170    (2)  a_0 = x_0 * 5;
171         if (...)
172 	  {
173    (3)      [x_1 = x_0 + 1;]
174    (3a)     t_1 = a_0 + 5;
175           }
176    (4)  [x_2 = PHI <x_0, x_1>;]
177    (4a) t_2 = PHI <a_0, t_1>;
178    (5)  [x_3 = x_2 + 1;]
179    (6r) a_1 = t_2 + 5;
180 
181    where the bracketed instructions may go dead.
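
   The constant 5 added in (3a) and (6r) is just the index difference
   of 1 scaled by the stride 5.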
182 
183    To recognize this opportunity, we have to observe that statement (6)
184    has a "hidden basis" (2).  The hidden basis is unlike a normal basis
185    in that the statement and the hidden basis have different base SSA
186    names (x_2 and x_0, respectively).  The relationship is established
187    when a statement's base name (x_2) is defined by a phi statement (4),
188    each argument of which (x_0, x_1) has an identical "derived base name."
189    If the argument is defined by a candidate (as x_1 is by (3)) that is a
190    CAND_ADD having a stride of 1, the derived base name of the argument is
191    the base name of the candidate (x_0).  Otherwise, the argument itself
192    is its derived base name (as is the case with argument x_0).
193 
194    The hidden basis for statement (6) is the nearest dominating candidate
195    whose base name is the derived base name (x_0) of the feeding phi (4),
196    and whose stride is identical to that of the statement.  We can then
197    create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
198    allowing the final replacement of (6) by the strength-reduced (6r).
199 
200    To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
201    A CAND_PHI is not a candidate for replacement, but is maintained in the
202    candidate table to ease discovery of hidden bases.  Any phi statement
203    whose arguments share a common derived base name is entered into the
204    table with the derived base name, an (arbitrary) index of zero, and a
205    stride of 1.  A statement with a hidden basis can then be detected by
206    simply looking up its feeding phi definition in the candidate table,
207    extracting the derived base name, and searching for a basis in the
208    usual manner after substituting the derived base name.
209 
210    Note that the transformation is only valid when the original phi and
211    the statements that define the phi's arguments are all at the same
212    position in the loop hierarchy.  */
213 
214 
215 /* Index into the candidate vector, offset by 1.  VECs are zero-based,
216    while cand_idx's are one-based, with zero indicating null.  */
217 typedef unsigned cand_idx;
218 
219 /* The kind of candidate.  */
220 enum cand_kind
221 {
222   CAND_MULT,
223   CAND_ADD,
224   CAND_REF,
225   CAND_PHI
226 };
227 
228 class slsr_cand_d
229 {
230 public:
231   /* The candidate statement S1.  */
232   gimple *cand_stmt;
233 
234   /* The base expression B:  often an SSA name, but not always.  */
235   tree base_expr;
236 
237   /* The stride S.  */
238   tree stride;
239 
240   /* The index constant i.  */
241   widest_int index;
242 
243   /* The type of the candidate.  This is normally the type of base_expr,
244      but casts may have occurred when combining feeding instructions.
245      A candidate can only be a basis for candidates of the same final type.
246      (For CAND_REFs, this is the type to be used for operand 1 of the
247      replacement MEM_REF.)  */
248   tree cand_type;
249 
250   /* The type to be used to interpret the stride field when the stride
251      is not a constant.  Normally the same as the type of the recorded
252      stride, but when the stride has been cast we need to maintain that
253      knowledge in order to make legal substitutions without losing
254      precision.  When the stride is a constant, this will be sizetype.  */
255   tree stride_type;
256 
257   /* The kind of candidate (CAND_MULT, etc.).  */
258   enum cand_kind kind;
259 
260   /* Index of this candidate in the candidate vector.  */
261   cand_idx cand_num;
262 
263   /* Index of the next candidate record for the same statement.
264      A statement may be useful in more than one way (e.g., due to
265      commutativity).  So we can have multiple "interpretations"
266      of a statement.  */
267   cand_idx next_interp;
268 
269   /* Index of the first candidate record in a chain for the same
270      statement.  */
271   cand_idx first_interp;
272 
273   /* Index of the basis statement S0, if any, in the candidate vector.  */
274   cand_idx basis;
275 
276   /* First candidate for which this candidate is a basis, if one exists.  */
277   cand_idx dependent;
278 
279   /* Next candidate having the same basis as this one.  */
280   cand_idx sibling;
281 
282   /* If this is a conditional candidate, the CAND_PHI candidate
283      that defines the base SSA name B.  */
284   cand_idx def_phi;
285 
286   /* Savings that can be expected from eliminating dead code if this
287      candidate is replaced.  */
288   int dead_savings;
289 
290   /* For PHI candidates, use a visited flag to keep from processing the
291      same PHI twice from multiple paths.  */
292   int visited;
293 
294   /* We sometimes have to cache a phi basis with a phi candidate to
295      avoid processing it twice.  Valid only if visited==1.  */
296   tree cached_basis;
297 };
298 
299 typedef class slsr_cand_d slsr_cand, *slsr_cand_t;
300 typedef const class slsr_cand_d *const_slsr_cand_t;
301 
302 /* Pointers to candidates are chained together as part of a mapping
303    from base expressions to the candidates that use them.  */
304 
305 struct cand_chain_d
306 {
307   /* Base expression for the chain of candidates:  often, but not
308      always, an SSA name.  */
309   tree base_expr;
310 
311   /* Pointer to a candidate.  */
312   slsr_cand_t cand;
313 
314   /* Chain pointer.  */
315   struct cand_chain_d *next;
316 
317 };
318 
319 typedef struct cand_chain_d cand_chain, *cand_chain_t;
320 typedef const struct cand_chain_d *const_cand_chain_t;
321 
322 /* Information about a unique "increment" associated with candidates
323    having an SSA name for a stride.  An increment is the difference
324    between the index of the candidate and the index of its basis,
325    i.e., (i - i') as discussed in the module commentary.
326 
327    When we are not going to generate address arithmetic we treat
328    increments that differ only in sign as the same, allowing sharing
329    of the cost of initializers.  The absolute value of the increment
330    is stored in the incr_info.  */
331 
332 class incr_info_d
333 {
334 public:
335   /* The increment that relates a candidate to its basis.  */
336   widest_int incr;
337 
338   /* How many times the increment occurs in the candidate tree.  */
339   unsigned count;
340 
341   /* Cost of replacing candidates using this increment.  Negative and
342      zero costs indicate replacement should be performed.  */
343   int cost;
344 
345   /* If this increment is profitable but is not -1, 0, or 1, it requires
346      an initializer T_0 = stride * incr to be found or introduced in the
347      nearest common dominator of all candidates.  This field holds T_0
348      for subsequent use.  */
349   tree initializer;
350 
351   /* If the initializer was found to already exist, this is the block
352      where it was found.  */
353   basic_block init_bb;
354 };
355 
356 typedef class incr_info_d incr_info, *incr_info_t;
357 
358 /* Candidates are maintained in a vector.  If candidate X dominates
359    candidate Y, then X appears before Y in the vector; but the
360    converse does not necessarily hold.  */
361 static vec<slsr_cand_t> cand_vec;
362 
363 enum cost_consts
364 {
365   COST_NEUTRAL = 0,
366   COST_INFINITE = 1000
367 };
368 
369 enum stride_status
370 {
371   UNKNOWN_STRIDE = 0,
372   KNOWN_STRIDE = 1
373 };
374 
375 enum phi_adjust_status
376 {
377   NOT_PHI_ADJUST = 0,
378   PHI_ADJUST = 1
379 };
380 
381 enum count_phis_status
382 {
383   DONT_COUNT_PHIS = 0,
384   COUNT_PHIS = 1
385 };
386 
387 /* Constrain how many PHI nodes we will visit for a conditional
388    candidate (depth and breadth).  */
389 const int MAX_SPREAD = 16;
390 
391 /* Pointer map embodying a mapping from statements to candidates.  */
392 static hash_map<gimple *, slsr_cand_t> *stmt_cand_map;
393 
394 /* Obstack for candidates.  */
395 static struct obstack cand_obstack;
396 
397 /* Obstack for candidate chains.  */
398 static struct obstack chain_obstack;
399 
400 /* An array INCR_VEC of incr_infos is used during analysis of related
401    candidates having an SSA name for a stride.  INCR_VEC_LEN describes
402    its current length.  MAX_INCR_VEC_LEN is used to avoid costly
403    pathological cases. */
404 static incr_info_t incr_vec;
405 static unsigned incr_vec_len;
406 const int MAX_INCR_VEC_LEN = 16;
407 
408 /* For a chain of candidates with unknown stride, indicates whether or not
409    we must generate pointer arithmetic when replacing statements.  */
410 static bool address_arithmetic_p;
411 
412 /* Forward function declarations.  */
413 static slsr_cand_t base_cand_from_table (tree);
414 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
415 static bool legal_cast_p_1 (tree, tree);
416 
417 /* Produce a pointer to the IDX'th candidate in the candidate vector.  */
418 
419 static slsr_cand_t
420 lookup_cand (cand_idx idx)
421 {
422   return cand_vec[idx];
423 }
424 
425 /* Helper for hashing a candidate chain header.  */
426 
427 struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
428 {
429   static inline hashval_t hash (const cand_chain *);
430   static inline bool equal (const cand_chain *, const cand_chain *);
431 };
432 
433 inline hashval_t
434 cand_chain_hasher::hash (const cand_chain *p)
435 {
436   tree base_expr = p->base_expr;
437   return iterative_hash_expr (base_expr, 0);
438 }
439 
440 inline bool
441 cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
442 {
443   return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
444 }
445 
446 /* Hash table embodying a mapping from base exprs to chains of candidates.  */
447 static hash_table<cand_chain_hasher> *base_cand_map;
448 
449 /* Pointer map used by tree_to_aff_combination_expand.  */
450 static hash_map<tree, name_expansion *> *name_expansions;
451 /* Pointer map embodying a mapping from bases to alternative bases.  */
452 static hash_map<tree, tree> *alt_base_map;
453 
454 /* Given BASE, use the tree affine combination facilities to
455    find the underlying tree expression for BASE, with any
456    immediate offset excluded.
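
   For instance (purely illustrative), if BASE is an SSA name defined
   as B_2 + 16, the underlying expression returned is B_2.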
457 
458    N.B. we should eliminate this backtracking with better forward
459    analysis in a future release.  */
460 
461 static tree
462 get_alternative_base (tree base)
463 {
464   tree *result = alt_base_map->get (base);
465 
466   if (result == NULL)
467     {
468       tree expr;
469       aff_tree aff;
470 
471       tree_to_aff_combination_expand (base, TREE_TYPE (base),
472 				      &aff, &name_expansions);
473       aff.offset = 0;
474       expr = aff_combination_to_tree (&aff);
475 
476       gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));
477 
478       return expr == base ? NULL : expr;
479     }
480 
481   return *result;
482 }
483 
484 /* Look in the candidate table for a CAND_PHI that defines BASE and
485    return it if found; otherwise return NULL.  */
486 
487 static cand_idx
488 find_phi_def (tree base)
489 {
490   slsr_cand_t c;
491 
492   if (TREE_CODE (base) != SSA_NAME)
493     return 0;
494 
495   c = base_cand_from_table (base);
496 
497   if (!c || c->kind != CAND_PHI
498       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (c->cand_stmt)))
499     return 0;
500 
501   return c->cand_num;
502 }
503 
504 /* Determine whether all uses of NAME are directly or indirectly
505    used by STMT.  That is, we want to know whether if STMT goes
506    dead, the definition of NAME also goes dead.  */
507 static bool
508 uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse = 0)
509 {
510   gimple *use_stmt;
511   imm_use_iterator iter;
512   bool retval = true;
513 
514   FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
515     {
516       if (use_stmt == stmt || is_gimple_debug (use_stmt))
517 	continue;
518 
519       if (!is_gimple_assign (use_stmt)
520 	  || !gimple_get_lhs (use_stmt)
521 	  || !is_gimple_reg (gimple_get_lhs (use_stmt))
522 	  || recurse >= 10
523 	  || !uses_consumed_by_stmt (gimple_get_lhs (use_stmt), stmt,
524 				     recurse + 1))
525 	{
526 	  retval = false;
527 	  break;
528 	}
529     }
530 
531   return retval;
532 }
533 
534 /* Helper routine for find_basis_for_candidate.  May be called twice:
535    once for the candidate's base expr, and optionally again either for
536    the candidate's phi definition or for a CAND_REF's alternative base
537    expression.  */
538 
539 static slsr_cand_t
540 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
541 {
542   cand_chain mapping_key;
543   cand_chain_t chain;
544   slsr_cand_t basis = NULL;
545 
546   // Limit potential of N^2 behavior for long candidate chains.
547   int iters = 0;
548   int max_iters = param_max_slsr_candidate_scan;
549 
550   mapping_key.base_expr = base_expr;
551   chain = base_cand_map->find (&mapping_key);
552 
553   for (; chain && iters < max_iters; chain = chain->next, ++iters)
554     {
555       slsr_cand_t one_basis = chain->cand;
556 
557       if (one_basis->kind != c->kind
558 	  || one_basis->cand_stmt == c->cand_stmt
559 	  || !operand_equal_p (one_basis->stride, c->stride, 0)
560 	  || !types_compatible_p (one_basis->cand_type, c->cand_type)
561 	  || !types_compatible_p (one_basis->stride_type, c->stride_type)
562 	  || !dominated_by_p (CDI_DOMINATORS,
563 			      gimple_bb (c->cand_stmt),
564 			      gimple_bb (one_basis->cand_stmt)))
565 	continue;
566 
567       tree lhs = gimple_assign_lhs (one_basis->cand_stmt);
568       if (lhs && TREE_CODE (lhs) == SSA_NAME
569 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
570 	continue;
571 
572       if (!basis || basis->cand_num < one_basis->cand_num)
573 	basis = one_basis;
574     }
575 
576   return basis;
577 }
578 
579 /* Use the base expr from candidate C to look for possible candidates
580    that can serve as a basis for C.  Each potential basis must also
581    appear in a block that dominates the candidate statement and have
582    the same stride and type.  If more than one possible basis exists,
583    the one with highest index in the vector is chosen; this will be
584    the most immediately dominating basis.  */
585 
586 static int
587 find_basis_for_candidate (slsr_cand_t c)
588 {
589   slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
590 
591   /* If a candidate doesn't have a basis using its base expression,
592      it may have a basis hidden by one or more intervening phis.  */
593   if (!basis && c->def_phi)
594     {
595       basic_block basis_bb, phi_bb;
596       slsr_cand_t phi_cand = lookup_cand (c->def_phi);
597       basis = find_basis_for_base_expr (c, phi_cand->base_expr);
598 
599       if (basis)
600 	{
601 	  /* A hidden basis must dominate the phi-definition of the
602 	     candidate's base name.  */
603 	  phi_bb = gimple_bb (phi_cand->cand_stmt);
604 	  basis_bb = gimple_bb (basis->cand_stmt);
605 
606 	  if (phi_bb == basis_bb
607 	      || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
608 	    {
609 	      basis = NULL;
610 	      c->basis = 0;
611 	    }
612 
613 	  /* If we found a hidden basis, estimate additional dead-code
614 	     savings if the phi and its feeding statements can be removed.  */
615 	  tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
616 	  if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt))
617 	    c->dead_savings += phi_cand->dead_savings;
618 	}
619     }
620 
621   if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
622     {
623       tree alt_base_expr = get_alternative_base (c->base_expr);
624       if (alt_base_expr)
625 	basis = find_basis_for_base_expr (c, alt_base_expr);
626     }
627 
628   if (basis)
629     {
630       c->sibling = basis->dependent;
631       basis->dependent = c->cand_num;
632       return basis->cand_num;
633     }
634 
635   return 0;
636 }
637 
638 /* Record a mapping from BASE to C, indicating that C may potentially serve
639    as a basis using that base expression.  BASE may be the same as
640    C->BASE_EXPR; alternatively BASE can be a different tree that shares the
641    underlying expression of C->BASE_EXPR.  */
642 
643 static void
644 record_potential_basis (slsr_cand_t c, tree base)
645 {
646   cand_chain_t node;
647   cand_chain **slot;
648 
649   gcc_assert (base);
650 
651   node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
652   node->base_expr = base;
653   node->cand = c;
654   node->next = NULL;
655   slot = base_cand_map->find_slot (node, INSERT);
656 
657   if (*slot)
658     {
659       cand_chain_t head = (cand_chain_t) (*slot);
660       node->next = head->next;
661       head->next = node;
662     }
663   else
664     *slot = node;
665 }
666 
667 /* Allocate storage for a new candidate and initialize its fields.
668    Attempt to find a basis for the candidate.
669 
670    For CAND_REF, an alternative base may also be recorded and used
671    to find a basis.  This helps cases where the expression hidden
672    behind BASE (which is usually an SSA_NAME) has immediate offset,
673    e.g.
674 
675      a2[i][j] = 1;
676      a2[i + 20][j] = 2;  */
677 
678 static slsr_cand_t
679 alloc_cand_and_find_basis (enum cand_kind kind, gimple *gs, tree base,
680 			   const widest_int &index, tree stride, tree ctype,
681 			   tree stype, unsigned savings)
682 {
683   slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
684 					       sizeof (slsr_cand));
685   c->cand_stmt = gs;
686   c->base_expr = base;
687   c->stride = stride;
688   c->index = index;
689   c->cand_type = ctype;
690   c->stride_type = stype;
691   c->kind = kind;
692   c->cand_num = cand_vec.length ();
693   c->next_interp = 0;
694   c->first_interp = c->cand_num;
695   c->dependent = 0;
696   c->sibling = 0;
697   c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
698   c->dead_savings = savings;
699   c->visited = 0;
700   c->cached_basis = NULL_TREE;
701 
702   cand_vec.safe_push (c);
703 
704   if (kind == CAND_PHI)
705     c->basis = 0;
706   else
707     c->basis = find_basis_for_candidate (c);
708 
709   record_potential_basis (c, base);
710   if (flag_expensive_optimizations && kind == CAND_REF)
711     {
712       tree alt_base = get_alternative_base (base);
713       if (alt_base)
714 	record_potential_basis (c, alt_base);
715     }
716 
717   return c;
718 }
719 
720 /* Determine the target cost of statement GS when compiling according
721    to SPEED.  */
722 
723 static int
724 stmt_cost (gimple *gs, bool speed)
725 {
726   tree lhs, rhs1, rhs2;
727   machine_mode lhs_mode;
728 
729   gcc_assert (is_gimple_assign (gs));
730   lhs = gimple_assign_lhs (gs);
731   rhs1 = gimple_assign_rhs1 (gs);
732   lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
733 
734   switch (gimple_assign_rhs_code (gs))
735     {
736     case MULT_EXPR:
737       rhs2 = gimple_assign_rhs2 (gs);
738 
739       if (tree_fits_shwi_p (rhs2))
740 	return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
741 
742       gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
743       return mul_cost (speed, lhs_mode);
744 
745     case PLUS_EXPR:
746     case POINTER_PLUS_EXPR:
747     case MINUS_EXPR:
748       return add_cost (speed, lhs_mode);
749 
750     case NEGATE_EXPR:
751       return neg_cost (speed, lhs_mode);
752 
753     CASE_CONVERT:
754       return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
755 
756     /* Note that we don't assign costs to copies that in most cases
757        will go away.  */
758     case SSA_NAME:
759       return 0;
760 
761     default:
762       ;
763     }
764 
765   gcc_unreachable ();
766 }
767 
768 /* Look up the defining statement for BASE_IN and return a pointer
769    to its candidate in the candidate table, if any; otherwise NULL.
770    Only CAND_ADD and CAND_MULT candidates are returned.  */
771 
772 static slsr_cand_t
773 base_cand_from_table (tree base_in)
774 {
775   slsr_cand_t *result;
776 
777   gimple *def = SSA_NAME_DEF_STMT (base_in);
778   if (!def)
779     return (slsr_cand_t) NULL;
780 
781   result = stmt_cand_map->get (def);
782 
783   if (result && (*result)->kind != CAND_REF)
784     return *result;
785 
786   return (slsr_cand_t) NULL;
787 }
788 
789 /* Add an entry to the statement-to-candidate mapping.  */
790 
791 static void
792 add_cand_for_stmt (gimple *gs, slsr_cand_t c)
793 {
794   gcc_assert (!stmt_cand_map->put (gs, c));
795 }
796 
797 /* Given PHI which contains a phi statement, determine whether it
798    satisfies all the requirements of a phi candidate.  If so, create
799    a candidate.  Note that a CAND_PHI never has a basis itself, but
800    is used to help find a basis for subsequent candidates.  */
801 
802 static void
803 slsr_process_phi (gphi *phi, bool speed)
804 {
805   unsigned i;
806   tree arg0_base = NULL_TREE, base_type;
807   slsr_cand_t c;
808   class loop *cand_loop = gimple_bb (phi)->loop_father;
809   unsigned savings = 0;
810 
811   /* A CAND_PHI requires each of its arguments to have the same
812      derived base name.  (See the module header commentary for a
813      definition of derived base names.)  Furthermore, all feeding
814      definitions must be in the same position in the loop hierarchy
815      as PHI.  */
816 
817   for (i = 0; i < gimple_phi_num_args (phi); i++)
818     {
819       slsr_cand_t arg_cand;
820       tree arg = gimple_phi_arg_def (phi, i);
821       tree derived_base_name = NULL_TREE;
822       gimple *arg_stmt = NULL;
823       basic_block arg_bb = NULL;
824 
825       if (TREE_CODE (arg) != SSA_NAME)
826 	return;
827 
828       arg_cand = base_cand_from_table (arg);
829 
830       if (arg_cand)
831 	{
832 	  while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
833 	    {
834 	      if (!arg_cand->next_interp)
835 		return;
836 
837 	      arg_cand = lookup_cand (arg_cand->next_interp);
838 	    }
839 
840 	  if (!integer_onep (arg_cand->stride))
841 	    return;
842 
843 	  derived_base_name = arg_cand->base_expr;
844 	  arg_stmt = arg_cand->cand_stmt;
845 	  arg_bb = gimple_bb (arg_stmt);
846 
847 	  /* Gather potential dead code savings if the phi statement
848 	     can be removed later on.  */
849 	  if (uses_consumed_by_stmt (arg, phi))
850 	    {
851 	      if (gimple_code (arg_stmt) == GIMPLE_PHI)
852 		savings += arg_cand->dead_savings;
853 	      else
854 		savings += stmt_cost (arg_stmt, speed);
855 	    }
856 	}
857       else if (SSA_NAME_IS_DEFAULT_DEF (arg))
858 	{
859 	  derived_base_name = arg;
860 	  arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
861 	}
862 
863       if (!arg_bb || arg_bb->loop_father != cand_loop)
864 	return;
865 
866       if (i == 0)
867 	arg0_base = derived_base_name;
868       else if (!operand_equal_p (derived_base_name, arg0_base, 0))
869 	return;
870     }
871 
872   /* Create the candidate.  "alloc_cand_and_find_basis" is named
873      misleadingly for this case, as no basis will be sought for a
874      CAND_PHI.  */
875   base_type = TREE_TYPE (arg0_base);
876 
877   c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
878 				 0, integer_one_node, base_type,
879 				 sizetype, savings);
880 
881   /* Add the candidate to the statement-candidate mapping.  */
882   add_cand_for_stmt (phi, c);
883 }
884 
885 /* Given PBASE which is a pointer to tree, look up the defining
886    statement for it and check whether the candidate is in the
887    form of:
888 
889      X = B + (1 * S), S is integer constant
890      X = B + (i * S), S is integer one
891 
892    If so, set PBASE to the candidate's base_expr and return the
893    value (i * S) as a widest_int.
894    Otherwise, just return a zero widest_int.  */
895 
896 static widest_int
897 backtrace_base_for_ref (tree *pbase)
898 {
899   tree base_in = *pbase;
900   slsr_cand_t base_cand;
901 
902   STRIP_NOPS (base_in);
903 
904   /* Strip off widening conversion(s) to handle cases where
905      e.g. 'B' is widened from an 'int' in order to calculate
906      a 64-bit address.  */
907   if (CONVERT_EXPR_P (base_in)
908       && legal_cast_p_1 (TREE_TYPE (base_in),
909 			 TREE_TYPE (TREE_OPERAND (base_in, 0))))
910     base_in = get_unwidened (base_in, NULL_TREE);
911 
912   if (TREE_CODE (base_in) != SSA_NAME)
913     return 0;
914 
915   base_cand = base_cand_from_table (base_in);
916 
917   while (base_cand && base_cand->kind != CAND_PHI)
918     {
919       if (base_cand->kind == CAND_ADD
920 	  && base_cand->index == 1
921 	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
922 	{
923 	  /* X = B + (1 * S), S is integer constant.  */
924 	  *pbase = base_cand->base_expr;
925 	  return wi::to_widest (base_cand->stride);
926 	}
927       else if (base_cand->kind == CAND_ADD
928 	       && TREE_CODE (base_cand->stride) == INTEGER_CST
929 	       && integer_onep (base_cand->stride))
930 	{
931 	  /* X = B + (i * S), S is integer one.  */
932 	  *pbase = base_cand->base_expr;
933 	  return base_cand->index;
934 	}
935 
936       base_cand = lookup_cand (base_cand->next_interp);
937     }
938 
939   return 0;
940 }
941 
942 /* Look for the following pattern:
943 
944     *PBASE:    MEM_REF (T1, C1)
945 
946     *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
947                      or
948                MULT_EXPR (PLUS_EXPR (T2, C2), C3)
949                      or
950                MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
951 
952     *PINDEX:   C4 * BITS_PER_UNIT
953 
954    If not present, leave the input values unchanged and return FALSE.
955    Otherwise, modify the input values as follows and return TRUE:
956 
957     *PBASE:    T1
958     *POFFSET:  MULT_EXPR (T2, C3)
959     *PINDEX:   C1 + (C2 * C3) + C4
960 
961    When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
962    will be further restructured to:
963 
964     *PBASE:    T1
965     *POFFSET:  MULT_EXPR (T2', C3)
966     *PINDEX:   C1 + (C2 * C3) + C4 + (C5 * C3)  */
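
/* As an illustrative instance of the pattern above, a reference with

     base:    MEM_REF (T1, 8)
     offset:  MULT_EXPR (PLUS_EXPR (T2, 3), 4)
     bitpos:  2 * BITS_PER_UNIT

   (so C1 = 8, C2 = 3, C3 = 4, C4 = 2), assuming backtrace_base_for_ref
   contributes nothing (C5 = 0), is restructured into

     *PBASE:    T1
     *POFFSET:  MULT_EXPR (T2, 4)
     *PINDEX:   8 + 3 * 4 + 2 = 22.  */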
967 
968 static bool
969 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
970 		       tree *ptype)
971 {
972   tree base = *pbase, offset = *poffset;
973   widest_int index = *pindex;
974   tree mult_op0, t1, t2, type;
975   widest_int c1, c2, c3, c4, c5;
976   offset_int mem_offset;
977 
978   if (!base
979       || !offset
980       || TREE_CODE (base) != MEM_REF
981       || !mem_ref_offset (base).is_constant (&mem_offset)
982       || TREE_CODE (offset) != MULT_EXPR
983       || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
984       || wi::umod_floor (index, BITS_PER_UNIT) != 0)
985     return false;
986 
987   t1 = TREE_OPERAND (base, 0);
988   c1 = widest_int::from (mem_offset, SIGNED);
989   type = TREE_TYPE (TREE_OPERAND (base, 1));
990 
991   mult_op0 = TREE_OPERAND (offset, 0);
992   c3 = wi::to_widest (TREE_OPERAND (offset, 1));
993 
994   if (TREE_CODE (mult_op0) == PLUS_EXPR)
995 
996     if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
997       {
998 	t2 = TREE_OPERAND (mult_op0, 0);
999 	c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
1000       }
1001     else
1002       return false;
1003 
1004   else if (TREE_CODE (mult_op0) == MINUS_EXPR)
1005 
1006     if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
1007       {
1008 	t2 = TREE_OPERAND (mult_op0, 0);
1009 	c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
1010       }
1011     else
1012       return false;
1013 
1014   else
1015     {
1016       t2 = mult_op0;
1017       c2 = 0;
1018     }
1019 
1020   c4 = index >> LOG2_BITS_PER_UNIT;
1021   c5 = backtrace_base_for_ref (&t2);
1022 
1023   *pbase = t1;
1024   *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
1025 			  wide_int_to_tree (sizetype, c3));
1026   *pindex = c1 + c2 * c3 + c4 + c5 * c3;
1027   *ptype = type;
1028 
1029   return true;
1030 }
1031 
1032 /* Given GS which contains a data reference, create a CAND_REF entry in
1033    the candidate table and attempt to find a basis.  */
1034 
1035 static void
1036 slsr_process_ref (gimple *gs)
1037 {
1038   tree ref_expr, base, offset, type;
1039   poly_int64 bitsize, bitpos;
1040   machine_mode mode;
1041   int unsignedp, reversep, volatilep;
1042   slsr_cand_t c;
1043 
1044   if (gimple_vdef (gs))
1045     ref_expr = gimple_assign_lhs (gs);
1046   else
1047     ref_expr = gimple_assign_rhs1 (gs);
1048 
1049   if (!handled_component_p (ref_expr)
1050       || TREE_CODE (ref_expr) == BIT_FIELD_REF
1051       || (TREE_CODE (ref_expr) == COMPONENT_REF
1052 	  && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1053     return;
1054 
1055   base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1056 			      &unsignedp, &reversep, &volatilep);
1057   HOST_WIDE_INT cbitpos;
1058   if (reversep || !bitpos.is_constant (&cbitpos))
1059     return;
1060   widest_int index = cbitpos;
1061 
1062   if (!restructure_reference (&base, &offset, &index, &type))
1063     return;
1064 
1065   c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1066 				 type, sizetype, 0);
1067 
1068   /* Add the candidate to the statement-candidate mapping.  */
1069   add_cand_for_stmt (gs, c);
1070 }
1071 
1072 /* Create a candidate entry for a statement GS, where GS multiplies
1073    two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
1074    about the two SSA names into the new candidate.  Return the new
1075    candidate.  */
1076 
1077 static slsr_cand_t
1078 create_mul_ssa_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1079 {
1080   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1081   tree stype = NULL_TREE;
1082   widest_int index;
1083   unsigned savings = 0;
1084   slsr_cand_t c;
1085   slsr_cand_t base_cand = base_cand_from_table (base_in);
1086 
1087   /* Look at all interpretations of the base candidate, if necessary,
1088      to find information to propagate into this candidate.  */
1089   while (base_cand && !base && base_cand->kind != CAND_PHI)
1090     {
1091 
1092       if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1093 	{
1094 	  /* Y = (B + i') * 1
1095 	     X = Y * Z
1096 	     ================
1097 	     X = (B + i') * Z  */
1098 	  base = base_cand->base_expr;
1099 	  index = base_cand->index;
1100 	  stride = stride_in;
1101 	  ctype = base_cand->cand_type;
1102 	  stype = TREE_TYPE (stride_in);
1103 	  if (has_single_use (base_in))
1104 	    savings = (base_cand->dead_savings
1105 		       + stmt_cost (base_cand->cand_stmt, speed));
1106 	}
1107       else if (base_cand->kind == CAND_ADD
1108 	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
1109 	{
1110 	  /* Y = B + (i' * S), S constant
1111 	     X = Y * Z
1112 	     ============================
1113 	     X = B + ((i' * S) * Z)  */
1114 	  base = base_cand->base_expr;
1115 	  index = base_cand->index * wi::to_widest (base_cand->stride);
1116 	  stride = stride_in;
1117 	  ctype = base_cand->cand_type;
1118 	  stype = TREE_TYPE (stride_in);
1119 	  if (has_single_use (base_in))
1120 	    savings = (base_cand->dead_savings
1121 		       + stmt_cost (base_cand->cand_stmt, speed));
1122 	}
1123 
1124       base_cand = lookup_cand (base_cand->next_interp);
1125     }
1126 
1127   if (!base)
1128     {
1129       /* No interpretations had anything useful to propagate, so
1130 	 produce X = (Y + 0) * Z.  */
1131       base = base_in;
1132       index = 0;
1133       stride = stride_in;
1134       ctype = TREE_TYPE (base_in);
1135       stype = TREE_TYPE (stride_in);
1136     }
1137 
1138   c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1139 				 ctype, stype, savings);
1140   return c;
1141 }
1142 
1143 /* Create a candidate entry for a statement GS, where GS multiplies
1144    SSA name BASE_IN by constant STRIDE_IN.  Propagate any known
1145    information about BASE_IN into the new candidate.  Return the new
1146    candidate.  */
1147 
1148 static slsr_cand_t
1149 create_mul_imm_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1150 {
1151   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1152   widest_int index, temp;
1153   unsigned savings = 0;
1154   slsr_cand_t c;
1155   slsr_cand_t base_cand = base_cand_from_table (base_in);
1156 
1157   /* Look at all interpretations of the base candidate, if necessary,
1158      to find information to propagate into this candidate.  */
1159   while (base_cand && !base && base_cand->kind != CAND_PHI)
1160     {
1161       if (base_cand->kind == CAND_MULT
1162 	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
1163 	{
1164 	  /* Y = (B + i') * S, S constant
1165 	     X = Y * c
1166 	     ============================
1167 	     X = (B + i') * (S * c)  */
1168 	  temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
1169 	  if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1170 	    {
1171 	      base = base_cand->base_expr;
1172 	      index = base_cand->index;
1173 	      stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1174 	      ctype = base_cand->cand_type;
1175 	      if (has_single_use (base_in))
1176 		savings = (base_cand->dead_savings
1177 			   + stmt_cost (base_cand->cand_stmt, speed));
1178 	    }
1179 	}
1180       else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1181 	{
1182 	  /* Y = B + (i' * 1)
1183 	     X = Y * c
1184 	     ===========================
1185 	     X = (B + i') * c  */
1186 	  base = base_cand->base_expr;
1187 	  index = base_cand->index;
1188 	  stride = stride_in;
1189 	  ctype = base_cand->cand_type;
1190 	  if (has_single_use (base_in))
1191 	    savings = (base_cand->dead_savings
1192 		       + stmt_cost (base_cand->cand_stmt, speed));
1193 	}
1194       else if (base_cand->kind == CAND_ADD
1195 	       && base_cand->index == 1
1196 	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
1197 	{
1198 	  /* Y = B + (1 * S), S constant
1199 	     X = Y * c
1200 	     ===========================
1201 	     X = (B + S) * c  */
1202 	  base = base_cand->base_expr;
1203 	  index = wi::to_widest (base_cand->stride);
1204 	  stride = stride_in;
1205 	  ctype = base_cand->cand_type;
1206 	  if (has_single_use (base_in))
1207 	    savings = (base_cand->dead_savings
1208 		       + stmt_cost (base_cand->cand_stmt, speed));
1209 	}
1210 
1211       base_cand = lookup_cand (base_cand->next_interp);
1212     }
1213 
1214   if (!base)
1215     {
1216       /* No interpretations had anything useful to propagate, so
1217 	 produce X = (Y + 0) * c.  */
1218       base = base_in;
1219       index = 0;
1220       stride = stride_in;
1221       ctype = TREE_TYPE (base_in);
1222     }
1223 
1224   c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1225 				 ctype, sizetype, savings);
1226   return c;
1227 }
1228 
1229 /* Given GS which is a multiply of scalar integers, make an appropriate
1230    entry in the candidate table.  If this is a multiply of two SSA names,
1231    create two CAND_MULT interpretations and attempt to find a basis for
1232    each of them.  Otherwise, create a single CAND_MULT and attempt to
1233    find a basis.  */
1234 
1235 static void
1236 slsr_process_mul (gimple *gs, tree rhs1, tree rhs2, bool speed)
1237 {
1238   slsr_cand_t c, c2;
1239 
1240   /* If this is a multiply of an SSA name with itself, it is highly
1241      unlikely that we will get a strength reduction opportunity, so
1242      don't record it as a candidate.  This simplifies the logic for
1243      finding a basis, so if this is removed that must be considered.  */
1244   if (rhs1 == rhs2)
1245     return;
1246 
1247   if (TREE_CODE (rhs2) == SSA_NAME)
1248     {
1249       /* Record an interpretation of this statement in the candidate table
1250 	 assuming RHS1 is the base expression and RHS2 is the stride.  */
1251       c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1252 
1253       /* Add the first interpretation to the statement-candidate mapping.  */
1254       add_cand_for_stmt (gs, c);
1255 
1256       /* Record another interpretation of this statement assuming RHS1
1257 	 is the stride and RHS2 is the base expression.  */
1258       c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1259       c->next_interp = c2->cand_num;
1260       c2->first_interp = c->cand_num;
1261     }
1262   else if (TREE_CODE (rhs2) == INTEGER_CST && !integer_zerop (rhs2))
1263     {
1264       /* Record an interpretation for the multiply-immediate.  */
1265       c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1266 
1267       /* Add the interpretation to the statement-candidate mapping.  */
1268       add_cand_for_stmt (gs, c);
1269     }
1270 }
1271 
1272 /* Create a candidate entry for a statement GS, where GS adds two
1273    SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1274    subtracts ADDEND_IN from BASE_IN otherwise.  Propagate any known
1275    information about the two SSA names into the new candidate.
1276    Return the new candidate.  */
1277 
1278 static slsr_cand_t
1279 create_add_ssa_cand (gimple *gs, tree base_in, tree addend_in,
1280 		     bool subtract_p, bool speed)
1281 {
1282   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1283   tree stype = NULL_TREE;
1284   widest_int index;
1285   unsigned savings = 0;
1286   slsr_cand_t c;
1287   slsr_cand_t base_cand = base_cand_from_table (base_in);
1288   slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1289 
1290   /* The most useful transformation is a multiply-immediate feeding
1291      an add or subtract.  Look for that first.  */
1292   while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1293     {
1294       if (addend_cand->kind == CAND_MULT
1295 	  && addend_cand->index == 0
1296 	  && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1297 	{
1298 	  /* Z = (B + 0) * S, S constant
1299 	     X = Y +/- Z
1300 	     ===========================
1301 	     X = Y + ((+/-1 * S) * B)  */
1302 	  base = base_in;
1303 	  index = wi::to_widest (addend_cand->stride);
1304 	  if (subtract_p)
1305 	    index = -index;
1306 	  stride = addend_cand->base_expr;
1307 	  ctype = TREE_TYPE (base_in);
1308 	  stype = addend_cand->cand_type;
1309 	  if (has_single_use (addend_in))
1310 	    savings = (addend_cand->dead_savings
1311 		       + stmt_cost (addend_cand->cand_stmt, speed));
1312 	}
1313 
1314       addend_cand = lookup_cand (addend_cand->next_interp);
1315     }
1316 
1317   while (base_cand && !base && base_cand->kind != CAND_PHI)
1318     {
1319       if (base_cand->kind == CAND_ADD
1320 	  && (base_cand->index == 0
1321 	      || operand_equal_p (base_cand->stride,
1322 				  integer_zero_node, 0)))
1323 	{
1324 	  /* Y = B + (i' * S), i' * S = 0
1325 	     X = Y +/- Z
1326 	     ============================
1327 	     X = B + (+/-1 * Z)  */
1328 	  base = base_cand->base_expr;
1329 	  index = subtract_p ? -1 : 1;
1330 	  stride = addend_in;
1331 	  ctype = base_cand->cand_type;
1332 	  stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1333 		   : TREE_TYPE (addend_in));
1334 	  if (has_single_use (base_in))
1335 	    savings = (base_cand->dead_savings
1336 		       + stmt_cost (base_cand->cand_stmt, speed));
1337 	}
1338       else if (subtract_p)
1339 	{
1340 	  slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1341 
1342 	  while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1343 	    {
1344 	      if (subtrahend_cand->kind == CAND_MULT
1345 		  && subtrahend_cand->index == 0
1346 		  && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1347 		{
1348 		  /* Z = (B + 0) * S, S constant
1349 		     X = Y - Z
1350 		     ===========================
1351 		     Value:  X = Y + ((-1 * S) * B)  */
1352 		  base = base_in;
1353 		  index = wi::to_widest (subtrahend_cand->stride);
1354 		  index = -index;
1355 		  stride = subtrahend_cand->base_expr;
1356 		  ctype = TREE_TYPE (base_in);
1357 		  stype = subtrahend_cand->cand_type;
1358 		  if (has_single_use (addend_in))
1359 		    savings = (subtrahend_cand->dead_savings
1360 			       + stmt_cost (subtrahend_cand->cand_stmt, speed));
1361 		}
1362 
1363 	      subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1364 	    }
1365 	}
1366 
1367       base_cand = lookup_cand (base_cand->next_interp);
1368     }
1369 
1370   if (!base)
1371     {
1372       /* No interpretations had anything useful to propagate, so
1373 	 produce X = Y + (1 * Z).  */
1374       base = base_in;
1375       index = subtract_p ? -1 : 1;
1376       stride = addend_in;
1377       ctype = TREE_TYPE (base_in);
1378       stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1379 	       : TREE_TYPE (addend_in));
1380     }
1381 
1382   c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1383 				 ctype, stype, savings);
1384   return c;
1385 }
1386 
1387 /* Create a candidate entry for a statement GS, where GS adds SSA
1388    name BASE_IN to constant INDEX_IN.  Propagate any known information
1389    about BASE_IN into the new candidate.  Return the new candidate.  */
1390 
1391 static slsr_cand_t
1392 create_add_imm_cand (gimple *gs, tree base_in, const widest_int &index_in,
1393 		     bool speed)
1394 {
1395   enum cand_kind kind = CAND_ADD;
1396   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1397   tree stype = NULL_TREE;
1398   widest_int index, multiple;
1399   unsigned savings = 0;
1400   slsr_cand_t c;
1401   slsr_cand_t base_cand = base_cand_from_table (base_in);
1402 
1403   while (base_cand && !base && base_cand->kind != CAND_PHI)
1404     {
1405       signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1406 
1407       if (TREE_CODE (base_cand->stride) == INTEGER_CST
1408 	  && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
1409 				sign, &multiple))
1410 	{
1411 	  /* Y = (B + i') * S, S constant, c = kS for some integer k
1412 	     X = Y + c
1413 	     ============================
1414 	     X = (B + (i'+ k)) * S
1415 	  OR
1416 	     Y = B + (i' * S), S constant, c = kS for some integer k
1417 	     X = Y + c
1418 	     ============================
1419 	     X = (B + (i'+ k)) * S  */
1420 	  kind = base_cand->kind;
1421 	  base = base_cand->base_expr;
1422 	  index = base_cand->index + multiple;
1423 	  stride = base_cand->stride;
1424 	  ctype = base_cand->cand_type;
1425 	  stype = base_cand->stride_type;
1426 	  if (has_single_use (base_in))
1427 	    savings = (base_cand->dead_savings
1428 		       + stmt_cost (base_cand->cand_stmt, speed));
1429 	}
1430 
1431       base_cand = lookup_cand (base_cand->next_interp);
1432     }
1433 
1434   if (!base)
1435     {
1436       /* No interpretations had anything useful to propagate, so
1437 	 produce X = Y + (c * 1).  */
1438       kind = CAND_ADD;
1439       base = base_in;
1440       index = index_in;
1441       stride = integer_one_node;
1442       ctype = TREE_TYPE (base_in);
1443       stype = sizetype;
1444     }
1445 
1446   c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1447 				 ctype, stype, savings);
1448   return c;
1449 }
1450 
1451 /* Given GS which is an add or subtract of scalar integers or pointers,
1452    make at least one appropriate entry in the candidate table.  */
1453 
1454 static void
1455 slsr_process_add (gimple *gs, tree rhs1, tree rhs2, bool speed)
1456 {
1457   bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1458   slsr_cand_t c = NULL, c2;
1459 
1460   if (TREE_CODE (rhs2) == SSA_NAME)
1461     {
1462       /* First record an interpretation assuming RHS1 is the base expression
1463 	 and RHS2 is the stride.  But it doesn't make sense for the
1464 	 stride to be a pointer, so don't record a candidate in that case.  */
1465       if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1466 	{
1467 	  c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1468 
1469 	  /* Add the first interpretation to the statement-candidate
1470 	     mapping.  */
1471 	  add_cand_for_stmt (gs, c);
1472 	}
1473 
1474       /* If the two RHS operands are identical, or this is a subtract,
1475 	 we're done.  */
1476       if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1477 	return;
1478 
1479       /* Otherwise, record another interpretation assuming RHS2 is the
1480 	 base expression and RHS1 is the stride, again provided that the
1481 	 stride is not a pointer.  */
1482       if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1483 	{
1484 	  c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1485 	  if (c)
1486 	    {
1487 	      c->next_interp = c2->cand_num;
1488 	      c2->first_interp = c->cand_num;
1489 	    }
1490 	  else
1491 	    add_cand_for_stmt (gs, c2);
1492 	}
1493     }
1494   else if (TREE_CODE (rhs2) == INTEGER_CST)
1495     {
1496       /* Record an interpretation for the add-immediate.  */
1497       widest_int index = wi::to_widest (rhs2);
1498       if (subtract_p)
1499 	index = -index;
1500 
1501       c = create_add_imm_cand (gs, rhs1, index, speed);
1502 
1503       /* Add the interpretation to the statement-candidate mapping.  */
1504       add_cand_for_stmt (gs, c);
1505     }
1506 }
1507 
1508 /* Given GS which is a negate of a scalar integer, make an appropriate
1509    entry in the candidate table.  A negate is equivalent to a multiply
1510    by -1.  */
1511 
1512 static void
1513 slsr_process_neg (gimple *gs, tree rhs1, bool speed)
1514 {
1515   /* Record a CAND_MULT interpretation for the multiply by -1.  */
1516   slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1517 
1518   /* Add the interpretation to the statement-candidate mapping.  */
1519   add_cand_for_stmt (gs, c);
1520 }
1521 
1522 /* Help function for legal_cast_p, operating on two trees.  Checks
1523    whether it's allowable to cast from RHS to LHS.  See legal_cast_p
1524    for more details.  */
1525 
1526 static bool
1527 legal_cast_p_1 (tree lhs_type, tree rhs_type)
1528 {
1529   unsigned lhs_size, rhs_size;
1530   bool lhs_wraps, rhs_wraps;
1531 
1532   lhs_size = TYPE_PRECISION (lhs_type);
1533   rhs_size = TYPE_PRECISION (rhs_type);
1534   lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
1535   rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
1536 
1537   if (lhs_size < rhs_size
1538       || (rhs_wraps && !lhs_wraps)
1539       || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1540     return false;
1541 
1542   return true;
1543 }
1544 
1545 /* Return TRUE if GS is a statement that defines an SSA name from
1546    a conversion and is legal for us to combine with an add and multiply
1547    in the candidate table.  For example, suppose we have:
1548 
1549      A = B + i;
1550      C = (type) A;
1551      D = C * S;
1552 
1553    Without the type-cast, we would create a CAND_MULT for D with base B,
1554    index i, and stride S.  We want to record this candidate only if it
1555    is equivalent to apply the type cast following the multiply:
1556 
1557      A = B + i;
1558      E = A * S;
1559      D = (type) E;
1560 
1561    We will record the type with the candidate for D.  This allows us
1562    to use a similar previous candidate as a basis.  If we have earlier seen
1563 
1564      A' = B + i';
1565      C' = (type) A';
1566      D' = C' * S;
1567 
1568    we can replace D with
1569 
1570      D = D' + (i - i') * S;
1571 
1572    But if moving the type-cast would change semantics, we mustn't do this.
1573 
1574    This is legitimate for casts from a non-wrapping integral type to
1575    any integral type of the same or larger size.  It is not legitimate
1576    to convert a wrapping type to a non-wrapping type, or to a wrapping
1577    type of a different size.  I.e., with a wrapping type, we must
1578    assume that the addition B + i could wrap, in which case performing
1579    the multiply before or after one of the "illegal" type casts will
1580    have different semantics.  */
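
/* For instance, under the default overflow rules a widening cast from
   a 32-bit signed integer (which does not wrap) to a 64-bit integer
   type is legal, while a widening cast from a 32-bit unsigned
   (wrapping) type to a 64-bit type is not, since B + i may already
   have wrapped modulo 2^32.  */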
1581 
1582 static bool
1583 legal_cast_p (gimple *gs, tree rhs)
1584 {
1585   if (!is_gimple_assign (gs)
1586       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1587     return false;
1588 
1589   return legal_cast_p_1 (TREE_TYPE (gimple_assign_lhs (gs)), TREE_TYPE (rhs));
1590 }
1591 
1592 /* Given GS which is a cast to a scalar integer type, determine whether
1593    the cast is legal for strength reduction.  If so, make at least one
1594    appropriate entry in the candidate table.  */
1595 
1596 static void
1597 slsr_process_cast (gimple *gs, tree rhs1, bool speed)
1598 {
1599   tree lhs, ctype;
1600   slsr_cand_t base_cand, c = NULL, c2;
1601   unsigned savings = 0;
1602 
1603   if (!legal_cast_p (gs, rhs1))
1604     return;
1605 
1606   lhs = gimple_assign_lhs (gs);
1607   base_cand = base_cand_from_table (rhs1);
1608   ctype = TREE_TYPE (lhs);
1609 
1610   if (base_cand && base_cand->kind != CAND_PHI)
1611     {
1612       slsr_cand_t first_cand = NULL;
1613 
1614       while (base_cand)
1615 	{
1616 	  /* Propagate all data from the base candidate except the type,
1617 	     which comes from the cast, and the base candidate's cast,
1618 	     which is no longer applicable.  */
1619 	  if (has_single_use (rhs1))
1620 	    savings = (base_cand->dead_savings
1621 		       + stmt_cost (base_cand->cand_stmt, speed));
1622 
1623 	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
1624 					 base_cand->base_expr,
1625 					 base_cand->index, base_cand->stride,
1626 					 ctype, base_cand->stride_type,
1627 					 savings);
1628 	  if (!first_cand)
1629 	    first_cand = c;
1630 
1631 	  if (first_cand != c)
1632 	    c->first_interp = first_cand->cand_num;
1633 
1634 	  base_cand = lookup_cand (base_cand->next_interp);
1635 	}
1636     }
1637   else
1638     {
1639       /* If nothing is known about the RHS, create fresh CAND_ADD and
1640 	 CAND_MULT interpretations:
1641 
1642 	 X = Y + (0 * 1)
1643 	 X = (Y + 0) * 1
1644 
1645 	 The first of these is somewhat arbitrary, but the choice of
1646 	 1 for the stride simplifies the logic for propagating casts
1647 	 into their uses.  */
1648       c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1649 				     integer_one_node, ctype, sizetype, 0);
1650       c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1651 				      integer_one_node, ctype, sizetype, 0);
1652       c->next_interp = c2->cand_num;
1653       c2->first_interp = c->cand_num;
1654     }
1655 
1656   /* Add the first (or only) interpretation to the statement-candidate
1657      mapping.  */
1658   add_cand_for_stmt (gs, c);
1659 }
1660 
1661 /* Given GS which is a copy of a scalar integer type, make at least one
1662    appropriate entry in the candidate table.
1663 
1664    This interface is included for completeness, but is unnecessary
1665    if this pass immediately follows a pass that performs copy
1666    propagation, such as DOM.  */
1667 
1668 static void
1669 slsr_process_copy (gimple *gs, tree rhs1, bool speed)
1670 {
1671   slsr_cand_t base_cand, c = NULL, c2;
1672   unsigned savings = 0;
1673 
1674   base_cand = base_cand_from_table (rhs1);
1675 
1676   if (base_cand && base_cand->kind != CAND_PHI)
1677     {
1678       slsr_cand_t first_cand = NULL;
1679 
1680       while (base_cand)
1681 	{
1682 	  /* Propagate all data from the base candidate.  */
1683 	  if (has_single_use (rhs1))
1684 	    savings = (base_cand->dead_savings
1685 		       + stmt_cost (base_cand->cand_stmt, speed));
1686 
1687 	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
1688 					 base_cand->base_expr,
1689 					 base_cand->index, base_cand->stride,
1690 					 base_cand->cand_type,
1691 					 base_cand->stride_type, savings);
1692 	  if (!first_cand)
1693 	    first_cand = c;
1694 
1695 	  if (first_cand != c)
1696 	    c->first_interp = first_cand->cand_num;
1697 
1698 	  base_cand = lookup_cand (base_cand->next_interp);
1699 	}
1700     }
1701   else
1702     {
1703       /* If nothing is known about the RHS, create fresh CAND_ADD and
1704 	 CAND_MULT interpretations:
1705 
1706 	 X = Y + (0 * 1)
1707 	 X = (Y + 0) * 1
1708 
1709 	 The first of these is somewhat arbitrary, but the choice of
1710 	 1 for the stride simplifies the logic for propagating casts
1711 	 into their uses.  */
1712       c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1713 				     integer_one_node, TREE_TYPE (rhs1),
1714 				     sizetype, 0);
1715       c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1716 				      integer_one_node, TREE_TYPE (rhs1),
1717 				      sizetype, 0);
1718       c->next_interp = c2->cand_num;
1719       c2->first_interp = c->cand_num;
1720     }
1721 
1722   /* Add the first (or only) interpretation to the statement-candidate
1723      mapping.  */
1724   add_cand_for_stmt (gs, c);
1725 }
1726 
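/* Dominator walker that visits blocks in dominator order to collect
   strength reduction candidates.  */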
1727 class find_candidates_dom_walker : public dom_walker
1728 {
1729 public:
1730   find_candidates_dom_walker (cdi_direction direction)
1731     : dom_walker (direction) {}
1732   virtual edge before_dom_children (basic_block);
1733 };
1734 
1735 /* Find strength-reduction candidates in block BB.  */
1736 
1737 edge
1738 find_candidates_dom_walker::before_dom_children (basic_block bb)
1739 {
1740   bool speed = optimize_bb_for_speed_p (bb);
1741 
1742   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1743        gsi_next (&gsi))
1744     slsr_process_phi (gsi.phi (), speed);
1745 
1746   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1747        gsi_next (&gsi))
1748     {
1749       gimple *gs = gsi_stmt (gsi);
1750 
1751       if (stmt_could_throw_p (cfun, gs))
1752 	continue;
1753 
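      /* A statement that accesses memory and has a single operand on
	 its RHS is a potential CAND_REF.  */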
1754       if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1755 	slsr_process_ref (gs);
1756 
1757       else if (is_gimple_assign (gs)
1758 	       && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))
1759 		   || POINTER_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))))
1760 	{
1761 	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1762 
1763 	  switch (gimple_assign_rhs_code (gs))
1764 	    {
1765 	    case MULT_EXPR:
1766 	    case PLUS_EXPR:
1767 	      rhs1 = gimple_assign_rhs1 (gs);
1768 	      rhs2 = gimple_assign_rhs2 (gs);
1769 	      /* Should never happen, but currently some buggy situations
1770 		 in earlier phases put constants in rhs1.  */
1771 	      if (TREE_CODE (rhs1) != SSA_NAME)
1772 		continue;
1773 	      break;
1774 
1775 	    /* Possible future opportunity: rhs1 of a ptr+ can be
1776 	       an ADDR_EXPR.  */
1777 	    case POINTER_PLUS_EXPR:
1778 	    case MINUS_EXPR:
1779 	      rhs2 = gimple_assign_rhs2 (gs);
1780 	      gcc_fallthrough ();
1781 
1782 	    CASE_CONVERT:
1783 	    case SSA_NAME:
1784 	    case NEGATE_EXPR:
1785 	      rhs1 = gimple_assign_rhs1 (gs);
1786 	      if (TREE_CODE (rhs1) != SSA_NAME)
1787 		continue;
1788 	      break;
1789 
1790 	    default:
1791 	      ;
1792 	    }
1793 
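	  /* With the operands validated, record one or more candidate
	     interpretations for the statement.  */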
1794 	  switch (gimple_assign_rhs_code (gs))
1795 	    {
1796 	    case MULT_EXPR:
1797 	      slsr_process_mul (gs, rhs1, rhs2, speed);
1798 	      break;
1799 
1800 	    case PLUS_EXPR:
1801 	    case POINTER_PLUS_EXPR:
1802 	    case MINUS_EXPR:
1803 	      slsr_process_add (gs, rhs1, rhs2, speed);
1804 	      break;
1805 
1806 	    case NEGATE_EXPR:
1807 	      slsr_process_neg (gs, rhs1, speed);
1808 	      break;
1809 
1810 	    CASE_CONVERT:
1811 	      slsr_process_cast (gs, rhs1, speed);
1812 	      break;
1813 
1814 	    case SSA_NAME:
1815 	      slsr_process_copy (gs, rhs1, speed);
1816 	      break;
1817 
1818 	    default:
1819 	      ;
1820 	    }
1821 	}
1822     }
1823   return NULL;
1824 }
1825 
1826 /* Dump a candidate for debug.  */
1827 
1828 static void
1829 dump_candidate (slsr_cand_t c)
1830 {
1831   fprintf (dump_file, "%3d  [%d] ", c->cand_num,
1832 	   gimple_bb (c->cand_stmt)->index);
1833   print_gimple_stmt (dump_file, c->cand_stmt, 0);
1834   switch (c->kind)
1835     {
1836     case CAND_MULT:
1837       fputs ("     MULT : (", dump_file);
1838       print_generic_expr (dump_file, c->base_expr);
1839       fputs (" + ", dump_file);
1840       print_decs (c->index, dump_file);
1841       fputs (") * ", dump_file);
1842       if (TREE_CODE (c->stride) != INTEGER_CST
1843 	  && c->stride_type != TREE_TYPE (c->stride))
1844 	{
1845 	  fputs ("(", dump_file);
1846 	  print_generic_expr (dump_file, c->stride_type);
1847 	  fputs (")", dump_file);
1848 	}
1849       print_generic_expr (dump_file, c->stride);
1850       fputs (" : ", dump_file);
1851       break;
1852     case CAND_ADD:
1853       fputs ("     ADD  : ", dump_file);
1854       print_generic_expr (dump_file, c->base_expr);
1855       fputs (" + (", dump_file);
1856       print_decs (c->index, dump_file);
1857       fputs (" * ", dump_file);
1858       if (TREE_CODE (c->stride) != INTEGER_CST
1859 	  && c->stride_type != TREE_TYPE (c->stride))
1860 	{
1861 	  fputs ("(", dump_file);
1862 	  print_generic_expr (dump_file, c->stride_type);
1863 	  fputs (")", dump_file);
1864 	}
1865       print_generic_expr (dump_file, c->stride);
1866       fputs (") : ", dump_file);
1867       break;
1868     case CAND_REF:
1869       fputs ("     REF  : ", dump_file);
1870       print_generic_expr (dump_file, c->base_expr);
1871       fputs (" + (", dump_file);
1872       print_generic_expr (dump_file, c->stride);
1873       fputs (") + ", dump_file);
1874       print_decs (c->index, dump_file);
1875       fputs (" : ", dump_file);
1876       break;
1877     case CAND_PHI:
1878       fputs ("     PHI  : ", dump_file);
1879       print_generic_expr (dump_file, c->base_expr);
1880       fputs (" + (unknown * ", dump_file);
1881       print_generic_expr (dump_file, c->stride);
1882       fputs (") : ", dump_file);
1883       break;
1884     default:
1885       gcc_unreachable ();
1886     }
1887   print_generic_expr (dump_file, c->cand_type);
1888   fprintf (dump_file, "\n     basis: %d  dependent: %d  sibling: %d\n",
1889 	   c->basis, c->dependent, c->sibling);
1890   fprintf (dump_file,
1891 	   "     next-interp: %d  first-interp: %d  dead-savings: %d\n",
1892 	   c->next_interp, c->first_interp, c->dead_savings);
1893   if (c->def_phi)
1894     fprintf (dump_file, "     phi:  %d\n", c->def_phi);
1895   fputs ("\n", dump_file);
1896 }
1897 
1898 /* Dump the candidate vector for debug.  */
1899 
1900 static void
1901 dump_cand_vec (void)
1902 {
1903   unsigned i;
1904   slsr_cand_t c;
1905 
1906   fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1907 
1908   FOR_EACH_VEC_ELT (cand_vec, i, c)
1909     if (c != NULL)
1910       dump_candidate (c);
1911 }
1912 
1913 /* Callback used to dump the candidate chains hash table.  */
1914 
1915 int
1916 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1917 {
1918   const_cand_chain_t chain = *slot;
1919   cand_chain_t p;
1920 
1921   print_generic_expr (dump_file, chain->base_expr);
1922   fprintf (dump_file, " -> %d", chain->cand->cand_num);
1923 
1924   for (p = chain->next; p; p = p->next)
1925     fprintf (dump_file, " -> %d", p->cand->cand_num);
1926 
1927   fputs ("\n", dump_file);
1928   return 1;
1929 }
1930 
1931 /* Dump the candidate chains.  */
1932 
1933 static void
1934 dump_cand_chains (void)
1935 {
1936   fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1937   base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1938     (NULL);
1939   fputs ("\n", dump_file);
1940 }
1941 
1942 /* Dump the increment vector for debug.  */
1943 
1944 static void
1945 dump_incr_vec (void)
1946 {
1947   if (dump_file && (dump_flags & TDF_DETAILS))
1948     {
1949       unsigned i;
1950 
1951       fprintf (dump_file, "\nIncrement vector:\n\n");
1952 
1953       for (i = 0; i < incr_vec_len; i++)
1954 	{
1955 	  fprintf (dump_file, "%3d  increment:   ", i);
1956 	  print_decs (incr_vec[i].incr, dump_file);
1957 	  fprintf (dump_file, "\n     count:       %d", incr_vec[i].count);
1958 	  fprintf (dump_file, "\n     cost:        %d", incr_vec[i].cost);
1959 	  fputs ("\n     initializer: ", dump_file);
1960 	  print_generic_expr (dump_file, incr_vec[i].initializer);
1961 	  fputs ("\n\n", dump_file);
1962 	}
1963     }
1964 }
1965 
1966 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1967    data reference.  */
1968 
1969 static void
1970 replace_ref (tree *expr, slsr_cand_t c)
1971 {
1972   tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1973   unsigned HOST_WIDE_INT misalign;
1974   unsigned align;
1975 
1976   /* Ensure the memory reference carries the minimum alignment
1977      requirement for the data type.  See PR58041.  */
1978   get_object_alignment_1 (*expr, &align, &misalign);
1979   if (misalign != 0)
1980     align = least_bit_hwi (misalign);
1981   if (align < TYPE_ALIGN (acc_type))
1982     acc_type = build_aligned_type (acc_type, align);
1983 
1984   add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
1985 			  c->base_expr, c->stride);
1986   mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1987 			 wide_int_to_tree (c->cand_type, c->index));
1988 
1989   /* Gimplify the base addressing expression for the new MEM_REF tree.  */
1990   gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1991   TREE_OPERAND (mem_ref, 0)
1992     = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1993 				/*simple_p=*/true, NULL,
1994 				/*before=*/true, GSI_SAME_STMT);
1995   copy_ref_info (mem_ref, *expr);
1996   *expr = mem_ref;
1997   update_stmt (c->cand_stmt);
1998 }
1999 
2000 /* Return true if CAND_REF candidate C is a valid memory reference.  */
2001 
2002 static bool
2003 valid_mem_ref_cand_p (slsr_cand_t c)
2004 {
2005   if (TREE_CODE (TREE_OPERAND (c->stride, 1)) != INTEGER_CST)
2006     return false;
2007 
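  /* Build the address from the candidate's components and ask the
     target whether it is a valid memory address for this mode and
     address space.  */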
2008   struct mem_address addr
2009     = { NULL_TREE, c->base_expr, TREE_OPERAND (c->stride, 0),
2010 	TREE_OPERAND (c->stride, 1), wide_int_to_tree (sizetype, c->index) };
2011 
2012   return
2013     valid_mem_ref_p (TYPE_MODE (c->cand_type), TYPE_ADDR_SPACE (c->cand_type),
2014 		     &addr);
2015 }
2016 
2017 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
2018    dependent of candidate C with an equivalent strength-reduced data
2019    reference.  */
2020 
2021 static void
2022 replace_refs (slsr_cand_t c)
2023 {
2024   /* Replacing a chain of only 2 candidates which are valid memory references
2025      is generally counter-productive because you cannot recoup the additional
2026      calculation added in front of them.  */
2027   if (c->basis == 0
2028       && c->dependent
2029       && !lookup_cand (c->dependent)->dependent
2030       && valid_mem_ref_cand_p (c)
2031       && valid_mem_ref_cand_p (lookup_cand (c->dependent)))
2032     return;
2033 
2034   if (dump_file && (dump_flags & TDF_DETAILS))
2035     {
2036       fputs ("Replacing reference: ", dump_file);
2037       print_gimple_stmt (dump_file, c->cand_stmt, 0);
2038     }
2039 
2040   if (gimple_vdef (c->cand_stmt))
2041     {
2042       tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
2043       replace_ref (lhs, c);
2044     }
2045   else
2046     {
2047       tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
2048       replace_ref (rhs, c);
2049     }
2050 
2051   if (dump_file && (dump_flags & TDF_DETAILS))
2052     {
2053       fputs ("With: ", dump_file);
2054       print_gimple_stmt (dump_file, c->cand_stmt, 0);
2055       fputs ("\n", dump_file);
2056     }
2057 
2058   if (c->sibling)
2059     replace_refs (lookup_cand (c->sibling));
2060 
2061   if (c->dependent)
2062     replace_refs (lookup_cand (c->dependent));
2063 }
2064 
2065 /* Return TRUE if candidate C is dependent upon a PHI.  */
2066 
2067 static bool
2068 phi_dependent_cand_p (slsr_cand_t c)
2069 {
2070   /* A candidate is not necessarily dependent upon a PHI just because
2071      it has a phi definition for its base name.  It may have a basis
2072      that relies upon the same phi definition, in which case the PHI
2073      is irrelevant to this candidate.  */
2074   return (c->def_phi
2075 	  && c->basis
2076 	  && lookup_cand (c->basis)->def_phi != c->def_phi);
2077 }
2078 
2079 /* Calculate the increment required for candidate C relative to
2080    its basis.  */
2081 
2082 static widest_int
2083 cand_increment (slsr_cand_t c)
2084 {
2085   slsr_cand_t basis;
2086 
2087   /* If the candidate doesn't have a basis, just return its own
2088      index.  This is useful in record_increments to help us find
2089      an existing initializer.  Also, if the candidate's basis is
2090      hidden by a phi, then its own index will be the increment
2091      from the newly introduced phi basis.  */
2092   if (!c->basis || phi_dependent_cand_p (c))
2093     return c->index;
2094 
2095   basis = lookup_cand (c->basis);
2096   gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
2097   return c->index - basis->index;
2098 }
2099 
2100 /* Calculate the increment required for candidate C relative to
2101    its basis.  If we aren't going to generate pointer arithmetic
2102    for this candidate, return the absolute value of that increment
2103    instead.  */
2104 
2105 static inline widest_int
2106 cand_abs_increment (slsr_cand_t c)
2107 {
2108   widest_int increment = cand_increment (c);
2109 
2110   if (!address_arithmetic_p && wi::neg_p (increment))
2111     increment = -increment;
2112 
2113   return increment;
2114 }
2115 
2116 /* Return TRUE iff candidate C has already been replaced under
2117    another interpretation.  */
2118 
2119 static inline bool
2120 cand_already_replaced (slsr_cand_t c)
2121 {
2122   return (gimple_bb (c->cand_stmt) == 0);
2123 }
2124 
2125 /* Common logic used by replace_unconditional_candidate and
2126    replace_conditional_candidate.  */
2127 
2128 static void
2129 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2130 {
2131   tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2132   enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2133 
2134   /* It is not useful to replace casts, copies, negates, or adds of
2135      an SSA name and a constant.  */
2136   if (cand_code == SSA_NAME
2137       || CONVERT_EXPR_CODE_P (cand_code)
2138       || cand_code == PLUS_EXPR
2139       || cand_code == POINTER_PLUS_EXPR
2140       || cand_code == MINUS_EXPR
2141       || cand_code == NEGATE_EXPR)
2142     return;
2143 
2144   enum tree_code code = PLUS_EXPR;
2145   tree bump_tree;
2146   gimple *stmt_to_print = NULL;
2147 
2148   if (wi::neg_p (bump))
2149     {
2150       code = MINUS_EXPR;
2151       bump = -bump;
2152     }
2153 
2154   /* It is possible that the resulting bump doesn't fit in target_type.
2155      Abandon the replacement in this case.  This does not affect
2156      siblings or dependents of C.  */
2157   if (bump != wi::ext (bump, TYPE_PRECISION (target_type),
2158 		       TYPE_SIGN (target_type)))
2159     return;
2160 
2161   bump_tree = wide_int_to_tree (target_type, bump);
2162 
2163   /* If the basis name and the candidate's LHS have incompatible types,
2164      introduce a cast.  */
2165   if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2166     basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2167 
2168   if (dump_file && (dump_flags & TDF_DETAILS))
2169     {
2170       fputs ("Replacing: ", dump_file);
2171       print_gimple_stmt (dump_file, c->cand_stmt, 0);
2172     }
2173 
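  /* A bump of zero means the candidate computes the same value as its
     basis; replace the statement with a simple copy and point every
     interpretation of the statement at the new copy.  */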
2174   if (bump == 0)
2175     {
2176       tree lhs = gimple_assign_lhs (c->cand_stmt);
2177       gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
2178       gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2179       slsr_cand_t cc = lookup_cand (c->first_interp);
2180       gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2181       gsi_replace (&gsi, copy_stmt, false);
2182       while (cc)
2183 	{
2184 	  cc->cand_stmt = copy_stmt;
2185 	  cc = lookup_cand (cc->next_interp);
2186 	}
2187       if (dump_file && (dump_flags & TDF_DETAILS))
2188 	stmt_to_print = copy_stmt;
2189     }
2190   else
2191     {
2192       tree rhs1, rhs2;
2193       if (cand_code != NEGATE_EXPR) {
2194 	rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2195 	rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2196       }
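      /* If the statement's operands are already the basis name and the
	 bump, there is nothing to replace; just note the duplicate in
	 the dump.  */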
2197       if (cand_code != NEGATE_EXPR
2198 	  && ((operand_equal_p (rhs1, basis_name, 0)
2199 	       && operand_equal_p (rhs2, bump_tree, 0))
2200 	      || (operand_equal_p (rhs1, bump_tree, 0)
2201 		  && operand_equal_p (rhs2, basis_name, 0))))
2202 	{
2203 	  if (dump_file && (dump_flags & TDF_DETAILS))
2204 	    {
2205 	      fputs ("(duplicate, not actually replacing)", dump_file);
2206 	      stmt_to_print = c->cand_stmt;
2207 	    }
2208 	}
2209       else
2210 	{
2211 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2212 	  slsr_cand_t cc = lookup_cand (c->first_interp);
2213 	  gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
2214 	  update_stmt (gsi_stmt (gsi));
2215 	  while (cc)
2216 	    {
2217 	      cc->cand_stmt = gsi_stmt (gsi);
2218 	      cc = lookup_cand (cc->next_interp);
2219 	    }
2220 	  if (dump_file && (dump_flags & TDF_DETAILS))
2221 	    stmt_to_print = gsi_stmt (gsi);
2222 	}
2223     }
2224 
2225   if (dump_file && (dump_flags & TDF_DETAILS))
2226     {
2227       fputs ("With: ", dump_file);
2228       print_gimple_stmt (dump_file, stmt_to_print, 0);
2229       fputs ("\n", dump_file);
2230     }
2231 }
2232 
2233 /* Replace candidate C with an add or subtract.   Note that we only
2234    operate on CAND_MULTs with known strides, so we will never generate
2235    a POINTER_PLUS_EXPR.  Each candidate X = (B + i) * S is replaced by
2236    X = Y + ((i - i') * S), as described in the module commentary.  The
2237    folded value ((i - i') * S) is referred to here as the "bump."  */
2238 
2239 static void
2240 replace_unconditional_candidate (slsr_cand_t c)
2241 {
2242   slsr_cand_t basis;
2243 
2244   if (cand_already_replaced (c))
2245     return;
2246 
2247   basis = lookup_cand (c->basis);
2248   widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2249 
2250   replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2251 }
2252 
2253 /* Return the index in the increment vector of the given INCREMENT,
2254    or -1 if not found.  The latter can occur if more than
2255    MAX_INCR_VEC_LEN increments have been found.  */
2256 
2257 static inline int
2258 incr_vec_index (const widest_int &increment)
2259 {
2260   unsigned i;
2261 
2262   for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2263     ;
2264 
2265   if (i < incr_vec_len)
2266     return i;
2267   else
2268     return -1;
2269 }
2270 
2271 /* Create a new statement along edge E to add BASIS_NAME to the product
2272    of INCREMENT and the stride of candidate C.  Create and return a new
2273    SSA name to be used as the LHS of the new statement.
2274    KNOWN_STRIDE is true iff C's stride is a constant.  */
2275 
2276 static tree
2277 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2278 			     widest_int increment, edge e, location_t loc,
2279 			     bool known_stride)
2280 {
2281   tree lhs, basis_type;
2282   gassign *new_stmt, *cast_stmt = NULL;
2283 
2284   /* If the add candidate along this incoming edge has the same
2285      index as C's hidden basis, the hidden basis represents this
2286      edge correctly.  */
2287   if (increment == 0)
2288     return basis_name;
2289 
2290   basis_type = TREE_TYPE (basis_name);
2291   lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2292 
2293   /* Occasionally people convert integers to pointers without a
2294      cast, leading us into trouble if we aren't careful.  */
2295   enum tree_code plus_code
2296     = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR;
2297 
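  /* When the stride is a known constant, the bump (increment * stride)
     can be computed and folded now.  Otherwise we must either reuse a
     previously recorded initializer for this increment, or add or
     subtract the stride itself, which is only possible for increments
     of 1 and -1.  */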
2298   if (known_stride)
2299     {
2300       tree bump_tree;
2301       enum tree_code code = plus_code;
2302       widest_int bump = increment * wi::to_widest (c->stride);
2303       if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type))
2304 	{
2305 	  code = MINUS_EXPR;
2306 	  bump = -bump;
2307 	}
2308 
2309       tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type;
2310       bump_tree = wide_int_to_tree (stride_type, bump);
2311       new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
2312     }
2313   else
2314     {
2315       int i;
2316       bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment);
2317       i = incr_vec_index (negate_incr ? -increment : increment);
2318       gcc_assert (i >= 0);
2319 
2320       if (incr_vec[i].initializer)
2321 	{
2322 	  enum tree_code code = negate_incr ? MINUS_EXPR : plus_code;
2323 	  new_stmt = gimple_build_assign (lhs, code, basis_name,
2324 					  incr_vec[i].initializer);
2325 	}
2326       else {
2327 	tree stride;
2328 
2329 	if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
2330 	  {
2331 	    tree cast_stride = make_temp_ssa_name (c->stride_type, NULL,
2332 						   "slsr");
2333 	    cast_stmt = gimple_build_assign (cast_stride, NOP_EXPR,
2334 					     c->stride);
2335 	    stride = cast_stride;
2336 	  }
2337 	else
2338 	  stride = c->stride;
2339 
2340 	if (increment == 1)
2341 	  new_stmt = gimple_build_assign (lhs, plus_code, basis_name, stride);
2342 	else if (increment == -1)
2343 	  new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, stride);
2344 	else
2345 	  gcc_unreachable ();
2346       }
2347     }
2348 
2349   if (cast_stmt)
2350     {
2351       gimple_set_location (cast_stmt, loc);
2352       gsi_insert_on_edge (e, cast_stmt);
2353     }
2354 
2355   gimple_set_location (new_stmt, loc);
2356   gsi_insert_on_edge (e, new_stmt);
2357 
2358   if (dump_file && (dump_flags & TDF_DETAILS))
2359     {
2360       if (cast_stmt)
2361 	{
2362 	  fprintf (dump_file, "Inserting cast on edge %d->%d: ",
2363 		   e->src->index, e->dest->index);
2364 	  print_gimple_stmt (dump_file, cast_stmt, 0);
2365 	}
2366       fprintf (dump_file, "Inserting on edge %d->%d: ", e->src->index,
2367 	       e->dest->index);
2368       print_gimple_stmt (dump_file, new_stmt, 0);
2369     }
2370 
2371   return lhs;
2372 }
2373 
2374 /* Clear the visited field for a tree of PHI candidates.  */
2375 
2376 static void
2377 clear_visited (gphi *phi)
2378 {
2379   unsigned i;
2380   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2381 
2382   if (phi_cand->visited)
2383     {
2384       phi_cand->visited = 0;
2385 
2386       for (i = 0; i < gimple_phi_num_args (phi); i++)
2387 	{
2388 	  tree arg = gimple_phi_arg_def (phi, i);
2389 	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2390 	  if (gimple_code (arg_def) == GIMPLE_PHI)
2391 	    clear_visited (as_a <gphi *> (arg_def));
2392 	}
2393     }
2394 }
2395 
2396 /* Recursive helper function for create_phi_basis.  */
2397 
2398 static tree
2399 create_phi_basis_1 (slsr_cand_t c, gimple *from_phi, tree basis_name,
2400 		    location_t loc, bool known_stride)
2401 {
2402   int i;
2403   tree name, phi_arg;
2404   gphi *phi;
2405   slsr_cand_t basis = lookup_cand (c->basis);
2406   int nargs = gimple_phi_num_args (from_phi);
2407   basic_block phi_bb = gimple_bb (from_phi);
2408   slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi);
2409   auto_vec<tree> phi_args (nargs);
2410 
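  /* If this phi was already processed for the current candidate tree,
     reuse the phi basis created for it at that time.  */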
2411   if (phi_cand->visited)
2412     return phi_cand->cached_basis;
2413   phi_cand->visited = 1;
2414 
2415   /* Process each argument of the existing phi that represents
2416      conditionally-executed add candidates.  */
2417   for (i = 0; i < nargs; i++)
2418     {
2419       edge e = (*phi_bb->preds)[i];
2420       tree arg = gimple_phi_arg_def (from_phi, i);
2421       tree feeding_def;
2422 
2423       /* If the phi argument is the base name of the CAND_PHI, then
2424 	 this incoming arc should use the hidden basis.  */
2425       if (operand_equal_p (arg, phi_cand->base_expr, 0))
2426 	if (basis->index == 0)
2427 	  feeding_def = gimple_assign_lhs (basis->cand_stmt);
2428 	else
2429 	  {
2430 	    widest_int incr = -basis->index;
2431 	    feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2432 						       e, loc, known_stride);
2433 	  }
2434       else
2435 	{
2436 	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2437 
2438 	  /* If there is another phi along this incoming edge, we must
2439 	     process it in the same fashion to ensure that all basis
2440 	     adjustments are made along its incoming edges.  */
2441 	  if (gimple_code (arg_def) == GIMPLE_PHI)
2442 	    feeding_def = create_phi_basis_1 (c, arg_def, basis_name,
2443 					      loc, known_stride);
2444 	  else
2445 	    {
2446 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
2447 	      widest_int diff = arg_cand->index - basis->index;
2448 	      feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2449 							 e, loc, known_stride);
2450 	    }
2451 	}
2452 
2453       /* Because of recursion, we need to save the arguments in a vector
2454 	 so we can create the PHI statement all at once.  Otherwise the
2455 	 storage for the half-created PHI can be reclaimed.  */
2456       phi_args.safe_push (feeding_def);
2457     }
2458 
2459   /* Create the new phi basis.  */
2460   name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2461   phi = create_phi_node (name, phi_bb);
2462   SSA_NAME_DEF_STMT (name) = phi;
2463 
2464   FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2465     {
2466       edge e = (*phi_bb->preds)[i];
2467       add_phi_arg (phi, phi_arg, e, loc);
2468     }
2469 
2470   update_stmt (phi);
2471 
2472   if (dump_file && (dump_flags & TDF_DETAILS))
2473     {
2474       fputs ("Introducing new phi basis: ", dump_file);
2475       print_gimple_stmt (dump_file, phi, 0);
2476     }
2477 
2478   phi_cand->cached_basis = name;
2479   return name;
2480 }
2481 
2482 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2483    is hidden by the phi node FROM_PHI, create a new phi node in the same
2484    block as FROM_PHI.  The new phi is suitable for use as a basis by C,
2485    with its phi arguments representing conditional adjustments to the
2486    hidden basis along conditional incoming paths.  Those adjustments are
2487    made by creating add statements (and sometimes recursively creating
2488    phis) along those incoming paths.  LOC is the location to attach to
2489    the introduced statements.  KNOWN_STRIDE is true iff C's stride is a
2490    constant.  */
2491 
2492 static tree
2493 create_phi_basis (slsr_cand_t c, gimple *from_phi, tree basis_name,
2494 		  location_t loc, bool known_stride)
2495 {
2496   tree retval = create_phi_basis_1 (c, from_phi, basis_name, loc,
2497 				    known_stride);
2498   gcc_assert (retval);
2499   clear_visited (as_a <gphi *> (from_phi));
2500   return retval;
2501 }
2502 
2503 /* Given a candidate C whose basis is hidden by at least one intervening
2504    phi, introduce a matching number of new phis to represent its basis
2505    adjusted by conditional increments along possible incoming paths.  Then
2506    replace C as though it were an unconditional candidate, using the new
2507    basis.  */
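/* For example (a sketch, assuming a constant stride of 8), given

     x_1 = a_1 * 8;
     if (...)
       a_2 = a_1 + 1;
     a_3 = PHI <a_1, a_2>;
     x_2 = a_3 * 8;

   the multiply feeding x_2 has hidden basis x_1 and can be replaced by

     x_1 = a_1 * 8;
     if (...)
       {
	 a_2 = a_1 + 1;
	 t_1 = x_1 + 8;
       }
     t_2 = PHI <x_1, t_1>;
     x_2 = t_2;  */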
2508 
2509 static void
2510 replace_conditional_candidate (slsr_cand_t c)
2511 {
2512   tree basis_name, name;
2513   slsr_cand_t basis;
2514   location_t loc;
2515 
2516   /* Look up the LHS SSA name from C's basis.  This will be the
2517      RHS1 of the adds we will introduce to create new phi arguments.  */
2518   basis = lookup_cand (c->basis);
2519   basis_name = gimple_assign_lhs (basis->cand_stmt);
2520 
2521   /* Create a new phi statement which will represent C's true basis
2522      after the transformation is complete.  */
2523   loc = gimple_location (c->cand_stmt);
2524   name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2525 			   basis_name, loc, KNOWN_STRIDE);
2526 
2527   /* Replace C with an add of the new basis phi and a constant.  */
2528   widest_int bump = c->index * wi::to_widest (c->stride);
2529 
2530   replace_mult_candidate (c, name, bump);
2531 }
2532 
2533 /* Recursive helper function for phi_add_costs.  SPREAD is a measure of
2534    how many PHI nodes we have visited at this point in the tree walk.  */
2535 
2536 static int
2537 phi_add_costs_1 (gimple *phi, slsr_cand_t c, int one_add_cost, int *spread)
2538 {
2539   unsigned i;
2540   int cost = 0;
2541   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2542 
2543   if (phi_cand->visited)
2544     return 0;
2545 
2546   phi_cand->visited = 1;
2547   (*spread)++;
2548 
2549   /* If we work our way back to a phi that isn't dominated by the hidden
2550      basis, this isn't a candidate for replacement.  Indicate this by
2551      returning an unreasonably high cost.  It's not easy to detect
2552      these situations when determining the basis, so we defer the
2553      decision until now.  */
2554   basic_block phi_bb = gimple_bb (phi);
2555   slsr_cand_t basis = lookup_cand (c->basis);
2556   basic_block basis_bb = gimple_bb (basis->cand_stmt);
2557 
2558   if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2559     return COST_INFINITE;
2560 
2561   for (i = 0; i < gimple_phi_num_args (phi); i++)
2562     {
2563       tree arg = gimple_phi_arg_def (phi, i);
2564 
2565       if (arg != phi_cand->base_expr)
2566 	{
2567 	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2568 
2569 	  if (gimple_code (arg_def) == GIMPLE_PHI)
2570 	    {
2571 	      cost += phi_add_costs_1 (arg_def, c, one_add_cost, spread);
2572 
2573 	      if (cost >= COST_INFINITE || *spread > MAX_SPREAD)
2574 		return COST_INFINITE;
2575 	    }
2576 	  else
2577 	    {
2578 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
2579 
2580 	      if (arg_cand->index != c->index)
2581 		cost += one_add_cost;
2582 	    }
2583 	}
2584     }
2585 
2586   return cost;
2587 }
2588 
2589 /* Compute the expected costs of inserting basis adjustments for
2590    candidate C with phi-definition PHI.  The cost of inserting
2591    one adjustment is given by ONE_ADD_COST.  If PHI has arguments
2592    which are themselves phi results, recursively calculate costs
2593    for those phis as well.  */
2594 
2595 static int
2596 phi_add_costs (gimple *phi, slsr_cand_t c, int one_add_cost)
2597 {
2598   int spread = 0;
2599   int retval = phi_add_costs_1 (phi, c, one_add_cost, &spread);
2600   clear_visited (as_a <gphi *> (phi));
2601   return retval;
2602 }
2603 /* For candidate C, each sibling of candidate C, and each dependent of
2604    candidate C, determine whether the candidate is dependent upon a
2605    phi that hides its basis.  If not, replace the candidate unconditionally.
2606    Otherwise, determine whether the cost of introducing compensation code
2607    for the candidate is offset by the gains from strength reduction.  If
2608    so, replace the candidate and introduce the compensation code.  */
2609 
2610 static void
2611 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2612 {
2613   if (phi_dependent_cand_p (c))
2614     {
2615       /* A multiply candidate with a stride of 1 is just an artifact
2616 	 of a copy or cast; there is no value in replacing it.  */
2617       if (c->kind == CAND_MULT && wi::to_widest (c->stride) != 1)
2618 	{
2619 	  /* A candidate dependent upon a phi will replace a multiply by
2620 	     a constant with an add, and will insert at most one add for
2621 	     each phi argument.  Add these costs with the potential dead-code
2622 	     savings to determine profitability.  */
2623 	  bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2624 	  int mult_savings = stmt_cost (c->cand_stmt, speed);
2625 	  gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2626 	  tree phi_result = gimple_phi_result (phi);
2627 	  int one_add_cost = add_cost (speed,
2628 				       TYPE_MODE (TREE_TYPE (phi_result)));
2629 	  int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2630 	  int cost = add_costs - mult_savings - c->dead_savings;
2631 
2632 	  if (dump_file && (dump_flags & TDF_DETAILS))
2633 	    {
2634 	      fprintf (dump_file, "  Conditional candidate %d:\n", c->cand_num);
2635 	      fprintf (dump_file, "    add_costs = %d\n", add_costs);
2636 	      fprintf (dump_file, "    mult_savings = %d\n", mult_savings);
2637 	      fprintf (dump_file, "    dead_savings = %d\n", c->dead_savings);
2638 	      fprintf (dump_file, "    cost = %d\n", cost);
2639 	      if (cost <= COST_NEUTRAL)
2640 		fputs ("  Replacing...\n", dump_file);
2641 	      else
2642 		fputs ("  Not replaced.\n", dump_file);
2643 	    }
2644 
2645 	  if (cost <= COST_NEUTRAL)
2646 	    replace_conditional_candidate (c);
2647 	}
2648     }
2649   else
2650     replace_unconditional_candidate (c);
2651 
2652   if (c->sibling)
2653     replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2654 
2655   if (c->dependent)
2656     replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2657 }
2658 
2659 /* Count the number of candidates in the tree rooted at C that have
2660    not already been replaced under other interpretations.  */
2661 
2662 static int
2663 count_candidates (slsr_cand_t c)
2664 {
2665   unsigned count = cand_already_replaced (c) ? 0 : 1;
2666 
2667   if (c->sibling)
2668     count += count_candidates (lookup_cand (c->sibling));
2669 
2670   if (c->dependent)
2671     count += count_candidates (lookup_cand (c->dependent));
2672 
2673   return count;
2674 }
2675 
2676 /* Increase the count of INCREMENT by one in the increment vector.
2677    INCREMENT is associated with candidate C.  If INCREMENT is to be
2678    conditionally executed as part of a conditional candidate replacement,
2679    IS_PHI_ADJUST is true, otherwise false.  If an initializer
2680    T_0 = stride * I is provided by a candidate that dominates all
2681    candidates with the same increment, also record T_0 for subsequent use.  */
2682 
2683 static void
2684 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
2685 {
2686   bool found = false;
2687   unsigned i;
2688 
2689   /* Treat increments that differ only in sign as identical so as to
2690      share initializers, unless we are generating pointer arithmetic.  */
2691   if (!address_arithmetic_p && wi::neg_p (increment))
2692     increment = -increment;
2693 
2694   for (i = 0; i < incr_vec_len; i++)
2695     {
2696       if (incr_vec[i].incr == increment)
2697 	{
2698 	  incr_vec[i].count++;
2699 	  found = true;
2700 
2701 	  /* If we previously recorded an initializer that doesn't
2702 	     dominate this candidate, it's not going to be useful to
2703 	     us after all.  */
2704 	  if (incr_vec[i].initializer
2705 	      && !dominated_by_p (CDI_DOMINATORS,
2706 				  gimple_bb (c->cand_stmt),
2707 				  incr_vec[i].init_bb))
2708 	    {
2709 	      incr_vec[i].initializer = NULL_TREE;
2710 	      incr_vec[i].init_bb = NULL;
2711 	    }
2712 
2713 	  break;
2714 	}
2715     }
2716 
2717   if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2718     {
2719       /* The first time we see an increment, create the entry for it.
2720 	 If this is the root candidate which doesn't have a basis, set
2721 	 the count to zero.  We're only processing it so it can possibly
2722 	 provide an initializer for other candidates.  */
2723       incr_vec[incr_vec_len].incr = increment;
2724       incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2725       incr_vec[incr_vec_len].cost = COST_INFINITE;
2726 
2727       /* Optimistically record the first occurrence of this increment
2728 	 as providing an initializer (if it does); we will revise this
2729 	 opinion later if it doesn't dominate all other occurrences.
2730          Exception:  increments of 0, 1 never need initializers;
2731 	 and phi adjustments don't ever provide initializers.  */
2732       if (c->kind == CAND_ADD
2733 	  && !is_phi_adjust
2734 	  && c->index == increment
2735 	  && (increment > 1 || increment < 0)
2736 	  && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2737 	      || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2738 	{
2739 	  tree t0 = NULL_TREE;
2740 	  tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2741 	  tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2742 	  if (operand_equal_p (rhs1, c->base_expr, 0))
2743 	    t0 = rhs2;
2744 	  else if (operand_equal_p (rhs2, c->base_expr, 0))
2745 	    t0 = rhs1;
2746 	  if (t0
2747 	      && SSA_NAME_DEF_STMT (t0)
2748 	      && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2749 	    {
2750 	      incr_vec[incr_vec_len].initializer = t0;
2751 	      incr_vec[incr_vec_len++].init_bb
2752 		= gimple_bb (SSA_NAME_DEF_STMT (t0));
2753 	    }
2754 	  else
2755 	    {
2756 	      incr_vec[incr_vec_len].initializer = NULL_TREE;
2757 	      incr_vec[incr_vec_len++].init_bb = NULL;
2758 	    }
2759 	}
2760       else
2761 	{
2762 	  incr_vec[incr_vec_len].initializer = NULL_TREE;
2763 	  incr_vec[incr_vec_len++].init_bb = NULL;
2764 	}
2765     }
2766 }
2767 
2768 /* Recursive helper function for record_phi_increments.  */
2769 
2770 static void
2771 record_phi_increments_1 (slsr_cand_t basis, gimple *phi)
2772 {
2773   unsigned i;
2774   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2775 
2776   if (phi_cand->visited)
2777     return;
2778   phi_cand->visited = 1;
2779 
2780   for (i = 0; i < gimple_phi_num_args (phi); i++)
2781     {
2782       tree arg = gimple_phi_arg_def (phi, i);
2783       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2784 
2785       if (gimple_code (arg_def) == GIMPLE_PHI)
2786 	record_phi_increments_1 (basis, arg_def);
2787       else
2788 	{
2789 	  widest_int diff;
2790 
2791 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
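	  /* An argument that is just the phi's base expression
	     contributes an increment of zero minus the basis index;
	     otherwise the increment is the difference between the
	     argument candidate's index and the basis index.  */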
2792 	    {
2793 	      diff = -basis->index;
2794 	      record_increment (phi_cand, diff, PHI_ADJUST);
2795 	    }
2796 	  else
2797 	    {
2798 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
2799 	      diff = arg_cand->index - basis->index;
2800 	      record_increment (arg_cand, diff, PHI_ADJUST);
2801 	    }
2802 	}
2803     }
2804 }
2805 
2806 /* Given phi statement PHI that hides a candidate from its BASIS, find
2807    the increments along each incoming arc (recursively handling additional
2808    phis that may be present) and record them.  These increments are the
2809    difference in index between the index-adjusting statements and the
2810    index of the basis.  */
2811 
2812 static void
2813 record_phi_increments (slsr_cand_t basis, gimple *phi)
2814 {
2815   record_phi_increments_1 (basis, phi);
2816   clear_visited (as_a <gphi *> (phi));
2817 }
2818 
2819 /* Determine how many times each unique increment occurs in the set
2820    of candidates rooted at C's parent, recording the data in the
2821    increment vector.  For each unique increment I, if an initializer
2822    T_0 = stride * I is provided by a candidate that dominates all
2823    candidates with the same increment, also record T_0 for subsequent
2824    use.  */
2825 
2826 static void
2827 record_increments (slsr_cand_t c)
2828 {
2829   if (!cand_already_replaced (c))
2830     {
2831       if (!phi_dependent_cand_p (c))
2832 	record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2833       else
2834 	{
2835 	  /* A candidate with a basis hidden by a phi will have one
2836 	     increment for its relationship to the index represented by
2837 	     the phi, and potentially additional increments along each
2838 	     incoming edge.  For the root of the dependency tree (which
2839 	     has no basis), process just the initial index in case it has
2840 	     an initializer that can be used by subsequent candidates.  */
2841 	  record_increment (c, c->index, NOT_PHI_ADJUST);
2842 
2843 	  if (c->basis)
2844 	    record_phi_increments (lookup_cand (c->basis),
2845 				   lookup_cand (c->def_phi)->cand_stmt);
2846 	}
2847     }
2848 
2849   if (c->sibling)
2850     record_increments (lookup_cand (c->sibling));
2851 
2852   if (c->dependent)
2853     record_increments (lookup_cand (c->dependent));
2854 }
2855 
2856 /* Recursive helper function for phi_incr_cost.  */
2857 
2858 static int
2859 phi_incr_cost_1 (slsr_cand_t c, const widest_int &incr, gimple *phi,
2860 		 int *savings)
2861 {
2862   unsigned i;
2863   int cost = 0;
2864   slsr_cand_t basis = lookup_cand (c->basis);
2865   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2866 
2867   if (phi_cand->visited)
2868     return 0;
2869   phi_cand->visited = 1;
2870 
2871   for (i = 0; i < gimple_phi_num_args (phi); i++)
2872     {
2873       tree arg = gimple_phi_arg_def (phi, i);
2874       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2875 
2876       if (gimple_code (arg_def) == GIMPLE_PHI)
2877 	{
2878 	  int feeding_savings = 0;
2879 	  tree feeding_var = gimple_phi_result (arg_def);
2880 	  cost += phi_incr_cost_1 (c, incr, arg_def, &feeding_savings);
2881 	  if (uses_consumed_by_stmt (feeding_var, phi))
2882 	    *savings += feeding_savings;
2883 	}
2884       else
2885 	{
2886 	  widest_int diff;
2887 	  slsr_cand_t arg_cand;
2888 
2889 	  /* When the PHI argument is just a pass-through to the base
2890 	     expression of the hidden basis, the difference is zero minus
2891 	     the index of the basis.  There is no potential savings by
2892 	     eliminating a statement in this case.  */
2893 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
2894 	    {
2895 	      arg_cand = (slsr_cand_t)NULL;
2896 	      diff = -basis->index;
2897 	    }
2898 	  else
2899 	    {
2900 	      arg_cand = base_cand_from_table (arg);
2901 	      diff = arg_cand->index - basis->index;
2902 	    }
2903 
2904 	  if (incr == diff)
2905 	    {
2906 	      tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2907 	      cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2908 	      if (arg_cand)
2909 		{
2910 		  tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2911 		  if (uses_consumed_by_stmt (lhs, phi))
2912 		    *savings += stmt_cost (arg_cand->cand_stmt, true);
2913 		}
2914 	    }
2915 	}
2916     }
2917 
2918   return cost;
2919 }
2920 
2921 /* Add up and return the costs of introducing add statements that
2922    require the increment INCR on behalf of candidate C and phi
2923    statement PHI.  Accumulate into *SAVINGS the potential savings
2924    from removing existing statements that feed PHI and have no other
2925    uses.  */
2926 
2927 static int
2928 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple *phi,
2929 	       int *savings)
2930 {
2931   int retval = phi_incr_cost_1 (c, incr, phi, savings);
2932   clear_visited (as_a <gphi *> (phi));
2933   return retval;
2934 }
2935 
2936 /* Return the first candidate in the tree rooted at C that has not
2937    already been replaced, favoring siblings over dependents.  */
2938 
2939 static slsr_cand_t
2940 unreplaced_cand_in_tree (slsr_cand_t c)
2941 {
2942   if (!cand_already_replaced (c))
2943     return c;
2944 
2945   if (c->sibling)
2946     {
2947       slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2948       if (sib)
2949 	return sib;
2950     }
2951 
2952   if (c->dependent)
2953     {
2954       slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2955       if (dep)
2956 	return dep;
2957     }
2958 
2959   return NULL;
2960 }
2961 
2962 /* Return TRUE if the candidates in the tree rooted at C should be
2963    optimized for speed, else FALSE.  We estimate this based on the block
2964    containing the most dominant candidate in the tree that has not yet
2965    been replaced.  */
2966 
2967 static bool
2968 optimize_cands_for_speed_p (slsr_cand_t c)
2969 {
2970   slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2971   gcc_assert (c2);
2972   return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2973 }
2974 
2975 /* Add COST_IN to the lowest cost of any dependent path starting at
2976    candidate C or any of its siblings, counting only candidates along
2977    such paths with increment INCR.  Assume that replacing a candidate
2978    reduces cost by REPL_SAVINGS.  Also account for savings from any
2979    statements that would go dead.  If COUNT_PHIS is true, include
2980    costs of introducing feeding statements for conditional candidates.  */
2981 
2982 static int
2983 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2984 		  const widest_int &incr, bool count_phis)
2985 {
2986   int local_cost, sib_cost, savings = 0;
2987   widest_int cand_incr = cand_abs_increment (c);
2988 
2989   if (cand_already_replaced (c))
2990     local_cost = cost_in;
2991   else if (incr == cand_incr)
2992     local_cost = cost_in - repl_savings - c->dead_savings;
2993   else
2994     local_cost = cost_in - c->dead_savings;
2995 
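  /* For a conditional candidate, add the cost of inserting the phi
     adjustments, less any savings from statements feeding the phi
     that would become dead.  */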
2996   if (count_phis
2997       && phi_dependent_cand_p (c)
2998       && !cand_already_replaced (c))
2999     {
3000       gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
3001       local_cost += phi_incr_cost (c, incr, phi, &savings);
3002 
3003       if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
3004 	local_cost -= savings;
3005     }
3006 
3007   if (c->dependent)
3008     local_cost = lowest_cost_path (local_cost, repl_savings,
3009 				   lookup_cand (c->dependent), incr,
3010 				   count_phis);
3011 
3012   if (c->sibling)
3013     {
3014       sib_cost = lowest_cost_path (cost_in, repl_savings,
3015 				   lookup_cand (c->sibling), incr,
3016 				   count_phis);
3017       local_cost = MIN (local_cost, sib_cost);
3018     }
3019 
3020   return local_cost;
3021 }
3022 
3023 /* Compute the total savings that would accrue from all replacements
3024    in the candidate tree rooted at C, counting only candidates with
3025    increment INCR.  Assume that replacing a candidate reduces cost
3026    by REPL_SAVINGS.  Also account for savings from statements that
3027    would go dead.  */
3028 
3029 static int
3030 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
3031 	       bool count_phis)
3032 {
3033   int savings = 0;
3034   widest_int cand_incr = cand_abs_increment (c);
3035 
3036   if (incr == cand_incr && !cand_already_replaced (c))
3037     savings += repl_savings + c->dead_savings;
3038 
3039   if (count_phis
3040       && phi_dependent_cand_p (c)
3041       && !cand_already_replaced (c))
3042     {
3043       int phi_savings = 0;
3044       gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
3045       savings -= phi_incr_cost (c, incr, phi, &phi_savings);
3046 
3047       if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
3048 	savings += phi_savings;
3049     }
3050 
3051   if (c->dependent)
3052     savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
3053 			      count_phis);
3054 
3055   if (c->sibling)
3056     savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
3057 			      count_phis);
3058 
3059   return savings;
3060 }
3061 
3062 /* Use target-specific costs to determine and record which increments
3063    in the current candidate tree are profitable to replace, assuming
3064    MODE and SPEED.  FIRST_DEP is the first dependent of the root of
3065    the candidate tree.
3066 
3067    One slight limitation here is that we don't account for the possible
3068    introduction of casts in some cases.  See replace_one_candidate for
3069    the cases where these are introduced.  This should probably be cleaned
3070    up sometime.  */
3071 
3072 static void
3073 analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
3074 {
3075   unsigned i;
3076 
3077   for (i = 0; i < incr_vec_len; i++)
3078     {
3079       HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
3080 
3081       /* If somehow this increment is bigger than a HWI, we won't
3082 	 be optimizing candidates that use it.  And if the increment
3083 	 has a count of zero, nothing will be done with it.  */
3084       if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
3085 	incr_vec[i].cost = COST_INFINITE;
3086 
3087       /* Increments of 0, 1, and -1 are always profitable to replace,
3088 	 because they always replace a multiply or add with an add or
3089 	 copy, and may cause one or more existing instructions to go
3090 	 dead.  Exception:  -1 can't be assumed to be profitable for
3091 	 pointer addition.  */
3092       else if (incr == 0
3093 	       || incr == 1
3094 	       || (incr == -1
3095 		   && !POINTER_TYPE_P (first_dep->cand_type)))
3096 	incr_vec[i].cost = COST_NEUTRAL;
3097 
3098       /* If we need to add an initializer, give up if a cast from the
3099 	 candidate's type to its stride's type can lose precision.
3100 	 Note that this already takes into account that the stride may
3101 	 have been cast to a wider type, in which case this test won't
3102 	 fire.  Example:
3103 
3104            short int _1;
3105 	   _2 = (int) _1;
3106 	   _3 = _2 * 10;
3107 	   _4 = x + _3;    ADD: x + (10 * (int)_1) : int
3108 	   _5 = _2 * 15;
3109 	   _6 = x + _5;    ADD: x + (15 * (int)_1) : int
3110 
3111 	 Although the stride was a short int initially, the stride
3112 	 used in the analysis has been widened to an int, and such
3113 	 widening will be done in the initializer as well.  */
3114       else if (!incr_vec[i].initializer
3115 	       && TREE_CODE (first_dep->stride) != INTEGER_CST
3116 	       && !legal_cast_p_1 (first_dep->stride_type,
3117 				   TREE_TYPE (gimple_assign_lhs
3118 					      (first_dep->cand_stmt))))
3119 	incr_vec[i].cost = COST_INFINITE;
3120 
3121       /* If we need to add an initializer, make sure we don't introduce
3122 	 a multiply by a pointer type, which can happen in certain cast
3123 	 scenarios.  */
3124       else if (!incr_vec[i].initializer
3125 	       && TREE_CODE (first_dep->stride) != INTEGER_CST
3126 	       && POINTER_TYPE_P (first_dep->stride_type))
3127 	incr_vec[i].cost = COST_INFINITE;
3128 
3129       /* For any other increment, if this is a multiply candidate, we
3130 	 must introduce a temporary T and initialize it with
3131 	 T_0 = stride * increment.  When optimizing for speed, walk the
3132 	 candidate tree to calculate the best cost reduction along any
3133 	 path; if it offsets the fixed cost of inserting the initializer,
3134 	 replacing the increment is profitable.  When optimizing for
3135          size, instead calculate the total cost reduction from replacing
3136 	 all candidates with this increment.  */
3137       else if (first_dep->kind == CAND_MULT)
3138 	{
3139 	  int cost = mult_by_coeff_cost (incr, mode, speed);
3140 	  int repl_savings;
3141 
3142 	  if (tree_fits_shwi_p (first_dep->stride))
3143 	    {
3144 	      HOST_WIDE_INT hwi_stride = tree_to_shwi (first_dep->stride);
3145 	      repl_savings = mult_by_coeff_cost (hwi_stride, mode, speed);
3146 	    }
3147 	  else
3148 	    repl_savings = mul_cost (speed, mode);
3149 	  repl_savings -= add_cost (speed, mode);
3150 
3151 	  if (speed)
3152 	    cost = lowest_cost_path (cost, repl_savings, first_dep,
3153 				     incr_vec[i].incr, COUNT_PHIS);
3154 	  else
3155 	    cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
3156 				   COUNT_PHIS);
3157 
3158 	  incr_vec[i].cost = cost;
3159 	}
3160 
3161       /* If this is an add candidate, the initializer may already
3162 	 exist, so only calculate the cost of the initializer if it
3163 	 doesn't.  We are replacing one add with another here, so the
3164 	 known replacement savings is zero.  We will account for removal
3165 	 of dead instructions in lowest_cost_path or total_savings.  */
3166       else
3167 	{
3168 	  int cost = 0;
3169 	  if (!incr_vec[i].initializer)
3170 	    cost = mult_by_coeff_cost (incr, mode, speed);
3171 
3172 	  if (speed)
3173 	    cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
3174 				     DONT_COUNT_PHIS);
3175 	  else
3176 	    cost -= total_savings (0, first_dep, incr_vec[i].incr,
3177 				   DONT_COUNT_PHIS);
3178 
3179 	  incr_vec[i].cost = cost;
3180 	}
3181     }
3182 }
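
/* Illustrative cost sketch (the numbers below are assumptions for
   exposition only, not taken from any particular target):  suppose the
   stride is an SSA name, mul_cost is 4, add_cost is 1, and increment 5
   is required by one CAND_MULT candidate in the chain.  Then
   repl_savings = 4 - 1 = 3, and the initializer T_0 = stride * 5 might
   cost, say, 2 (a shift and an add).  When optimizing for size, the
   total savings over the chain is at least 3, so the increment's cost
   becomes 2 - 3 = -1 <= COST_NEUTRAL and the replacement is considered
   profitable.  When optimizing for speed, lowest_cost_path makes the
   analogous comparison along the best path through the candidate tree.  */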
3183 
3184 /* Return the nearest common dominator of BB1 and BB2.  If the blocks
3185    are identical, return the earlier of C1 and C2 in *WHERE.  Otherwise,
3186    if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
3187    return C2 in *WHERE; and if the NCD matches neither, return NULL in
3188    *WHERE.  Note: It is possible for one of C1 and C2 to be NULL.  */
3189 
3190 static basic_block
3191 ncd_for_two_cands (basic_block bb1, basic_block bb2,
3192 		   slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
3193 {
3194   basic_block ncd;
3195 
3196   if (!bb1)
3197     {
3198       *where = c2;
3199       return bb2;
3200     }
3201 
3202   if (!bb2)
3203     {
3204       *where = c1;
3205       return bb1;
3206     }
3207 
3208   ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
3209 
3210   /* If both candidates are in the same block, the earlier
3211      candidate wins.  */
3212   if (bb1 == ncd && bb2 == ncd)
3213     {
3214       if (!c1 || (c2 && c2->cand_num < c1->cand_num))
3215 	*where = c2;
3216       else
3217 	*where = c1;
3218     }
3219 
3220   /* Otherwise, if one of them produced a candidate in the
3221      dominator, that one wins.  */
3222   else if (bb1 == ncd)
3223     *where = c1;
3224 
3225   else if (bb2 == ncd)
3226     *where = c2;
3227 
3228   /* If neither matches the dominator, neither wins.  */
3229   else
3230     *where = NULL;
3231 
3232   return ncd;
3233 }
3234 
3235 /* Consider all candidates that feed PHI.  Find the nearest common
3236    dominator of those candidates requiring the given increment INCR.
3237    Further find and return the nearest common dominator of this result
3238    with block NCD.  If the returned block contains one or more of the
3239    candidates, return the earliest candidate in the block in *WHERE.  */
3240 
3241 static basic_block
3242 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi,
3243 	      basic_block ncd, slsr_cand_t *where)
3244 {
3245   unsigned i;
3246   slsr_cand_t basis = lookup_cand (c->basis);
3247   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3248 
3249   for (i = 0; i < gimple_phi_num_args (phi); i++)
3250     {
3251       tree arg = gimple_phi_arg_def (phi, i);
3252       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3253 
3254       if (gimple_code (arg_def) == GIMPLE_PHI)
3255 	ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd, where);
3256       else
3257 	{
3258 	  widest_int diff;
3259 
3260 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
3261 	    diff = -basis->index;
3262 	  else
3263 	    {
3264 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
3265 	      diff = arg_cand->index - basis->index;
3266 	    }
3267 
3268 	  basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3269 
3270 	  if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3271 	    ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3272 	}
3273     }
3274 
3275   return ncd;
3276 }
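
/* For illustration (a hypothetical situation):  suppose candidate C's
   base is defined by x_3 = PHI <x_1(bb2), x_2(bb4)>, where only the
   x_2 argument requires increment 5 relative to C's basis.  When
   computing the NCD of candidates needing increment 5, only that
   argument contributes, and the block folded into the NCD is bb4, the
   source of the corresponding incoming edge, since that is where
   compensation code for the argument would have to be placed.  */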
3277 
3278 /* Consider the candidate C together with any candidates that feed
3279    C's phi dependence (if any).  Find and return the nearest common
3280    dominator of those candidates requiring the given increment INCR.
3281    If the returned block contains one or more of the candidates,
3282    return the earliest candidate in the block in *WHERE.  */
3283 
3284 static basic_block
3285 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3286 {
3287   basic_block ncd = NULL;
3288 
3289   if (cand_abs_increment (c) == incr)
3290     {
3291       ncd = gimple_bb (c->cand_stmt);
3292       *where = c;
3293     }
3294 
3295   if (phi_dependent_cand_p (c))
3296     ncd = ncd_with_phi (c, incr,
3297 			as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
3298 			ncd, where);
3299 
3300   return ncd;
3301 }
3302 
3303 /* Consider all candidates in the tree rooted at C for which INCR
3304    represents the required increment of C relative to its basis.
3305    Find and return the basic block that most nearly dominates all
3306    such candidates.  If the returned block contains one or more of
3307    the candidates, return the earliest candidate in the block in
3308    *WHERE.  */
3309 
3310 static basic_block
3311 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3312 				    slsr_cand_t *where)
3313 {
3314   basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3315   slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3316 
3317   /* First find the NCD of all siblings and dependents.  */
3318   if (c->sibling)
3319     sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3320 						  incr, &sib_where);
3321   if (c->dependent)
3322     dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3323 						  incr, &dep_where);
3324   if (!sib_ncd && !dep_ncd)
3325     {
3326       new_where = NULL;
3327       ncd = NULL;
3328     }
3329   else if (sib_ncd && !dep_ncd)
3330     {
3331       new_where = sib_where;
3332       ncd = sib_ncd;
3333     }
3334   else if (dep_ncd && !sib_ncd)
3335     {
3336       new_where = dep_where;
3337       ncd = dep_ncd;
3338     }
3339   else
3340     ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3341 			     dep_where, &new_where);
3342 
3343   /* If the candidate's increment doesn't match the one we're interested
3344      in (nor do any increments for feeding defs of a phi-dependence),
3345      then the result depends only on siblings and dependents.  */
3346   this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3347 
3348   if (!this_ncd || cand_already_replaced (c))
3349     {
3350       *where = new_where;
3351       return ncd;
3352     }
3353 
3354   /* Otherwise, compare this candidate with the result from all siblings
3355      and dependents.  */
3356   ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3357 
3358   return ncd;
3359 }
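
/* A small placement example (hypothetical CFG):  if two dependent
   candidates that both require increment 5 sit in the two arms of an
   if-then-else, their nearest common dominator is the block containing
   the branch, so that is where a shared initializer T_0 = stride * 5
   would be placed; *WHERE stays NULL because neither candidate lives
   in the dominating block itself.  */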
3360 
3361 /* Return TRUE if the increment indexed by INDEX is profitable to replace.  */
3362 
3363 static inline bool
3364 profitable_increment_p (unsigned index)
3365 {
3366   return (incr_vec[index].cost <= COST_NEUTRAL);
3367 }
3368 
3369 /* For each profitable increment in the increment vector not equal to
3370    0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3371    dominator of all statements in the candidate chain rooted at C
3372    that require that increment, and insert an initializer
3373    T_0 = stride * increment at that location.  Record T_0 with the
3374    increment record.  */
3375 
3376 static void
3377 insert_initializers (slsr_cand_t c)
3378 {
3379   unsigned i;
3380 
3381   for (i = 0; i < incr_vec_len; i++)
3382     {
3383       basic_block bb;
3384       slsr_cand_t where = NULL;
3385       gassign *init_stmt;
3386       gassign *cast_stmt = NULL;
3387       tree new_name, incr_tree, init_stride;
3388       widest_int incr = incr_vec[i].incr;
3389 
3390       if (!profitable_increment_p (i)
3391 	  || incr == 1
3392 	  || (incr == -1
3393 	      && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type)))
3394 	  || incr == 0)
3395 	continue;
3396 
3397       /* We may have already identified an existing initializer that
3398 	 will suffice.  */
3399       if (incr_vec[i].initializer)
3400 	{
3401 	  if (dump_file && (dump_flags & TDF_DETAILS))
3402 	    {
3403 	      fputs ("Using existing initializer: ", dump_file);
3404 	      print_gimple_stmt (dump_file,
3405 				 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3406 				 0, TDF_NONE);
3407 	    }
3408 	  continue;
3409 	}
3410 
3411       /* Find the block that most closely dominates all candidates
3412 	 with this increment.  If there is at least one candidate in
3413 	 that block, the earliest one will be returned in WHERE.  */
3414       bb = nearest_common_dominator_for_cands (c, incr, &where);
3415 
3416       /* If the NCD is not dominated by the block containing the
3417 	 definition of the stride, we can't legally insert a
3418 	 single initializer.  Mark the increment as unprofitable
3419 	 so we don't make any replacements.  FIXME: Multiple
3420 	 initializers could be placed with more analysis.  */
3421       gimple *stride_def = SSA_NAME_DEF_STMT (c->stride);
3422       basic_block stride_bb = gimple_bb (stride_def);
3423 
3424       if (stride_bb && !dominated_by_p (CDI_DOMINATORS, bb, stride_bb))
3425 	{
3426 	  if (dump_file && (dump_flags & TDF_DETAILS))
3427 	    fprintf (dump_file,
3428 		     "Initializer #%d cannot be legally placed\n", i);
3429 	  incr_vec[i].cost = COST_INFINITE;
3430 	  continue;
3431 	}
3432 
3433       /* If the nominal stride has a different type than the recorded
3434 	 stride type, build a cast from the nominal stride to that type.  */
3435       if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
3436 	{
3437 	  init_stride = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3438 	  cast_stmt = gimple_build_assign (init_stride, NOP_EXPR, c->stride);
3439 	}
3440       else
3441 	init_stride = c->stride;
3442 
3443       /* Create a new SSA name to hold the initializer's value.  */
3444       new_name = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3445       incr_vec[i].initializer = new_name;
3446 
3447       /* Create the initializer and insert it in the latest possible
3448 	 dominating position.  */
3449       incr_tree = wide_int_to_tree (c->stride_type, incr);
3450       init_stmt = gimple_build_assign (new_name, MULT_EXPR,
3451 				       init_stride, incr_tree);
3452       if (where)
3453 	{
3454 	  gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3455 	  location_t loc = gimple_location (where->cand_stmt);
3456 
3457 	  if (cast_stmt)
3458 	    {
3459 	      gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3460 	      gimple_set_location (cast_stmt, loc);
3461 	    }
3462 
3463 	  gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3464 	  gimple_set_location (init_stmt, loc);
3465 	}
3466       else
3467 	{
3468 	  gimple_stmt_iterator gsi = gsi_last_bb (bb);
3469 	  gimple *basis_stmt = lookup_cand (c->basis)->cand_stmt;
3470 	  location_t loc = gimple_location (basis_stmt);
3471 
3472 	  if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
3473 	    {
3474 	      if (cast_stmt)
3475 		{
3476 		  gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3477 		  gimple_set_location (cast_stmt, loc);
3478 		}
3479 	      gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3480 	    }
3481 	  else
3482 	    {
3483 	      if (cast_stmt)
3484 		{
3485 		  gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
3486 		  gimple_set_location (cast_stmt, loc);
3487 		}
3488 	      gsi_insert_after (&gsi, init_stmt, GSI_NEW_STMT);
3489 	    }
3490 
3491 	  gimple_set_location (init_stmt, gimple_location (basis_stmt));
3492 	}
3493 
3494       if (dump_file && (dump_flags & TDF_DETAILS))
3495 	{
3496 	  if (cast_stmt)
3497 	    {
3498 	      fputs ("Inserting stride cast: ", dump_file);
3499 	      print_gimple_stmt (dump_file, cast_stmt, 0);
3500 	    }
3501 	  fputs ("Inserting initializer: ", dump_file);
3502 	  print_gimple_stmt (dump_file, init_stmt, 0);
3503 	}
3504     }
3505 }
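
/* Example of an inserted initializer (hypothetical GIMPLE):  if the
   profitable increment is 5, the nominal stride _s has type int, and
   the recorded stride type is long int, the code above emits at the
   chosen dominating point something like

     slsr_1 = (long int) _s;
     slsr_2 = slsr_1 * 5;

   and records slsr_2 as the initializer for increment 5, to be reused
   by every replacement that needs it.  */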
3506 
3507 /* Recursive helper function for all_phi_incrs_profitable.  */
3508 
3509 static bool
3510 all_phi_incrs_profitable_1 (slsr_cand_t c, gphi *phi, int *spread)
3511 {
3512   unsigned i;
3513   slsr_cand_t basis = lookup_cand (c->basis);
3514   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3515 
3516   if (phi_cand->visited)
3517     return true;
3518 
3519   phi_cand->visited = 1;
3520   (*spread)++;
3521 
3522   /* If the basis doesn't dominate the PHI (including when the PHI is
3523      in the same block as the basis), we won't be able to create a PHI
3524      using the basis here.  */
3525   basic_block basis_bb = gimple_bb (basis->cand_stmt);
3526   basic_block phi_bb = gimple_bb (phi);
3527 
3528   if (phi_bb == basis_bb
3529       || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
3530     return false;
3531 
3532   for (i = 0; i < gimple_phi_num_args (phi); i++)
3533     {
3534       /* If the PHI arg resides in a block not dominated by the basis,
3535 	 we won't be able to create a PHI using the basis here.  */
3536       basic_block pred_bb = gimple_phi_arg_edge (phi, i)->src;
3537 
3538       if (!dominated_by_p (CDI_DOMINATORS, pred_bb, basis_bb))
3539 	return false;
3540 
3541       tree arg = gimple_phi_arg_def (phi, i);
3542       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3543 
3544       if (gimple_code (arg_def) == GIMPLE_PHI)
3545 	{
3546 	  if (!all_phi_incrs_profitable_1 (c, as_a <gphi *> (arg_def), spread)
3547 	      || *spread > MAX_SPREAD)
3548 	    return false;
3549 	}
3550       else
3551 	{
3552 	  int j;
3553 	  widest_int increment;
3554 
3555 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
3556 	    increment = -basis->index;
3557 	  else
3558 	    {
3559 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
3560 	      increment = arg_cand->index - basis->index;
3561 	    }
3562 
3563 	  if (!address_arithmetic_p && wi::neg_p (increment))
3564 	    increment = -increment;
3565 
3566 	  j = incr_vec_index (increment);
3567 
3568 	  if (dump_file && (dump_flags & TDF_DETAILS))
3569 	    {
3570 	      fprintf (dump_file, "  Conditional candidate %d, phi: ",
3571 		       c->cand_num);
3572 	      print_gimple_stmt (dump_file, phi, 0);
3573 	      fputs ("    increment: ", dump_file);
3574 	      print_decs (increment, dump_file);
3575 	      if (j < 0)
3576 		fprintf (dump_file,
3577 			 "\n  Not replaced; incr_vec overflow.\n");
3578 	      else {
3579 		fprintf (dump_file, "\n    cost: %d\n", incr_vec[j].cost);
3580 		if (profitable_increment_p (j))
3581 		  fputs ("  Replacing...\n", dump_file);
3582 		else
3583 		  fputs ("  Not replaced.\n", dump_file);
3584 	      }
3585 	    }
3586 
3587 	  if (j < 0 || !profitable_increment_p (j))
3588 	    return false;
3589 	}
3590     }
3591 
3592   return true;
3593 }
3594 
3595 /* Return TRUE iff all required increments for candidates feeding PHI
3596    are profitable (and legal!) to replace on behalf of candidate C.  */
3597 
3598 static bool
3599 all_phi_incrs_profitable (slsr_cand_t c, gphi *phi)
3600 {
3601   int spread = 0;
3602   bool retval = all_phi_incrs_profitable_1 (c, phi, &spread);
3603   clear_visited (phi);
3604   return retval;
3605 }
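
/* Illustration (hypothetical):  for a conditional candidate whose base
   flows through x_3 = PHI <x_1(bb2), x_2(bb4)>, the arguments might
   require increments 0 and 4 relative to C's basis.  The candidate is
   replaced only if every such increment is itself judged profitable
   (see profitable_increment_p) and the basis dominates both the PHI
   and each argument's defining block, since compensating adds must be
   inserted for the arguments that differ from the basis.  */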
3606 
3607 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3608    type TO_TYPE, and insert it in front of the statement represented
3609    by candidate C.  Return the new SSA name, which is created as a
3610    temporary with the "slsr" prefix.  */
3611 
3612 static tree
3613 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3614 {
3615   tree cast_lhs;
3616   gassign *cast_stmt;
3617   gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3618 
3619   cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3620   cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
3621   gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3622   gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3623 
3624   if (dump_file && (dump_flags & TDF_DETAILS))
3625     {
3626       fputs ("  Inserting: ", dump_file);
3627       print_gimple_stmt (dump_file, cast_stmt, 0);
3628     }
3629 
3630   return cast_lhs;
3631 }
3632 
3633 /* Replace the RHS of the statement represented by candidate C with
3634    NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3635    leave C unchanged or just interchange its operands.  The original
3636    operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3637    If the replacement was made and we are doing a details dump,
3638    return the revised statement, else NULL.  */
3639 
3640 static gimple *
3641 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3642 			enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3643 			slsr_cand_t c)
3644 {
3645   if (new_code != old_code
3646       || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3647 	   || !operand_equal_p (new_rhs2, old_rhs2, 0))
3648 	  && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3649 	      || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3650     {
3651       gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3652       slsr_cand_t cc = lookup_cand (c->first_interp);
3653       gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3654       update_stmt (gsi_stmt (gsi));
3655       while (cc)
3656 	{
3657 	  cc->cand_stmt = gsi_stmt (gsi);
3658 	  cc = lookup_cand (cc->next_interp);
3659 	}
3660 
3661       if (dump_file && (dump_flags & TDF_DETAILS))
3662 	return gsi_stmt (gsi);
3663     }
3664 
3665   else if (dump_file && (dump_flags & TDF_DETAILS))
3666     fputs ("  (duplicate, not actually replacing)\n", dump_file);
3667 
3668   return NULL;
3669 }
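
/* For example (hypothetical operands):  if the candidate statement is
   already _6 = _4 + slsr_2 and the requested replacement is
   PLUS_EXPR <_4, slsr_2> (or its commuted form), nothing is changed;
   otherwise the RHS is overwritten in place and every interpretation
   of the statement in the candidate table is updated to point at the
   revised statement.  */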
3670 
3671 /* Strength-reduce the statement represented by candidate C by replacing
3672    it with an equivalent addition or subtraction.  I is the index into
3673    the increment vector identifying C's increment.  A new SSA name is
3674    created if a cast needs to be introduced.  BASIS_NAME
3675    is the rhs1 to use in creating the add/subtract.  */
3676 
3677 static void
3678 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3679 {
3680   gimple *stmt_to_print = NULL;
3681   tree orig_rhs1, orig_rhs2;
3682   tree rhs2;
3683   enum tree_code orig_code, repl_code;
3684   widest_int cand_incr;
3685 
3686   orig_code = gimple_assign_rhs_code (c->cand_stmt);
3687   orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3688   orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3689   cand_incr = cand_increment (c);
3690 
3691   /* If orig_rhs2 is NULL, we have already replaced this in situ with
3692      a copy statement under another interpretation.  */
3693   if (!orig_rhs2)
3694     return;
3695 
3696   if (dump_file && (dump_flags & TDF_DETAILS))
3697     {
3698       fputs ("Replacing: ", dump_file);
3699       print_gimple_stmt (dump_file, c->cand_stmt, 0);
3700       stmt_to_print = c->cand_stmt;
3701     }
3702 
3703   if (address_arithmetic_p)
3704     repl_code = POINTER_PLUS_EXPR;
3705   else
3706     repl_code = PLUS_EXPR;
3707 
3708   /* If the increment has an initializer T_0, replace the candidate
3709      statement with an add of the basis name and the initializer.  */
3710   if (incr_vec[i].initializer)
3711     {
3712       tree init_type = TREE_TYPE (incr_vec[i].initializer);
3713       tree orig_type = TREE_TYPE (orig_rhs2);
3714 
3715       if (types_compatible_p (orig_type, init_type))
3716 	rhs2 = incr_vec[i].initializer;
3717       else
3718 	rhs2 = introduce_cast_before_cand (c, orig_type,
3719 					   incr_vec[i].initializer);
3720 
3721       if (incr_vec[i].incr != cand_incr)
3722 	{
3723 	  gcc_assert (repl_code == PLUS_EXPR);
3724 	  repl_code = MINUS_EXPR;
3725 	}
3726 
3727       stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3728 					      orig_code, orig_rhs1, orig_rhs2,
3729 					      c);
3730     }
3731 
3732   /* Otherwise, the increment is one of -1, 0, and 1.  Replace
3733      with a subtract of the stride from the basis name, a copy
3734      from the basis name, or an add of the stride to the basis
3735      name, respectively.  It may be necessary to introduce a
3736      cast (or reuse an existing cast).  */
3737   else if (cand_incr == 1)
3738     {
3739       tree stride_type = TREE_TYPE (c->stride);
3740       tree orig_type = TREE_TYPE (orig_rhs2);
3741 
3742       if (types_compatible_p (orig_type, stride_type))
3743 	rhs2 = c->stride;
3744       else
3745 	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3746 
3747       stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3748 					      orig_code, orig_rhs1, orig_rhs2,
3749 					      c);
3750     }
3751 
3752   else if (cand_incr == -1)
3753     {
3754       tree stride_type = TREE_TYPE (c->stride);
3755       tree orig_type = TREE_TYPE (orig_rhs2);
3756       gcc_assert (repl_code != POINTER_PLUS_EXPR);
3757 
3758       if (types_compatible_p (orig_type, stride_type))
3759 	rhs2 = c->stride;
3760       else
3761 	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3762 
3763       if (orig_code != MINUS_EXPR
3764 	  || !operand_equal_p (basis_name, orig_rhs1, 0)
3765 	  || !operand_equal_p (rhs2, orig_rhs2, 0))
3766 	{
3767 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3768 	  slsr_cand_t cc = lookup_cand (c->first_interp);
3769 	  gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3770 	  update_stmt (gsi_stmt (gsi));
3771 	  while (cc)
3772 	    {
3773 	      cc->cand_stmt = gsi_stmt (gsi);
3774 	      cc = lookup_cand (cc->next_interp);
3775 	    }
3776 
3777 	  if (dump_file && (dump_flags & TDF_DETAILS))
3778 	    stmt_to_print = gsi_stmt (gsi);
3779 	}
3780       else if (dump_file && (dump_flags & TDF_DETAILS))
3781 	fputs ("  (duplicate, not actually replacing)\n", dump_file);
3782     }
3783 
3784   else if (cand_incr == 0)
3785     {
3786       tree lhs = gimple_assign_lhs (c->cand_stmt);
3787       tree lhs_type = TREE_TYPE (lhs);
3788       tree basis_type = TREE_TYPE (basis_name);
3789 
3790       if (types_compatible_p (lhs_type, basis_type))
3791 	{
3792 	  gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
3793 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3794 	  slsr_cand_t cc = lookup_cand (c->first_interp);
3795 	  gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3796 	  gsi_replace (&gsi, copy_stmt, false);
3797 	  while (cc)
3798 	    {
3799 	      cc->cand_stmt = copy_stmt;
3800 	      cc = lookup_cand (cc->next_interp);
3801 	    }
3802 
3803 	  if (dump_file && (dump_flags & TDF_DETAILS))
3804 	    stmt_to_print = copy_stmt;
3805 	}
3806       else
3807 	{
3808 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3809 	  gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
3810 	  slsr_cand_t cc = lookup_cand (c->first_interp);
3811 	  gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3812 	  gsi_replace (&gsi, cast_stmt, false);
3813 	  while (cc)
3814 	    {
3815 	      cc->cand_stmt = cast_stmt;
3816 	      cc = lookup_cand (cc->next_interp);
3817 	    }
3818 
3819 	  if (dump_file && (dump_flags & TDF_DETAILS))
3820 	    stmt_to_print = cast_stmt;
3821 	}
3822     }
3823   else
3824     gcc_unreachable ();
3825 
3826   if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3827     {
3828       fputs ("With: ", dump_file);
3829       print_gimple_stmt (dump_file, stmt_to_print, 0);
3830       fputs ("\n", dump_file);
3831     }
3832 }
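
/* A before/after sketch, reusing the earlier int-stride example (the
   SSA names are illustrative):  with basis _4 = x + _3 (x + 10 * _2),
   candidate _6 = x + _5 (x + 15 * _2), and initializer slsr_2 = _2 * 5
   for increment 5, the candidate statement is rewritten in place as

     _6 = _4 + slsr_2;

   after which the multiply feeding _5 can go dead.  */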
3833 
3834 /* For each candidate in the tree rooted at C, replace it with
3835    an increment if such has been shown to be profitable.  */
3836 
3837 static void
3838 replace_profitable_candidates (slsr_cand_t c)
3839 {
3840   if (!cand_already_replaced (c))
3841     {
3842       widest_int increment = cand_abs_increment (c);
3843       enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3844       int i;
3845 
3846       i = incr_vec_index (increment);
3847 
3848       /* Only process profitable increments.  Nothing useful can be done
3849 	 to a cast or copy.  */
3850       if (i >= 0
3851 	  && profitable_increment_p (i)
3852 	  && orig_code != SSA_NAME
3853 	  && !CONVERT_EXPR_CODE_P (orig_code))
3854 	{
3855 	  if (phi_dependent_cand_p (c))
3856 	    {
3857 	      gphi *phi = as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt);
3858 
3859 	      if (all_phi_incrs_profitable (c, phi))
3860 		{
3861 		  /* Look up the LHS SSA name from C's basis.  This will be
3862 		     the RHS1 of the adds we will introduce to create new
3863 		     phi arguments.  */
3864 		  slsr_cand_t basis = lookup_cand (c->basis);
3865 		  tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3866 
3867 		  /* Create a new phi statement that will represent C's true
3868 		     basis after the transformation is complete.  */
3869 		  location_t loc = gimple_location (c->cand_stmt);
3870 		  tree name = create_phi_basis (c, phi, basis_name,
3871 						loc, UNKNOWN_STRIDE);
3872 
3873 		  /* Replace C with an add of the new basis phi and the
3874 		     increment.  */
3875 		  replace_one_candidate (c, i, name);
3876 		}
3877 	    }
3878 	  else
3879 	    {
3880 	      slsr_cand_t basis = lookup_cand (c->basis);
3881 	      tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3882 	      replace_one_candidate (c, i, basis_name);
3883 	    }
3884 	}
3885     }
3886 
3887   if (c->sibling)
3888     replace_profitable_candidates (lookup_cand (c->sibling));
3889 
3890   if (c->dependent)
3891     replace_profitable_candidates (lookup_cand (c->dependent));
3892 }
3893 
3894 /* Analyze costs of related candidates in the candidate vector,
3895    and make beneficial replacements.  */
3896 
3897 static void
3898 analyze_candidates_and_replace (void)
3899 {
3900   unsigned i;
3901   slsr_cand_t c;
3902 
3903   /* Each candidate that has a null basis and a non-null
3904      dependent is the root of a tree of related statements.
3905      Analyze each tree to determine a subset of those
3906      statements that can be replaced with maximum benefit.
3907 
3908      Note the first NULL element is skipped.  */
3909   FOR_EACH_VEC_ELT_FROM (cand_vec, i, c, 1)
3910     {
3911       slsr_cand_t first_dep;
3912 
3913       if (c->basis != 0 || c->dependent == 0)
3914 	continue;
3915 
3916       if (dump_file && (dump_flags & TDF_DETAILS))
3917 	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3918 		 c->cand_num);
3919 
3920       first_dep = lookup_cand (c->dependent);
3921 
3922       /* If this is a chain of CAND_REFs, unconditionally replace
3923 	 each of them with a strength-reduced data reference.  */
3924       if (c->kind == CAND_REF)
3925 	replace_refs (c);
3926 
3927       /* If the common stride of all related candidates is a known
3928 	 constant, each candidate without a phi-dependence can be
3929 	 profitably replaced.  Each replaces a multiply by a single
3930 	 add, with the possibility that a feeding add also goes dead.
3931 	 A candidate with a phi-dependence is replaced only if the
3932 	 compensation code it requires is offset by the strength
3933 	 reduction savings.  */
3934       else if (TREE_CODE (c->stride) == INTEGER_CST)
3935 	replace_uncond_cands_and_profitable_phis (first_dep);
3936 
3937       /* When the stride is an SSA name, it may still be profitable
3938 	 to replace some or all of the dependent candidates, depending
3939 	 on whether the introduced increments can be reused, or are
3940 	 less expensive to calculate than the replaced statements.  */
3941       else
3942 	{
3943 	  machine_mode mode;
3944 	  bool speed;
3945 
3946 	  /* Determine whether we'll be generating pointer arithmetic
3947 	     when replacing candidates.  */
3948 	  address_arithmetic_p = (c->kind == CAND_ADD
3949 				  && POINTER_TYPE_P (c->cand_type));
3950 
3951 	  /* If all candidates have already been replaced under other
3952 	     interpretations, nothing remains to be done.  */
3953 	  if (!count_candidates (c))
3954 	    continue;
3955 
3956 	  /* Construct an array of increments for this candidate chain.  */
3957 	  incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3958 	  incr_vec_len = 0;
3959 	  record_increments (c);
3960 
3961 	  /* Determine which increments are profitable to replace.  */
3962 	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3963 	  speed = optimize_cands_for_speed_p (c);
3964 	  analyze_increments (first_dep, mode, speed);
3965 
3966 	  /* Insert initializers of the form T_0 = stride * increment
3967 	     for use in profitable replacements.  */
3968 	  insert_initializers (first_dep);
3969 	  dump_incr_vec ();
3970 
3971 	  /* Perform the replacements.  */
3972 	  replace_profitable_candidates (first_dep);
3973 	  free (incr_vec);
3974 	}
3975     }
3976 
3977   /* For conditional candidates, we may have uncommitted insertions
3978      on edges to clean up.  */
3979   gsi_commit_edge_inserts ();
3980 }
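
/* An end-to-end sketch of the kind of code this routine targets
   (hypothetical source; the exact candidate and basis choices depend
   on the dominator walk):

     int f (int b, int s)
     {
       int x = b + 4 * s;
       int y = b + 6 * s;
       int z = b + 8 * s;
       return x + y + z;
     }

   With an SSA stride s, the y and z computations differ from their
   bases by a constant multiple of s (increment 2), so after a single
   initializer t = s * 2 is inserted they can be formed as y = x + t
   and z = y + t, letting the multiplies by 6 and 8 go dead.  */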
3981 
3982 namespace {
3983 
3984 const pass_data pass_data_strength_reduction =
3985 {
3986   GIMPLE_PASS, /* type */
3987   "slsr", /* name */
3988   OPTGROUP_NONE, /* optinfo_flags */
3989   TV_GIMPLE_SLSR, /* tv_id */
3990   ( PROP_cfg | PROP_ssa ), /* properties_required */
3991   0, /* properties_provided */
3992   0, /* properties_destroyed */
3993   0, /* todo_flags_start */
3994   0, /* todo_flags_finish */
3995 };
3996 
3997 class pass_strength_reduction : public gimple_opt_pass
3998 {
3999 public:
4000   pass_strength_reduction (gcc::context *ctxt)
4001     : gimple_opt_pass (pass_data_strength_reduction, ctxt)
4002   {}
4003 
4004   /* opt_pass methods: */
4005   virtual bool gate (function *) { return flag_tree_slsr; }
4006   virtual unsigned int execute (function *);
4007 
4008 }; // class pass_strength_reduction
4009 
4010 unsigned
4011 pass_strength_reduction::execute (function *fun)
4012 {
4013   /* Create the obstack where candidates will reside.  */
4014   gcc_obstack_init (&cand_obstack);
4015 
4016   /* Allocate the candidate vector and initialize the first NULL element.  */
4017   cand_vec.create (128);
4018   cand_vec.safe_push (NULL);
4019 
4020   /* Allocate the mapping from statements to candidate indices.  */
4021   stmt_cand_map = new hash_map<gimple *, slsr_cand_t>;
4022 
4023   /* Create the obstack where candidate chains will reside.  */
4024   gcc_obstack_init (&chain_obstack);
4025 
4026   /* Allocate the mapping from base expressions to candidate chains.  */
4027   base_cand_map = new hash_table<cand_chain_hasher> (500);
4028 
4029   /* Allocate the mapping from bases to alternative bases.  */
4030   alt_base_map = new hash_map<tree, tree>;
4031 
4032   /* Initialize the loop optimizer.  We need to detect flow across
4033      back edges, and this gives us dominator information as well.  */
4034   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4035 
4036   /* Walk the CFG in predominator order looking for strength reduction
4037      candidates.  */
4038   find_candidates_dom_walker (CDI_DOMINATORS)
4039     .walk (fun->cfg->x_entry_block_ptr);
4040 
4041   if (dump_file && (dump_flags & TDF_DETAILS))
4042     {
4043       dump_cand_vec ();
4044       dump_cand_chains ();
4045     }
4046 
4047   delete alt_base_map;
4048   free_affine_expand_cache (&name_expansions);
4049 
4050   /* Analyze costs and make appropriate replacements.  */
4051   analyze_candidates_and_replace ();
4052 
4053   loop_optimizer_finalize ();
4054   delete base_cand_map;
4055   base_cand_map = NULL;
4056   obstack_free (&chain_obstack, NULL);
4057   delete stmt_cand_map;
4058   cand_vec.release ();
4059   obstack_free (&cand_obstack, NULL);
4060 
4061   return 0;
4062 }
4063 
4064 } // anon namespace
4065 
4066 gimple_opt_pass *
4067 make_pass_strength_reduction (gcc::context *ctxt)
4068 {
4069   return new pass_strength_reduction (ctxt);
4070 }
4071