1 /* Straight-line strength reduction.
2    Copyright (C) 2012-2021 Free Software Foundation, Inc.
3    Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* There are many algorithms for performing strength reduction on
22    loops.  This is not one of them.  IVOPTS handles strength reduction
23    of induction variables just fine.  This pass is intended to pick
24    up the crumbs it leaves behind, by considering opportunities for
25    strength reduction along dominator paths.
26 
27    Strength reduction addresses explicit multiplies, and certain
28    multiplies implicit in addressing expressions.  It would also be
29    possible to apply strength reduction to divisions and modulos,
30    but such opportunities are relatively uncommon.
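
   As an illustrative example, straight-line code such as

     x1 = a * 4;
     x2 = (a + 1) * 4;
     x3 = (a + 2) * 4;

   is not helped by IVOPTS, but each later multiply can be rewritten
   here as an add off the previous result:

     x1 = a * 4;
     x2 = x1 + 4;
     x3 = x2 + 4;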
31 
32    Strength reduction is also currently restricted to integer operations.
33    If desired, it could be extended to floating-point operations under
34    control of something like -funsafe-math-optimizations.  */
35 
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "backend.h"
40 #include "rtl.h"
41 #include "tree.h"
42 #include "gimple.h"
43 #include "cfghooks.h"
44 #include "tree-pass.h"
45 #include "ssa.h"
46 #include "expmed.h"
47 #include "gimple-pretty-print.h"
48 #include "fold-const.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "cfgloop.h"
53 #include "tree-cfg.h"
54 #include "domwalk.h"
55 #include "tree-ssa-address.h"
56 #include "tree-affine.h"
57 #include "tree-eh.h"
58 #include "builtins.h"
59 
60 /* Information about a strength reduction candidate.  Each statement
61    in the candidate table represents an expression of one of the
62    following forms (the special case of CAND_REF will be described
63    later):
64 
65    (CAND_MULT)  S1:  X = (B + i) * S
66    (CAND_ADD)   S1:  X = B + (i * S)
67 
68    Here X and B are SSA names, i is an integer constant, and S is
69    either an SSA name or a constant.  We call B the "base," i the
70    "index", and S the "stride."
71 
72    Any statement S0 that dominates S1 and is of the form:
73 
74    (CAND_MULT)  S0:  Y = (B + i') * S
75    (CAND_ADD)   S0:  Y = B + (i' * S)
76 
77    is called a "basis" for S1.  In both cases, S1 may be replaced by
78 
79                 S1':  X = Y + (i - i') * S,
80 
81    where (i - i') * S is folded to the extent possible.
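
   As an illustration, if S0 is  Y = (B + 2) * S  and S1 is
   X = (B + 7) * S, then S1 may be replaced by  X = Y + 5 * S.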
82 
83    All gimple statements are visited in dominator order, and each
84    statement that may contribute to one of the forms of S1 above is
85    given at least one entry in the candidate table.  Such statements
86    include addition, pointer addition, subtraction, multiplication,
87    negation, copies, and nontrivial type casts.  If a statement may
88    represent more than one expression of the forms of S1 above,
89    multiple "interpretations" are stored in the table and chained
90    together.  Examples:
91 
92    * An add of two SSA names may treat either operand as the base.
93    * A multiply of two SSA names, likewise.
94    * A copy or cast may be thought of as either a CAND_MULT with
95      i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
96 
97    Candidate records are allocated from an obstack.  They are addressed
98    both from a hash table keyed on S1, and from a vector of candidate
99    pointers arranged in predominator order.
100 
101    Opportunity note
102    ----------------
103    Currently we don't recognize:
104 
105      S0: Y = (S * i') - B
106      S1: X = (S * i) - B
107 
108    as a strength reduction opportunity, even though this S1 would
109    also be replaceable by the S1' above.  This can be added if it
110    comes up in practice.
111 
112    Strength reduction in addressing
113    --------------------------------
114    There is another kind of candidate known as CAND_REF.  A CAND_REF
115    describes a statement containing a memory reference having
116    complex addressing that might benefit from strength reduction.
117    Specifically, we are interested in references for which
118    get_inner_reference returns a base address, offset, and bitpos as
119    follows:
120 
121      base:    MEM_REF (T1, C1)
122      offset:  MULT_EXPR (PLUS_EXPR (T2, C2), C3)
123      bitpos:  C4 * BITS_PER_UNIT
124 
125    Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
126    arbitrary integer constants.  Note that C2 may be zero, in which
127    case the offset will be MULT_EXPR (T2, C3).
128 
129    When this pattern is recognized, the original memory reference
130    can be replaced with:
131 
132      MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
133               C1 + (C2 * C3) + C4)
134 
135    which distributes the multiply to allow constant folding.  When
136    two or more addressing expressions can be represented by MEM_REFs
137    of this form, differing only in the constants C1, C2, and C4,
138    making this substitution produces more efficient addressing during
139    the RTL phases.  When there are not at least two expressions with
140    the same values of T1, T2, and C3, there is nothing to be gained
141    by the replacement.
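
   As an illustrative example, with  struct { int x, y; } *p,  the two
   stores

     p[n].x = 1;
     p[n].y = 2;

   share T1 = p, T2 = n, and C3 = 8 (the element size), differing only
   in the byte offset C4, so both benefit from the shared
   POINTER_PLUS_EXPR form above.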
142 
143    Strength reduction of CAND_REFs uses the same infrastructure as
144    that used by CAND_MULTs and CAND_ADDs.  We record T1 in the base (B)
145    field, MULT_EXPR (T2, C3) in the stride (S) field, and
146    C1 + (C2 * C3) + C4 in the index (i) field.  A basis for a CAND_REF
147    is thus another CAND_REF with the same B and S values.  When at
148    least two CAND_REFs are chained together using the basis relation,
149    each of them is replaced as above, resulting in improved code
150    generation for addressing.
151 
152    Conditional candidates
153    ======================
154 
155    Conditional candidates are best illustrated with an example.
156    Consider the code sequence:
157 
158    (1)  x_0 = ...;
159    (2)  a_0 = x_0 * 5;          MULT (B: x_0; i: 0; S: 5)
160         if (...)
161    (3)    x_1 = x_0 + 1;        ADD  (B: x_0, i: 1; S: 1)
162    (4)  x_2 = PHI <x_0, x_1>;   PHI  (B: x_0, i: 0, S: 1)
163    (5)  x_3 = x_2 + 1;          ADD  (B: x_2, i: 1, S: 1)
164    (6)  a_1 = x_3 * 5;          MULT (B: x_2, i: 1; S: 5)
165 
166    Here strength reduction is complicated by the uncertain value of x_2.
167    A legitimate transformation is:
168 
169    (1)  x_0 = ...;
170    (2)  a_0 = x_0 * 5;
171         if (...)
172 	  {
173    (3)      [x_1 = x_0 + 1;]
174    (3a)     t_1 = a_0 + 5;
175           }
176    (4)  [x_2 = PHI <x_0, x_1>;]
177    (4a) t_2 = PHI <a_0, t_1>;
178    (5)  [x_3 = x_2 + 1;]
179    (6r) a_1 = t_2 + 5;
180 
181    where the bracketed instructions may go dead.
182 
183    To recognize this opportunity, we have to observe that statement (6)
184    has a "hidden basis" (2).  The hidden basis is unlike a normal basis
185    in that the statement and the hidden basis have different base SSA
186    names (x_2 and x_0, respectively).  The relationship is established
187    when a statement's base name (x_2) is defined by a phi statement (4),
188    each argument of which (x_0, x_1) has an identical "derived base name."
189    If the argument is defined by a candidate (as x_1 is by (3)) that is a
190    CAND_ADD having a stride of 1, the derived base name of the argument is
191    the base name of the candidate (x_0).  Otherwise, the argument itself
192    is its derived base name (as is the case with argument x_0).
193 
194    The hidden basis for statement (6) is the nearest dominating candidate
195    whose base name is the derived base name (x_0) of the feeding phi (4),
196    and whose stride is identical to that of the statement.  We can then
197    create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
198    allowing the final replacement of (6) by the strength-reduced (6r).
199 
200    To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
201    A CAND_PHI is not a candidate for replacement, but is maintained in the
202    candidate table to ease discovery of hidden bases.  Any phi statement
203    whose arguments share a common derived base name is entered into the
204    table with the derived base name, an (arbitrary) index of zero, and a
205    stride of 1.  A statement with a hidden basis can then be detected by
206    simply looking up its feeding phi definition in the candidate table,
207    extracting the derived base name, and searching for a basis in the
208    usual manner after substituting the derived base name.
209 
210    Note that the transformation is only valid when the original phi and
211    the statements that define the phi's arguments are all at the same
212    position in the loop hierarchy.  */
213 
214 
215 /* Index into the candidate vector, offset by 1.  VECs are zero-based,
216    while cand_idx's are one-based, with zero indicating null.  */
217 typedef unsigned cand_idx;
218 
219 /* The kind of candidate.  */
220 enum cand_kind
221 {
222   CAND_MULT,
223   CAND_ADD,
224   CAND_REF,
225   CAND_PHI
226 };
227 
228 class slsr_cand_d
229 {
230 public:
231   /* The candidate statement S1.  */
232   gimple *cand_stmt;
233 
234   /* The base expression B:  often an SSA name, but not always.  */
235   tree base_expr;
236 
237   /* The stride S.  */
238   tree stride;
239 
240   /* The index constant i.  */
241   widest_int index;
242 
243   /* The type of the candidate.  This is normally the type of base_expr,
244      but casts may have occurred when combining feeding instructions.
245      A candidate can only be a basis for candidates of the same final type.
246      (For CAND_REFs, this is the type to be used for operand 1 of the
247      replacement MEM_REF.)  */
248   tree cand_type;
249 
250   /* The type to be used to interpret the stride field when the stride
251      is not a constant.  Normally the same as the type of the recorded
252      stride, but when the stride has been cast we need to maintain that
253      knowledge in order to make legal substitutions without losing
254      precision.  When the stride is a constant, this will be sizetype.  */
255   tree stride_type;
256 
257   /* The kind of candidate (CAND_MULT, etc.).  */
258   enum cand_kind kind;
259 
260   /* Index of this candidate in the candidate vector.  */
261   cand_idx cand_num;
262 
263   /* Index of the next candidate record for the same statement.
264      A statement may be useful in more than one way (e.g., due to
265      commutativity).  So we can have multiple "interpretations"
266      of a statement.  */
267   cand_idx next_interp;
268 
269   /* Index of the first candidate record in a chain for the same
270      statement.  */
271   cand_idx first_interp;
272 
273   /* Index of the basis statement S0, if any, in the candidate vector.  */
274   cand_idx basis;
275 
276   /* First candidate for which this candidate is a basis, if one exists.  */
277   cand_idx dependent;
278 
279   /* Next candidate having the same basis as this one.  */
280   cand_idx sibling;
281 
282   /* If this is a conditional candidate, the CAND_PHI candidate
283      that defines the base SSA name B.  */
284   cand_idx def_phi;
285 
286   /* Savings that can be expected from eliminating dead code if this
287      candidate is replaced.  */
288   int dead_savings;
289 
290   /* For PHI candidates, use a visited flag to keep from processing the
291      same PHI twice from multiple paths.  */
292   int visited;
293 
294   /* We sometimes have to cache a phi basis with a phi candidate to
295      avoid processing it twice.  Valid only if visited==1.  */
296   tree cached_basis;
297 };
298 
299 typedef class slsr_cand_d slsr_cand, *slsr_cand_t;
300 typedef const class slsr_cand_d *const_slsr_cand_t;
301 
302 /* Pointers to candidates are chained together as part of a mapping
303    from base expressions to the candidates that use them.  */
304 
305 struct cand_chain_d
306 {
307   /* Base expression for the chain of candidates:  often, but not
308      always, an SSA name.  */
309   tree base_expr;
310 
311   /* Pointer to a candidate.  */
312   slsr_cand_t cand;
313 
314   /* Chain pointer.  */
315   struct cand_chain_d *next;
316 
317 };
318 
319 typedef struct cand_chain_d cand_chain, *cand_chain_t;
320 typedef const struct cand_chain_d *const_cand_chain_t;
321 
322 /* Information about a unique "increment" associated with candidates
323    having an SSA name for a stride.  An increment is the difference
324    between the index of the candidate and the index of its basis,
325    i.e., (i - i') as discussed in the module commentary.
326 
327    When we are not going to generate address arithmetic we treat
328    increments that differ only in sign as the same, allowing sharing
329    of the cost of initializers.  The absolute value of the increment
330    is stored in the incr_info.  */
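
/* For example (illustrative), if one candidate differs from its basis by
   +3 * S and another by -3 * S for the same SSA stride S, both record an
   increment of 3 and can share a single initializer T_0 = 3 * S; one
   replacement then adds T_0 and the other subtracts it.  */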
331 
332 class incr_info_d
333 {
334 public:
335   /* The increment that relates a candidate to its basis.  */
336   widest_int incr;
337 
338   /* How many times the increment occurs in the candidate tree.  */
339   unsigned count;
340 
341   /* Cost of replacing candidates using this increment.  Negative and
342      zero costs indicate replacement should be performed.  */
343   int cost;
344 
345   /* If this increment is profitable but is not -1, 0, or 1, it requires
346      an initializer T_0 = stride * incr to be found or introduced in the
347      nearest common dominator of all candidates.  This field holds T_0
348      for subsequent use.  */
349   tree initializer;
350 
351   /* If the initializer was found to already exist, this is the block
352      where it was found.  */
353   basic_block init_bb;
354 };
355 
356 typedef class incr_info_d incr_info, *incr_info_t;
357 
358 /* Candidates are maintained in a vector.  If candidate X dominates
359    candidate Y, then X appears before Y in the vector; but the
360    converse does not necessarily hold.  */
361 static vec<slsr_cand_t> cand_vec;
362 
363 enum cost_consts
364 {
365   COST_NEUTRAL = 0,
366   COST_INFINITE = 1000
367 };
368 
369 enum stride_status
370 {
371   UNKNOWN_STRIDE = 0,
372   KNOWN_STRIDE = 1
373 };
374 
375 enum phi_adjust_status
376 {
377   NOT_PHI_ADJUST = 0,
378   PHI_ADJUST = 1
379 };
380 
381 enum count_phis_status
382 {
383   DONT_COUNT_PHIS = 0,
384   COUNT_PHIS = 1
385 };
386 
387 /* Constrain how many PHI nodes we will visit for a conditional
388    candidate (depth and breadth).  */
389 const int MAX_SPREAD = 16;
390 
391 /* Pointer map embodying a mapping from statements to candidates.  */
392 static hash_map<gimple *, slsr_cand_t> *stmt_cand_map;
393 
394 /* Obstack for candidates.  */
395 static struct obstack cand_obstack;
396 
397 /* Obstack for candidate chains.  */
398 static struct obstack chain_obstack;
399 
400 /* An array INCR_VEC of incr_infos is used during analysis of related
401    candidates having an SSA name for a stride.  INCR_VEC_LEN describes
402    its current length.  MAX_INCR_VEC_LEN is used to avoid costly
403    pathological cases. */
404 static incr_info_t incr_vec;
405 static unsigned incr_vec_len;
406 const int MAX_INCR_VEC_LEN = 16;
407 
408 /* For a chain of candidates with unknown stride, indicates whether or not
409    we must generate pointer arithmetic when replacing statements.  */
410 static bool address_arithmetic_p;
411 
412 /* Forward function declarations.  */
413 static slsr_cand_t base_cand_from_table (tree);
414 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
415 static bool legal_cast_p_1 (tree, tree);
416 
417 /* Produce a pointer to the IDX'th candidate in the candidate vector.  */
418 
419 static slsr_cand_t
420 lookup_cand (cand_idx idx)
421 {
422   return cand_vec[idx];
423 }
424 
425 /* Helper for hashing a candidate chain header.  */
426 
427 struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
428 {
429   static inline hashval_t hash (const cand_chain *);
430   static inline bool equal (const cand_chain *, const cand_chain *);
431 };
432 
433 inline hashval_t
434 cand_chain_hasher::hash (const cand_chain *p)
435 {
436   tree base_expr = p->base_expr;
437   return iterative_hash_expr (base_expr, 0);
438 }
439 
440 inline bool
441 cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
442 {
443   return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
444 }
445 
446 /* Hash table embodying a mapping from base exprs to chains of candidates.  */
447 static hash_table<cand_chain_hasher> *base_cand_map;
448 
449 /* Pointer map used by tree_to_aff_combination_expand.  */
450 static hash_map<tree, name_expansion *> *name_expansions;
451 /* Pointer map embodying a mapping from bases to alternative bases.  */
452 static hash_map<tree, tree> *alt_base_map;
453 
454 /* Given BASE, use the tree affine combination facilities to
455    find the underlying tree expression for BASE, with any
456    immediate offset excluded.
457 
458    N.B. we should eliminate this backtracking with better forward
459    analysis in a future release.  */
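
/* For example (illustrative), if BASE expands via the affine machinery to
   &a[i] + 80, the constant offset is dropped and &a[i] is returned as the
   alternative base; if removing the offset leaves BASE unchanged, NULL is
   returned.  The result is cached in alt_base_map either way.  */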
460 
461 static tree
462 get_alternative_base (tree base)
463 {
464   tree *result = alt_base_map->get (base);
465 
466   if (result == NULL)
467     {
468       tree expr;
469       aff_tree aff;
470 
471       tree_to_aff_combination_expand (base, TREE_TYPE (base),
472 				      &aff, &name_expansions);
473       aff.offset = 0;
474       expr = aff_combination_to_tree (&aff);
475 
476       gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));
477 
478       return expr == base ? NULL : expr;
479     }
480 
481   return *result;
482 }
483 
484 /* Look in the candidate table for a CAND_PHI that defines BASE and
485    return it if found; otherwise return NULL.  */
486 
487 static cand_idx
488 find_phi_def (tree base)
489 {
490   slsr_cand_t c;
491 
492   if (TREE_CODE (base) != SSA_NAME)
493     return 0;
494 
495   c = base_cand_from_table (base);
496 
497   if (!c || c->kind != CAND_PHI
498       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (c->cand_stmt)))
499     return 0;
500 
501   return c->cand_num;
502 }
503 
504 /* Determine whether all uses of NAME are directly or indirectly
505    used by STMT.  That is, we want to know whether if STMT goes
506    dead, the definition of NAME also goes dead.  */
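
/* For example (illustrative), if NAME's only uses are STMT itself and a
   cast statement whose result is in turn used only by STMT, then NAME is
   considered consumed by STMT.  The recursion depth is limited, giving a
   conservative "no" for very deep chains.  */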
507 static bool
508 uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse = 0)
509 {
510   gimple *use_stmt;
511   imm_use_iterator iter;
512   bool retval = true;
513 
514   FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
515     {
516       if (use_stmt == stmt || is_gimple_debug (use_stmt))
517 	continue;
518 
519       if (!is_gimple_assign (use_stmt)
520 	  || !gimple_get_lhs (use_stmt)
521 	  || !is_gimple_reg (gimple_get_lhs (use_stmt))
522 	  || recurse >= 10
523 	  || !uses_consumed_by_stmt (gimple_get_lhs (use_stmt), stmt,
524 				     recurse + 1))
525 	{
526 	  retval = false;
527 	  break;
528 	}
529     }
530 
531   return retval;
532 }
533 
534 /* Helper routine for find_basis_for_candidate.  May be called twice:
535    once for the candidate's base expr, and optionally again either for
536    the candidate's phi definition or for a CAND_REF's alternative base
537    expression.  */
538 
539 static slsr_cand_t
540 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
541 {
542   cand_chain mapping_key;
543   cand_chain_t chain;
544   slsr_cand_t basis = NULL;
545 
546   // Limit potential of N^2 behavior for long candidate chains.
547   int iters = 0;
548   int max_iters = param_max_slsr_candidate_scan;
549 
550   mapping_key.base_expr = base_expr;
551   chain = base_cand_map->find (&mapping_key);
552 
553   for (; chain && iters < max_iters; chain = chain->next, ++iters)
554     {
555       slsr_cand_t one_basis = chain->cand;
556 
557       if (one_basis->kind != c->kind
558 	  || one_basis->cand_stmt == c->cand_stmt
559 	  || !operand_equal_p (one_basis->stride, c->stride, 0)
560 	  || !types_compatible_p (one_basis->cand_type, c->cand_type)
561 	  || !types_compatible_p (one_basis->stride_type, c->stride_type)
562 	  || !dominated_by_p (CDI_DOMINATORS,
563 			      gimple_bb (c->cand_stmt),
564 			      gimple_bb (one_basis->cand_stmt)))
565 	continue;
566 
567       tree lhs = gimple_assign_lhs (one_basis->cand_stmt);
568       if (lhs && TREE_CODE (lhs) == SSA_NAME
569 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
570 	continue;
571 
572       if (!basis || basis->cand_num < one_basis->cand_num)
573 	basis = one_basis;
574     }
575 
576   return basis;
577 }
578 
579 /* Use the base expr from candidate C to look for possible candidates
580    that can serve as a basis for C.  Each potential basis must also
581    appear in a block that dominates the candidate statement and have
582    the same stride and type.  If more than one possible basis exists,
583    the one with highest index in the vector is chosen; this will be
584    the most immediately dominating basis.  */
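
/* For example (illustrative), if candidates 4 and 7 both match C's base
   expression, stride, and types, and both dominate C's statement, then
   candidate 7 is chosen:  cand_vec is kept in predominator order, so the
   higher cand_num is the nearer dominator.  */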
585 
586 static int
587 find_basis_for_candidate (slsr_cand_t c)
588 {
589   slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
590 
591   /* If a candidate doesn't have a basis using its base expression,
592      it may have a basis hidden by one or more intervening phis.  */
593   if (!basis && c->def_phi)
594     {
595       basic_block basis_bb, phi_bb;
596       slsr_cand_t phi_cand = lookup_cand (c->def_phi);
597       basis = find_basis_for_base_expr (c, phi_cand->base_expr);
598 
599       if (basis)
600 	{
601 	  /* A hidden basis must dominate the phi-definition of the
602 	     candidate's base name.  */
603 	  phi_bb = gimple_bb (phi_cand->cand_stmt);
604 	  basis_bb = gimple_bb (basis->cand_stmt);
605 
606 	  if (phi_bb == basis_bb
607 	      || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
608 	    {
609 	      basis = NULL;
610 	      c->basis = 0;
611 	    }
612 
613 	  /* If we found a hidden basis, estimate additional dead-code
614 	     savings if the phi and its feeding statements can be removed.  */
615 	  tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
616 	  if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt))
617 	    c->dead_savings += phi_cand->dead_savings;
618 	}
619     }
620 
621   if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
622     {
623       tree alt_base_expr = get_alternative_base (c->base_expr);
624       if (alt_base_expr)
625 	basis = find_basis_for_base_expr (c, alt_base_expr);
626     }
627 
628   if (basis)
629     {
630       c->sibling = basis->dependent;
631       basis->dependent = c->cand_num;
632       return basis->cand_num;
633     }
634 
635   return 0;
636 }
637 
638 /* Record a mapping from BASE to C, indicating that C may potentially serve
639    as a basis using that base expression.  BASE may be the same as
640    C->BASE_EXPR; alternatively, BASE can be a different tree that shares
641    the underlying expression of C->BASE_EXPR.  */
642 
643 static void
644 record_potential_basis (slsr_cand_t c, tree base)
645 {
646   cand_chain_t node;
647   cand_chain **slot;
648 
649   gcc_assert (base);
650 
651   node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
652   node->base_expr = base;
653   node->cand = c;
654   node->next = NULL;
655   slot = base_cand_map->find_slot (node, INSERT);
656 
657   if (*slot)
658     {
659       cand_chain_t head = (cand_chain_t) (*slot);
660       node->next = head->next;
661       head->next = node;
662     }
663   else
664     *slot = node;
665 }
666 
667 /* Allocate storage for a new candidate and initialize its fields.
668    Attempt to find a basis for the candidate.
669 
670    For CAND_REF, an alternative base may also be recorded and used
671    to find a basis.  This helps cases where the expression hidden
672    behind BASE (which is usually an SSA_NAME) has immediate offset,
673    e.g.
674 
675      a2[i][j] = 1;
676      a2[i + 20][j] = 2;  */
677 
678 static slsr_cand_t
679 alloc_cand_and_find_basis (enum cand_kind kind, gimple *gs, tree base,
680 			   const widest_int &index, tree stride, tree ctype,
681 			   tree stype, unsigned savings)
682 {
683   slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
684 					       sizeof (slsr_cand));
685   c->cand_stmt = gs;
686   c->base_expr = base;
687   c->stride = stride;
688   c->index = index;
689   c->cand_type = ctype;
690   c->stride_type = stype;
691   c->kind = kind;
692   c->cand_num = cand_vec.length ();
693   c->next_interp = 0;
694   c->first_interp = c->cand_num;
695   c->dependent = 0;
696   c->sibling = 0;
697   c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
698   c->dead_savings = savings;
699   c->visited = 0;
700   c->cached_basis = NULL_TREE;
701 
702   cand_vec.safe_push (c);
703 
704   if (kind == CAND_PHI)
705     c->basis = 0;
706   else
707     c->basis = find_basis_for_candidate (c);
708 
709   record_potential_basis (c, base);
710   if (flag_expensive_optimizations && kind == CAND_REF)
711     {
712       tree alt_base = get_alternative_base (base);
713       if (alt_base)
714 	record_potential_basis (c, alt_base);
715     }
716 
717   return c;
718 }
719 
720 /* Determine the target cost of statement GS when compiling according
721    to SPEED.  */
722 
723 static int
724 stmt_cost (gimple *gs, bool speed)
725 {
726   tree lhs, rhs1, rhs2;
727   machine_mode lhs_mode;
728 
729   gcc_assert (is_gimple_assign (gs));
730   lhs = gimple_assign_lhs (gs);
731   rhs1 = gimple_assign_rhs1 (gs);
732   lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
733 
734   switch (gimple_assign_rhs_code (gs))
735     {
736     case MULT_EXPR:
737       rhs2 = gimple_assign_rhs2 (gs);
738 
739       if (tree_fits_shwi_p (rhs2))
740 	return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
741 
742       gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
743       return mul_cost (speed, lhs_mode);
744 
745     case PLUS_EXPR:
746     case POINTER_PLUS_EXPR:
747     case MINUS_EXPR:
748       return add_cost (speed, lhs_mode);
749 
750     case NEGATE_EXPR:
751       return neg_cost (speed, lhs_mode);
752 
753     CASE_CONVERT:
754       return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
755 
756     /* Note that we don't assign costs to copies that in most cases
757        will go away.  */
758     case SSA_NAME:
759       return 0;
760 
761     default:
762       ;
763     }
764 
765   gcc_unreachable ();
766   return 0;
767 }
768 
769 /* Look up the defining statement for BASE_IN and return a pointer
770    to its candidate in the candidate table, if any; otherwise NULL.
771    Only CAND_ADD and CAND_MULT candidates are returned.  */
772 
773 static slsr_cand_t
774 base_cand_from_table (tree base_in)
775 {
776   slsr_cand_t *result;
777 
778   gimple *def = SSA_NAME_DEF_STMT (base_in);
779   if (!def)
780     return (slsr_cand_t) NULL;
781 
782   result = stmt_cand_map->get (def);
783 
784   if (result && (*result)->kind != CAND_REF)
785     return *result;
786 
787   return (slsr_cand_t) NULL;
788 }
789 
790 /* Add an entry to the statement-to-candidate mapping.  */
791 
792 static void
793 add_cand_for_stmt (gimple *gs, slsr_cand_t c)
794 {
795   gcc_assert (!stmt_cand_map->put (gs, c));
796 }
797 
798 /* Given PHI which contains a phi statement, determine whether it
799    satisfies all the requirements of a phi candidate.  If so, create
800    a candidate.  Note that a CAND_PHI never has a basis itself, but
801    is used to help find a basis for subsequent candidates.  */
802 
803 static void
804 slsr_process_phi (gphi *phi, bool speed)
805 {
806   unsigned i;
807   tree arg0_base = NULL_TREE, base_type;
808   slsr_cand_t c;
809   class loop *cand_loop = gimple_bb (phi)->loop_father;
810   unsigned savings = 0;
811 
812   /* A CAND_PHI requires each of its arguments to have the same
813      derived base name.  (See the module header commentary for a
814      definition of derived base names.)  Furthermore, all feeding
815      definitions must be in the same position in the loop hierarchy
816      as PHI.  */
817 
818   for (i = 0; i < gimple_phi_num_args (phi); i++)
819     {
820       slsr_cand_t arg_cand;
821       tree arg = gimple_phi_arg_def (phi, i);
822       tree derived_base_name = NULL_TREE;
823       gimple *arg_stmt = NULL;
824       basic_block arg_bb = NULL;
825 
826       if (TREE_CODE (arg) != SSA_NAME)
827 	return;
828 
829       arg_cand = base_cand_from_table (arg);
830 
831       if (arg_cand)
832 	{
833 	  while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
834 	    {
835 	      if (!arg_cand->next_interp)
836 		return;
837 
838 	      arg_cand = lookup_cand (arg_cand->next_interp);
839 	    }
840 
841 	  if (!integer_onep (arg_cand->stride))
842 	    return;
843 
844 	  derived_base_name = arg_cand->base_expr;
845 	  arg_stmt = arg_cand->cand_stmt;
846 	  arg_bb = gimple_bb (arg_stmt);
847 
848 	  /* Gather potential dead code savings if the phi statement
849 	     can be removed later on.  */
850 	  if (uses_consumed_by_stmt (arg, phi))
851 	    {
852 	      if (gimple_code (arg_stmt) == GIMPLE_PHI)
853 		savings += arg_cand->dead_savings;
854 	      else
855 		savings += stmt_cost (arg_stmt, speed);
856 	    }
857 	}
858       else if (SSA_NAME_IS_DEFAULT_DEF (arg))
859 	{
860 	  derived_base_name = arg;
861 	  arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
862 	}
863 
864       if (!arg_bb || arg_bb->loop_father != cand_loop)
865 	return;
866 
867       if (i == 0)
868 	arg0_base = derived_base_name;
869       else if (!operand_equal_p (derived_base_name, arg0_base, 0))
870 	return;
871     }
872 
873   /* Create the candidate.  "alloc_cand_and_find_basis" is named
874      misleadingly for this case, as no basis will be sought for a
875      CAND_PHI.  */
876   base_type = TREE_TYPE (arg0_base);
877 
878   c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
879 				 0, integer_one_node, base_type,
880 				 sizetype, savings);
881 
882   /* Add the candidate to the statement-candidate mapping.  */
883   add_cand_for_stmt (phi, c);
884 }
885 
886 /* Given PBASE which is a pointer to tree, look up the defining
887    statement for it and check whether the candidate is in the
888    form of:
889 
890      X = B + (1 * S), S is integer constant
891      X = B + (i * S), S is integer one
892 
893    If so, set PBASE to the candidate's base_expr and return the
894    value (i * S) as a widest_int.
895    Otherwise, just return zero.  */
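
/* For example (illustrative), if *PBASE is an SSA name T2 defined by a
   CAND_ADD recorded as  T2 = T2' + (1 * 8),  then *PBASE is set to T2'
   and 8 is returned, which the caller folds into the C5 * C3 term of a
   restructured reference.  */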
896 
897 static widest_int
898 backtrace_base_for_ref (tree *pbase)
899 {
900   tree base_in = *pbase;
901   slsr_cand_t base_cand;
902 
903   STRIP_NOPS (base_in);
904 
905   /* Strip off widening conversion(s) to handle cases where
906      e.g. 'B' is widened from an 'int' in order to calculate
907      a 64-bit address.  */
908   if (CONVERT_EXPR_P (base_in)
909       && legal_cast_p_1 (TREE_TYPE (base_in),
910 			 TREE_TYPE (TREE_OPERAND (base_in, 0))))
911     base_in = get_unwidened (base_in, NULL_TREE);
912 
913   if (TREE_CODE (base_in) != SSA_NAME)
914     return 0;
915 
916   base_cand = base_cand_from_table (base_in);
917 
918   while (base_cand && base_cand->kind != CAND_PHI)
919     {
920       if (base_cand->kind == CAND_ADD
921 	  && base_cand->index == 1
922 	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
923 	{
924 	  /* X = B + (1 * S), S is integer constant.  */
925 	  *pbase = base_cand->base_expr;
926 	  return wi::to_widest (base_cand->stride);
927 	}
928       else if (base_cand->kind == CAND_ADD
929 	       && TREE_CODE (base_cand->stride) == INTEGER_CST
930 	       && integer_onep (base_cand->stride))
931 	{
932 	  /* X = B + (i * S), S is integer one.  */
933 	  *pbase = base_cand->base_expr;
934 	  return base_cand->index;
935 	}
936 
937       base_cand = lookup_cand (base_cand->next_interp);
938     }
939 
940   return 0;
941 }
942 
943 /* Look for the following pattern:
944 
945     *PBASE:    MEM_REF (T1, C1)
946 
947     *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
948                      or
949                MULT_EXPR (PLUS_EXPR (T2, C2), C3)
950                      or
951                MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
952 
953     *PINDEX:   C4 * BITS_PER_UNIT
954 
955    If not present, leave the input values unchanged and return FALSE.
956    Otherwise, modify the input values as follows and return TRUE:
957 
958     *PBASE:    T1
959     *POFFSET:  MULT_EXPR (T2, C3)
960     *PINDEX:   C1 + (C2 * C3) + C4
961 
962    When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
963    will be further restructured to:
964 
965     *PBASE:    T1
966     *POFFSET:  MULT_EXPR (T2', C3)
967     *PINDEX:   C1 + (C2 * C3) + C4 + (C5 * C3)  */
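
/* As an illustrative example, for a C fragment such as

     struct t { int a[16]; } *q;
     ...
     q->a[n + 2] = 1;

   get_inner_reference may produce

     base:    MEM_REF (q, 0)                     C1 = 0
     offset:  MULT_EXPR (PLUS_EXPR (n, 2), 4)    C2 = 2, C3 = 4
     bitpos:  0                                  C4 = 0

   which is restructured to *PBASE = q, *POFFSET = MULT_EXPR (n, 4),
   and *PINDEX = 2 * 4 = 8.  */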
968 
969 static bool
970 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
971 		       tree *ptype)
972 {
973   tree base = *pbase, offset = *poffset;
974   widest_int index = *pindex;
975   tree mult_op0, t1, t2, type;
976   widest_int c1, c2, c3, c4, c5;
977   offset_int mem_offset;
978 
979   if (!base
980       || !offset
981       || TREE_CODE (base) != MEM_REF
982       || !mem_ref_offset (base).is_constant (&mem_offset)
983       || TREE_CODE (offset) != MULT_EXPR
984       || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
985       || wi::umod_floor (index, BITS_PER_UNIT) != 0)
986     return false;
987 
988   t1 = TREE_OPERAND (base, 0);
989   c1 = widest_int::from (mem_offset, SIGNED);
990   type = TREE_TYPE (TREE_OPERAND (base, 1));
991 
992   mult_op0 = TREE_OPERAND (offset, 0);
993   c3 = wi::to_widest (TREE_OPERAND (offset, 1));
994 
995   if (TREE_CODE (mult_op0) == PLUS_EXPR)
996 
997     if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
998       {
999 	t2 = TREE_OPERAND (mult_op0, 0);
1000 	c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
1001       }
1002     else
1003       return false;
1004 
1005   else if (TREE_CODE (mult_op0) == MINUS_EXPR)
1006 
1007     if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
1008       {
1009 	t2 = TREE_OPERAND (mult_op0, 0);
1010 	c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
1011       }
1012     else
1013       return false;
1014 
1015   else
1016     {
1017       t2 = mult_op0;
1018       c2 = 0;
1019     }
1020 
1021   c4 = index >> LOG2_BITS_PER_UNIT;
1022   c5 = backtrace_base_for_ref (&t2);
1023 
1024   *pbase = t1;
1025   *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
1026 			  wide_int_to_tree (sizetype, c3));
1027   *pindex = c1 + c2 * c3 + c4 + c5 * c3;
1028   *ptype = type;
1029 
1030   return true;
1031 }
1032 
1033 /* Given GS which contains a data reference, create a CAND_REF entry in
1034    the candidate table and attempt to find a basis.  */
1035 
1036 static void
1037 slsr_process_ref (gimple *gs)
1038 {
1039   tree ref_expr, base, offset, type;
1040   poly_int64 bitsize, bitpos;
1041   machine_mode mode;
1042   int unsignedp, reversep, volatilep;
1043   slsr_cand_t c;
1044 
1045   if (gimple_vdef (gs))
1046     ref_expr = gimple_assign_lhs (gs);
1047   else
1048     ref_expr = gimple_assign_rhs1 (gs);
1049 
1050   if (!handled_component_p (ref_expr)
1051       || TREE_CODE (ref_expr) == BIT_FIELD_REF
1052       || (TREE_CODE (ref_expr) == COMPONENT_REF
1053 	  && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1054     return;
1055 
1056   base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1057 			      &unsignedp, &reversep, &volatilep);
1058   HOST_WIDE_INT cbitpos;
1059   if (reversep || !bitpos.is_constant (&cbitpos))
1060     return;
1061   widest_int index = cbitpos;
1062 
1063   if (!restructure_reference (&base, &offset, &index, &type))
1064     return;
1065 
1066   c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1067 				 type, sizetype, 0);
1068 
1069   /* Add the candidate to the statement-candidate mapping.  */
1070   add_cand_for_stmt (gs, c);
1071 }
1072 
1073 /* Create a candidate entry for a statement GS, where GS multiplies
1074    two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
1075    about the two SSA names into the new candidate.  Return the new
1076    candidate.  */
1077 
1078 static slsr_cand_t
1079 create_mul_ssa_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1080 {
1081   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1082   tree stype = NULL_TREE;
1083   widest_int index;
1084   unsigned savings = 0;
1085   slsr_cand_t c;
1086   slsr_cand_t base_cand = base_cand_from_table (base_in);
1087 
1088   /* Look at all interpretations of the base candidate, if necessary,
1089      to find information to propagate into this candidate.  */
1090   while (base_cand && !base && base_cand->kind != CAND_PHI)
1091     {
1092 
1093       if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1094 	{
1095 	  /* Y = (B + i') * 1
1096 	     X = Y * Z
1097 	     ================
1098 	     X = (B + i') * Z  */
1099 	  base = base_cand->base_expr;
1100 	  index = base_cand->index;
1101 	  stride = stride_in;
1102 	  ctype = base_cand->cand_type;
1103 	  stype = TREE_TYPE (stride_in);
1104 	  if (has_single_use (base_in))
1105 	    savings = (base_cand->dead_savings
1106 		       + stmt_cost (base_cand->cand_stmt, speed));
1107 	}
1108       else if (base_cand->kind == CAND_ADD
1109 	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
1110 	{
1111 	  /* Y = B + (i' * S), S constant
1112 	     X = Y * Z
1113 	     ============================
1114 	     X = B + ((i' * S) * Z)  */
1115 	  base = base_cand->base_expr;
1116 	  index = base_cand->index * wi::to_widest (base_cand->stride);
1117 	  stride = stride_in;
1118 	  ctype = base_cand->cand_type;
1119 	  stype = TREE_TYPE (stride_in);
1120 	  if (has_single_use (base_in))
1121 	    savings = (base_cand->dead_savings
1122 		       + stmt_cost (base_cand->cand_stmt, speed));
1123 	}
1124 
1125       base_cand = lookup_cand (base_cand->next_interp);
1126     }
1127 
1128   if (!base)
1129     {
1130       /* No interpretations had anything useful to propagate, so
1131 	 produce X = (Y + 0) * Z.  */
1132       base = base_in;
1133       index = 0;
1134       stride = stride_in;
1135       ctype = TREE_TYPE (base_in);
1136       stype = TREE_TYPE (stride_in);
1137     }
1138 
1139   c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1140 				 ctype, stype, savings);
1141   return c;
1142 }
1143 
1144 /* Create a candidate entry for a statement GS, where GS multiplies
1145    SSA name BASE_IN by constant STRIDE_IN.  Propagate any known
1146    information about BASE_IN into the new candidate.  Return the new
1147    candidate.  */
1148 
1149 static slsr_cand_t
1150 create_mul_imm_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1151 {
1152   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1153   widest_int index, temp;
1154   unsigned savings = 0;
1155   slsr_cand_t c;
1156   slsr_cand_t base_cand = base_cand_from_table (base_in);
1157 
1158   /* Look at all interpretations of the base candidate, if necessary,
1159      to find information to propagate into this candidate.  */
1160   while (base_cand && !base && base_cand->kind != CAND_PHI)
1161     {
1162       if (base_cand->kind == CAND_MULT
1163 	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
1164 	{
1165 	  /* Y = (B + i') * S, S constant
1166 	     X = Y * c
1167 	     ============================
1168 	     X = (B + i') * (S * c)  */
1169 	  temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
1170 	  if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1171 	    {
1172 	      base = base_cand->base_expr;
1173 	      index = base_cand->index;
1174 	      stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1175 	      ctype = base_cand->cand_type;
1176 	      if (has_single_use (base_in))
1177 		savings = (base_cand->dead_savings
1178 			   + stmt_cost (base_cand->cand_stmt, speed));
1179 	    }
1180 	}
1181       else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1182 	{
1183 	  /* Y = B + (i' * 1)
1184 	     X = Y * c
1185 	     ===========================
1186 	     X = (B + i') * c  */
1187 	  base = base_cand->base_expr;
1188 	  index = base_cand->index;
1189 	  stride = stride_in;
1190 	  ctype = base_cand->cand_type;
1191 	  if (has_single_use (base_in))
1192 	    savings = (base_cand->dead_savings
1193 		       + stmt_cost (base_cand->cand_stmt, speed));
1194 	}
1195       else if (base_cand->kind == CAND_ADD
1196 	       && base_cand->index == 1
1197 	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
1198 	{
1199 	  /* Y = B + (1 * S), S constant
1200 	     X = Y * c
1201 	     ===========================
1202 	     X = (B + S) * c  */
1203 	  base = base_cand->base_expr;
1204 	  index = wi::to_widest (base_cand->stride);
1205 	  stride = stride_in;
1206 	  ctype = base_cand->cand_type;
1207 	  if (has_single_use (base_in))
1208 	    savings = (base_cand->dead_savings
1209 		       + stmt_cost (base_cand->cand_stmt, speed));
1210 	}
1211 
1212       base_cand = lookup_cand (base_cand->next_interp);
1213     }
1214 
1215   if (!base)
1216     {
1217       /* No interpretations had anything useful to propagate, so
1218 	 produce X = (Y + 0) * c.  */
1219       base = base_in;
1220       index = 0;
1221       stride = stride_in;
1222       ctype = TREE_TYPE (base_in);
1223     }
1224 
1225   c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1226 				 ctype, sizetype, savings);
1227   return c;
1228 }
1229 
1230 /* Given GS which is a multiply of scalar integers, make an appropriate
1231    entry in the candidate table.  If this is a multiply of two SSA names,
1232    create two CAND_MULT interpretations and attempt to find a basis for
1233    each of them.  Otherwise, create a single CAND_MULT and attempt to
1234    find a basis.  */
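
/* For example (illustrative), for  x = a * b  with both operands SSA
   names, one interpretation records a as the base and b as the stride,
   and a second records b as the base and a as the stride; the two are
   chained together as interpretations of the same statement.  */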
1235 
1236 static void
1237 slsr_process_mul (gimple *gs, tree rhs1, tree rhs2, bool speed)
1238 {
1239   slsr_cand_t c, c2;
1240 
1241   /* If this is a multiply of an SSA name with itself, it is highly
1242      unlikely that we will get a strength reduction opportunity, so
1243      don't record it as a candidate.  This simplifies the logic for
1244      finding a basis; if this restriction is removed, that logic must be revisited.  */
1245   if (rhs1 == rhs2)
1246     return;
1247 
1248   if (TREE_CODE (rhs2) == SSA_NAME)
1249     {
1250       /* Record an interpretation of this statement in the candidate table
1251 	 assuming RHS1 is the base expression and RHS2 is the stride.  */
1252       c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1253 
1254       /* Add the first interpretation to the statement-candidate mapping.  */
1255       add_cand_for_stmt (gs, c);
1256 
1257       /* Record another interpretation of this statement assuming RHS1
1258 	 is the stride and RHS2 is the base expression.  */
1259       c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1260       c->next_interp = c2->cand_num;
1261       c2->first_interp = c->cand_num;
1262     }
1263   else if (TREE_CODE (rhs2) == INTEGER_CST && !integer_zerop (rhs2))
1264     {
1265       /* Record an interpretation for the multiply-immediate.  */
1266       c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1267 
1268       /* Add the interpretation to the statement-candidate mapping.  */
1269       add_cand_for_stmt (gs, c);
1270     }
1271 }
1272 
1273 /* Create a candidate entry for a statement GS, where GS adds two
1274    SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1275    subtracts ADDEND_IN from BASE_IN otherwise.  Propagate any known
1276    information about the two SSA names into the new candidate.
1277    Return the new candidate.  */
1278 
1279 static slsr_cand_t
1280 create_add_ssa_cand (gimple *gs, tree base_in, tree addend_in,
1281 		     bool subtract_p, bool speed)
1282 {
1283   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1284   tree stype = NULL_TREE;
1285   widest_int index;
1286   unsigned savings = 0;
1287   slsr_cand_t c;
1288   slsr_cand_t base_cand = base_cand_from_table (base_in);
1289   slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1290 
1291   /* The most useful transformation is a multiply-immediate feeding
1292      an add or subtract.  Look for that first.  */
1293   while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1294     {
1295       if (addend_cand->kind == CAND_MULT
1296 	  && addend_cand->index == 0
1297 	  && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1298 	{
1299 	  /* Z = (B + 0) * S, S constant
1300 	     X = Y +/- Z
1301 	     ===========================
1302 	     X = Y + ((+/-1 * S) * B)  */
1303 	  base = base_in;
1304 	  index = wi::to_widest (addend_cand->stride);
1305 	  if (subtract_p)
1306 	    index = -index;
1307 	  stride = addend_cand->base_expr;
1308 	  ctype = TREE_TYPE (base_in);
1309 	  stype = addend_cand->cand_type;
1310 	  if (has_single_use (addend_in))
1311 	    savings = (addend_cand->dead_savings
1312 		       + stmt_cost (addend_cand->cand_stmt, speed));
1313 	}
1314 
1315       addend_cand = lookup_cand (addend_cand->next_interp);
1316     }
1317 
1318   while (base_cand && !base && base_cand->kind != CAND_PHI)
1319     {
1320       if (base_cand->kind == CAND_ADD
1321 	  && (base_cand->index == 0
1322 	      || operand_equal_p (base_cand->stride,
1323 				  integer_zero_node, 0)))
1324 	{
1325 	  /* Y = B + (i' * S), i' * S = 0
1326 	     X = Y +/- Z
1327 	     ============================
1328 	     X = B + (+/-1 * Z)  */
1329 	  base = base_cand->base_expr;
1330 	  index = subtract_p ? -1 : 1;
1331 	  stride = addend_in;
1332 	  ctype = base_cand->cand_type;
1333 	  stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1334 		   : TREE_TYPE (addend_in));
1335 	  if (has_single_use (base_in))
1336 	    savings = (base_cand->dead_savings
1337 		       + stmt_cost (base_cand->cand_stmt, speed));
1338 	}
1339       else if (subtract_p)
1340 	{
1341 	  slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1342 
1343 	  while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1344 	    {
1345 	      if (subtrahend_cand->kind == CAND_MULT
1346 		  && subtrahend_cand->index == 0
1347 		  && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1348 		{
1349 		  /* Z = (B + 0) * S, S constant
1350 		     X = Y - Z
1351 		     ===========================
1352 		     Value:  X = Y + ((-1 * S) * B)  */
1353 		  base = base_in;
1354 		  index = wi::to_widest (subtrahend_cand->stride);
1355 		  index = -index;
1356 		  stride = subtrahend_cand->base_expr;
1357 		  ctype = TREE_TYPE (base_in);
1358 		  stype = subtrahend_cand->cand_type;
1359 		  if (has_single_use (addend_in))
1360 		    savings = (subtrahend_cand->dead_savings
1361 			       + stmt_cost (subtrahend_cand->cand_stmt, speed));
1362 		}
1363 
1364 	      subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1365 	    }
1366 	}
1367 
1368       base_cand = lookup_cand (base_cand->next_interp);
1369     }
1370 
1371   if (!base)
1372     {
1373       /* No interpretations had anything useful to propagate, so
1374 	 produce X = Y + (1 * Z).  */
1375       base = base_in;
1376       index = subtract_p ? -1 : 1;
1377       stride = addend_in;
1378       ctype = TREE_TYPE (base_in);
1379       stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1380 	       : TREE_TYPE (addend_in));
1381     }
1382 
1383   c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1384 				 ctype, stype, savings);
1385   return c;
1386 }
1387 
1388 /* Create a candidate entry for a statement GS, where GS adds SSA
1389    name BASE_IN to constant INDEX_IN.  Propagate any known information
1390    about BASE_IN into the new candidate.  Return the new candidate.  */
1391 
1392 static slsr_cand_t
1393 create_add_imm_cand (gimple *gs, tree base_in, const widest_int &index_in,
1394 		     bool speed)
1395 {
1396   enum cand_kind kind = CAND_ADD;
1397   tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1398   tree stype = NULL_TREE;
1399   widest_int index, multiple;
1400   unsigned savings = 0;
1401   slsr_cand_t c;
1402   slsr_cand_t base_cand = base_cand_from_table (base_in);
1403 
1404   while (base_cand && !base && base_cand->kind != CAND_PHI)
1405     {
1406       signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1407 
1408       if (TREE_CODE (base_cand->stride) == INTEGER_CST
1409 	  && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
1410 				sign, &multiple))
1411 	{
1412 	  /* Y = (B + i') * S, S constant, c = kS for some integer k
1413 	     X = Y + c
1414 	     ============================
1415 	     X = (B + (i'+ k)) * S
1416 	  OR
1417 	     Y = B + (i' * S), S constant, c = kS for some integer k
1418 	     X = Y + c
1419 	     ============================
1420 	     X = (B + (i'+ k)) * S  */
1421 	  kind = base_cand->kind;
1422 	  base = base_cand->base_expr;
1423 	  index = base_cand->index + multiple;
1424 	  stride = base_cand->stride;
1425 	  ctype = base_cand->cand_type;
1426 	  stype = base_cand->stride_type;
1427 	  if (has_single_use (base_in))
1428 	    savings = (base_cand->dead_savings
1429 		       + stmt_cost (base_cand->cand_stmt, speed));
1430 	}
1431 
1432       base_cand = lookup_cand (base_cand->next_interp);
1433     }
1434 
1435   if (!base)
1436     {
1437       /* No interpretations had anything useful to propagate, so
1438 	 produce X = Y + (c * 1).  */
1439       kind = CAND_ADD;
1440       base = base_in;
1441       index = index_in;
1442       stride = integer_one_node;
1443       ctype = TREE_TYPE (base_in);
1444       stype = sizetype;
1445     }
1446 
1447   c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1448 				 ctype, stype, savings);
1449   return c;
1450 }
1451 
1452 /* Given GS which is an add or subtract of scalar integers or pointers,
1453    make at least one appropriate entry in the candidate table.  */
1454 
1455 static void
1456 slsr_process_add (gimple *gs, tree rhs1, tree rhs2, bool speed)
1457 {
1458   bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1459   slsr_cand_t c = NULL, c2;
1460 
1461   if (TREE_CODE (rhs2) == SSA_NAME)
1462     {
1463       /* First record an interpretation assuming RHS1 is the base expression
1464 	 and RHS2 is the stride.  But it doesn't make sense for the
1465 	 stride to be a pointer, so don't record a candidate in that case.  */
1466       if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1467 	{
1468 	  c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1469 
1470 	  /* Add the first interpretation to the statement-candidate
1471 	     mapping.  */
1472 	  add_cand_for_stmt (gs, c);
1473 	}
1474 
1475       /* If the two RHS operands are identical, or this is a subtract,
1476 	 we're done.  */
1477       if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1478 	return;
1479 
1480       /* Otherwise, record another interpretation assuming RHS2 is the
1481 	 base expression and RHS1 is the stride, again provided that the
1482 	 stride is not a pointer.  */
1483       if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1484 	{
1485 	  c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1486 	  if (c)
1487 	    {
1488 	      c->next_interp = c2->cand_num;
1489 	      c2->first_interp = c->cand_num;
1490 	    }
1491 	  else
1492 	    add_cand_for_stmt (gs, c2);
1493 	}
1494     }
1495   else if (TREE_CODE (rhs2) == INTEGER_CST)
1496     {
1497       /* Record an interpretation for the add-immediate.  */
1498       widest_int index = wi::to_widest (rhs2);
1499       if (subtract_p)
1500 	index = -index;
1501 
1502       c = create_add_imm_cand (gs, rhs1, index, speed);
1503 
1504       /* Add the interpretation to the statement-candidate mapping.  */
1505       add_cand_for_stmt (gs, c);
1506     }
1507 }
1508 
1509 /* Given GS which is a negate of a scalar integer, make an appropriate
1510    entry in the candidate table.  A negate is equivalent to a multiply
1511    by -1.  */
1512 
1513 static void
1514 slsr_process_neg (gimple *gs, tree rhs1, bool speed)
1515 {
1516   /* Record a CAND_MULT interpretation for the multiply by -1.  */
1517   slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1518 
1519   /* Add the interpretation to the statement-candidate mapping.  */
1520   add_cand_for_stmt (gs, c);
1521 }
1522 
1523 /* Help function for legal_cast_p, operating on two trees.  Checks
1524    whether it's allowable to cast from RHS to LHS.  See legal_cast_p
1525    for more details.  */
1526 
1527 static bool
1528 legal_cast_p_1 (tree lhs_type, tree rhs_type)
1529 {
1530   unsigned lhs_size, rhs_size;
1531   bool lhs_wraps, rhs_wraps;
1532 
1533   lhs_size = TYPE_PRECISION (lhs_type);
1534   rhs_size = TYPE_PRECISION (rhs_type);
1535   lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
1536   rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
1537 
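  /* Illustrative examples:  widening a 32-bit value of a non-wrapping
     (undefined-overflow) type to 64 bits is allowed; truncating to a
     narrower type, converting a wrapping type to a non-wrapping one, or
     changing the width of a wrapping type, is not, since the wrapped
     value could differ.  */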
1538   if (lhs_size < rhs_size
1539       || (rhs_wraps && !lhs_wraps)
1540       || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1541     return false;
1542 
1543   return true;
1544 }
1545 
1546 /* Return TRUE if GS is a statement that defines an SSA name from
1547    a conversion and is legal for us to combine with an add and multiply
1548    in the candidate table.  For example, suppose we have:
1549 
1550      A = B + i;
1551      C = (type) A;
1552      D = C * S;
1553 
1554    Without the type-cast, we would create a CAND_MULT for D with base B,
1555    index i, and stride S.  We want to record this candidate only if it
1556    is equivalent to apply the type cast following the multiply:
1557 
1558      A = B + i;
1559      E = A * S;
1560      D = (type) E;
1561 
1562    We will record the type with the candidate for D.  This allows us
1563    to use a similar previous candidate as a basis.  If we have earlier seen
1564 
1565      A' = B + i';
1566      C' = (type) A';
1567      D' = C' * S;
1568 
1569    we can replace D with
1570 
1571      D = D' + (i - i') * S;
1572 
1573    But if moving the type-cast would change semantics, we mustn't do this.
1574 
1575    This is legitimate for casts from a non-wrapping integral type to
1576    any integral type of the same or larger size.  It is not legitimate
1577    to convert a wrapping type to a non-wrapping type, or to a wrapping
1578    type of a different size.  I.e., with a wrapping type, we must
1579    assume that the addition B + i could wrap, in which case performing
1580    the multiply before or after one of the "illegal" type casts will
1581    have different semantics.  */
1582 
1583 static bool
1584 legal_cast_p (gimple *gs, tree rhs)
1585 {
1586   if (!is_gimple_assign (gs)
1587       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1588     return false;
1589 
1590   return legal_cast_p_1 (TREE_TYPE (gimple_assign_lhs (gs)), TREE_TYPE (rhs));
1591 }
1592 
1593 /* Given GS which is a cast to a scalar integer type, determine whether
1594    the cast is legal for strength reduction.  If so, make at least one
1595    appropriate entry in the candidate table.  */
1596 
1597 static void
1598 slsr_process_cast (gimple *gs, tree rhs1, bool speed)
1599 {
1600   tree lhs, ctype;
1601   slsr_cand_t base_cand, c = NULL, c2;
1602   unsigned savings = 0;
1603 
1604   if (!legal_cast_p (gs, rhs1))
1605     return;
1606 
1607   lhs = gimple_assign_lhs (gs);
1608   base_cand = base_cand_from_table (rhs1);
1609   ctype = TREE_TYPE (lhs);
1610 
1611   if (base_cand && base_cand->kind != CAND_PHI)
1612     {
1613       slsr_cand_t first_cand = NULL;
1614 
1615       while (base_cand)
1616 	{
1617 	  /* Propagate all data from the base candidate except the type,
1618 	     which comes from the cast, and the base candidate's cast,
1619 	     which is no longer applicable.  */
1620 	  if (has_single_use (rhs1))
1621 	    savings = (base_cand->dead_savings
1622 		       + stmt_cost (base_cand->cand_stmt, speed));
1623 
1624 	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
1625 					 base_cand->base_expr,
1626 					 base_cand->index, base_cand->stride,
1627 					 ctype, base_cand->stride_type,
1628 					 savings);
1629 	  if (!first_cand)
1630 	    first_cand = c;
1631 
1632 	  if (first_cand != c)
1633 	    c->first_interp = first_cand->cand_num;
1634 
1635 	  base_cand = lookup_cand (base_cand->next_interp);
1636 	}
1637     }
1638   else
1639     {
1640       /* If nothing is known about the RHS, create fresh CAND_ADD and
1641 	 CAND_MULT interpretations:
1642 
1643 	 X = Y + (0 * 1)
1644 	 X = (Y + 0) * 1
1645 
1646 	 The first of these is somewhat arbitrary, but the choice of
1647 	 1 for the stride simplifies the logic for propagating casts
1648 	 into their uses.  */
1649       c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1650 				     integer_one_node, ctype, sizetype, 0);
1651       c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1652 				      integer_one_node, ctype, sizetype, 0);
1653       c->next_interp = c2->cand_num;
1654       c2->first_interp = c->cand_num;
1655     }
1656 
1657   /* Add the first (or only) interpretation to the statement-candidate
1658      mapping.  */
1659   add_cand_for_stmt (gs, c);
1660 }
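
/* A hypothetical example of the above: if an earlier statement gave
   T1 an interpretation with base B, index 3, and stride S, then for
   T2 = (long) T1 we record the same interpretation for T2, but with
   candidate type long.  If instead nothing were known about T1, T2
   would simply receive the fresh T2 = T1 + (0 * 1) and
   T2 = (T1 + 0) * 1 interpretations described above.  */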
1661 
1662 /* Given GS which is a copy of a scalar integer type, make at least one
1663    appropriate entry in the candidate table.
1664 
1665    This interface is included for completeness, but is unnecessary
1666    if this pass immediately follows a pass that performs copy
1667    propagation, such as DOM.  */
1668 
1669 static void
1670 slsr_process_copy (gimple *gs, tree rhs1, bool speed)
1671 {
1672   slsr_cand_t base_cand, c = NULL, c2;
1673   unsigned savings = 0;
1674 
1675   base_cand = base_cand_from_table (rhs1);
1676 
1677   if (base_cand && base_cand->kind != CAND_PHI)
1678     {
1679       slsr_cand_t first_cand = NULL;
1680 
1681       while (base_cand)
1682 	{
1683 	  /* Propagate all data from the base candidate.  */
1684 	  if (has_single_use (rhs1))
1685 	    savings = (base_cand->dead_savings
1686 		       + stmt_cost (base_cand->cand_stmt, speed));
1687 
1688 	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
1689 					 base_cand->base_expr,
1690 					 base_cand->index, base_cand->stride,
1691 					 base_cand->cand_type,
1692 					 base_cand->stride_type, savings);
1693 	  if (!first_cand)
1694 	    first_cand = c;
1695 
1696 	  if (first_cand != c)
1697 	    c->first_interp = first_cand->cand_num;
1698 
1699 	  base_cand = lookup_cand (base_cand->next_interp);
1700 	}
1701     }
1702   else
1703     {
1704       /* If nothing is known about the RHS, create fresh CAND_ADD and
1705 	 CAND_MULT interpretations:
1706 
1707 	 X = Y + (0 * 1)
1708 	 X = (Y + 0) * 1
1709 
1710 	 The first of these is somewhat arbitrary, but the choice of
1711 	 1 for the stride simplifies the logic for propagating casts
1712 	 into their uses.  */
1713       c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1714 				     integer_one_node, TREE_TYPE (rhs1),
1715 				     sizetype, 0);
1716       c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1717 				      integer_one_node, TREE_TYPE (rhs1),
1718 				      sizetype, 0);
1719       c->next_interp = c2->cand_num;
1720       c2->first_interp = c->cand_num;
1721     }
1722 
1723   /* Add the first (or only) interpretation to the statement-candidate
1724      mapping.  */
1725   add_cand_for_stmt (gs, c);
1726 }
1727 
1728 class find_candidates_dom_walker : public dom_walker
1729 {
1730 public:
1731   find_candidates_dom_walker (cdi_direction direction)
1732     : dom_walker (direction) {}
1733   virtual edge before_dom_children (basic_block);
1734 };
1735 
1736 /* Find strength-reduction candidates in block BB.  */
1737 
1738 edge
1739 find_candidates_dom_walker::before_dom_children (basic_block bb)
1740 {
1741   bool speed = optimize_bb_for_speed_p (bb);
1742 
1743   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1744        gsi_next (&gsi))
1745     slsr_process_phi (gsi.phi (), speed);
1746 
1747   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1748        gsi_next (&gsi))
1749     {
1750       gimple *gs = gsi_stmt (gsi);
1751 
1752       if (stmt_could_throw_p (cfun, gs))
1753 	continue;
1754 
1755       if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1756 	slsr_process_ref (gs);
1757 
1758       else if (is_gimple_assign (gs)
1759 	       && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))
1760 		   || POINTER_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))))
1761 	{
1762 	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1763 
1764 	  switch (gimple_assign_rhs_code (gs))
1765 	    {
1766 	    case MULT_EXPR:
1767 	    case PLUS_EXPR:
1768 	      rhs1 = gimple_assign_rhs1 (gs);
1769 	      rhs2 = gimple_assign_rhs2 (gs);
1770 	      /* Should never happen, but currently some buggy situations
1771 		 in earlier phases put constants in rhs1.  */
1772 	      if (TREE_CODE (rhs1) != SSA_NAME)
1773 		continue;
1774 	      break;
1775 
1776 	    /* Possible future opportunity: rhs1 of a ptr+ can be
1777 	       an ADDR_EXPR.  */
1778 	    case POINTER_PLUS_EXPR:
1779 	    case MINUS_EXPR:
1780 	      rhs2 = gimple_assign_rhs2 (gs);
1781 	      gcc_fallthrough ();
1782 
1783 	    CASE_CONVERT:
1784 	    case SSA_NAME:
1785 	    case NEGATE_EXPR:
1786 	      rhs1 = gimple_assign_rhs1 (gs);
1787 	      if (TREE_CODE (rhs1) != SSA_NAME)
1788 		continue;
1789 	      break;
1790 
1791 	    default:
1792 	      ;
1793 	    }
1794 
1795 	  switch (gimple_assign_rhs_code (gs))
1796 	    {
1797 	    case MULT_EXPR:
1798 	      slsr_process_mul (gs, rhs1, rhs2, speed);
1799 	      break;
1800 
1801 	    case PLUS_EXPR:
1802 	    case POINTER_PLUS_EXPR:
1803 	    case MINUS_EXPR:
1804 	      slsr_process_add (gs, rhs1, rhs2, speed);
1805 	      break;
1806 
1807 	    case NEGATE_EXPR:
1808 	      slsr_process_neg (gs, rhs1, speed);
1809 	      break;
1810 
1811 	    CASE_CONVERT:
1812 	      slsr_process_cast (gs, rhs1, speed);
1813 	      break;
1814 
1815 	    case SSA_NAME:
1816 	      slsr_process_copy (gs, rhs1, speed);
1817 	      break;
1818 
1819 	    default:
1820 	      ;
1821 	    }
1822 	}
1823     }
1824   return NULL;
1825 }
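
/* For a block containing, say,

     a_5 = b_3 * c_4;
     d_6 = a_5 + 16;
     e_7 = (long) d_6;
     f_8 = -e_7;

   the walk above hands a_5 to slsr_process_mul, d_6 to
   slsr_process_add, e_7 to slsr_process_cast, and f_8 to
   slsr_process_neg.  (The SSA names here are illustrative only.)  */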
1826 
1827 /* Dump a candidate for debug.  */
1828 
1829 static void
1830 dump_candidate (slsr_cand_t c)
1831 {
1832   fprintf (dump_file, "%3d  [%d] ", c->cand_num,
1833 	   gimple_bb (c->cand_stmt)->index);
1834   print_gimple_stmt (dump_file, c->cand_stmt, 0);
1835   switch (c->kind)
1836     {
1837     case CAND_MULT:
1838       fputs ("     MULT : (", dump_file);
1839       print_generic_expr (dump_file, c->base_expr);
1840       fputs (" + ", dump_file);
1841       print_decs (c->index, dump_file);
1842       fputs (") * ", dump_file);
1843       if (TREE_CODE (c->stride) != INTEGER_CST
1844 	  && c->stride_type != TREE_TYPE (c->stride))
1845 	{
1846 	  fputs ("(", dump_file);
1847 	  print_generic_expr (dump_file, c->stride_type);
1848 	  fputs (")", dump_file);
1849 	}
1850       print_generic_expr (dump_file, c->stride);
1851       fputs (" : ", dump_file);
1852       break;
1853     case CAND_ADD:
1854       fputs ("     ADD  : ", dump_file);
1855       print_generic_expr (dump_file, c->base_expr);
1856       fputs (" + (", dump_file);
1857       print_decs (c->index, dump_file);
1858       fputs (" * ", dump_file);
1859       if (TREE_CODE (c->stride) != INTEGER_CST
1860 	  && c->stride_type != TREE_TYPE (c->stride))
1861 	{
1862 	  fputs ("(", dump_file);
1863 	  print_generic_expr (dump_file, c->stride_type);
1864 	  fputs (")", dump_file);
1865 	}
1866       print_generic_expr (dump_file, c->stride);
1867       fputs (") : ", dump_file);
1868       break;
1869     case CAND_REF:
1870       fputs ("     REF  : ", dump_file);
1871       print_generic_expr (dump_file, c->base_expr);
1872       fputs (" + (", dump_file);
1873       print_generic_expr (dump_file, c->stride);
1874       fputs (") + ", dump_file);
1875       print_decs (c->index, dump_file);
1876       fputs (" : ", dump_file);
1877       break;
1878     case CAND_PHI:
1879       fputs ("     PHI  : ", dump_file);
1880       print_generic_expr (dump_file, c->base_expr);
1881       fputs (" + (unknown * ", dump_file);
1882       print_generic_expr (dump_file, c->stride);
1883       fputs (") : ", dump_file);
1884       break;
1885     default:
1886       gcc_unreachable ();
1887     }
1888   print_generic_expr (dump_file, c->cand_type);
1889   fprintf (dump_file, "\n     basis: %d  dependent: %d  sibling: %d\n",
1890 	   c->basis, c->dependent, c->sibling);
1891   fprintf (dump_file,
1892 	   "     next-interp: %d  first-interp: %d  dead-savings: %d\n",
1893 	   c->next_interp, c->first_interp, c->dead_savings);
1894   if (c->def_phi)
1895     fprintf (dump_file, "     phi:  %d\n", c->def_phi);
1896   fputs ("\n", dump_file);
1897 }
1898 
1899 /* Dump the candidate vector for debug.  */
1900 
1901 static void
1902 dump_cand_vec (void)
1903 {
1904   unsigned i;
1905   slsr_cand_t c;
1906 
1907   fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1908 
1909   FOR_EACH_VEC_ELT (cand_vec, i, c)
1910     if (c != NULL)
1911       dump_candidate (c);
1912 }
1913 
1914 /* Callback used to dump the candidate chains hash table.  */
1915 
1916 int
1917 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1918 {
1919   const_cand_chain_t chain = *slot;
1920   cand_chain_t p;
1921 
1922   print_generic_expr (dump_file, chain->base_expr);
1923   fprintf (dump_file, " -> %d", chain->cand->cand_num);
1924 
1925   for (p = chain->next; p; p = p->next)
1926     fprintf (dump_file, " -> %d", p->cand->cand_num);
1927 
1928   fputs ("\n", dump_file);
1929   return 1;
1930 }
1931 
1932 /* Dump the candidate chains.  */
1933 
1934 static void
1935 dump_cand_chains (void)
1936 {
1937   fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1938   base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1939     (NULL);
1940   fputs ("\n", dump_file);
1941 }
1942 
1943 /* Dump the increment vector for debug.  */
1944 
1945 static void
1946 dump_incr_vec (void)
1947 {
1948   if (dump_file && (dump_flags & TDF_DETAILS))
1949     {
1950       unsigned i;
1951 
1952       fprintf (dump_file, "\nIncrement vector:\n\n");
1953 
1954       for (i = 0; i < incr_vec_len; i++)
1955 	{
1956 	  fprintf (dump_file, "%3d  increment:   ", i);
1957 	  print_decs (incr_vec[i].incr, dump_file);
1958 	  fprintf (dump_file, "\n     count:       %d", incr_vec[i].count);
1959 	  fprintf (dump_file, "\n     cost:        %d", incr_vec[i].cost);
1960 	  fputs ("\n     initializer: ", dump_file);
1961 	  print_generic_expr (dump_file, incr_vec[i].initializer);
1962 	  fputs ("\n\n", dump_file);
1963 	}
1964     }
1965 }
1966 
1967 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1968    data reference.  */
1969 
1970 static void
1971 replace_ref (tree *expr, slsr_cand_t c)
1972 {
1973   tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1974   unsigned HOST_WIDE_INT misalign;
1975   unsigned align;
1976 
1977   /* Ensure the memory reference carries the minimum alignment
1978      requirement for the data type.  See PR58041.  */
1979   get_object_alignment_1 (*expr, &align, &misalign);
1980   if (misalign != 0)
1981     align = least_bit_hwi (misalign);
1982   if (align < TYPE_ALIGN (acc_type))
1983     acc_type = build_aligned_type (acc_type, align);
1984 
1985   add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
1986 			  c->base_expr, c->stride);
1987   mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1988 			 wide_int_to_tree (c->cand_type, c->index));
1989 
1990   /* Gimplify the base addressing expression for the new MEM_REF tree.  */
1991   gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1992   TREE_OPERAND (mem_ref, 0)
1993     = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1994 				/*simple_p=*/true, NULL,
1995 				/*before=*/true, GSI_SAME_STMT);
1996   copy_ref_info (mem_ref, *expr);
1997   *expr = mem_ref;
1998   update_stmt (c->cand_stmt);
1999 }
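
/* To sketch the effect of replace_ref: for a CAND_REF with base B,
   stride S, and constant index I, the addressing expression of the
   original reference is replaced by a MEM_REF whose address is the
   single POINTER_PLUS_EXPR (B + S) and whose offset operand is I.
   (The exact printed form depends on the dump format.)  */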
2000 
2001 /* Return true if CAND_REF candidate C is a valid memory reference.  */
2002 
2003 static bool
2004 valid_mem_ref_cand_p (slsr_cand_t c)
2005 {
2006   if (TREE_CODE (TREE_OPERAND (c->stride, 1)) != INTEGER_CST)
2007     return false;
2008 
2009   struct mem_address addr
2010     = { NULL_TREE, c->base_expr, TREE_OPERAND (c->stride, 0),
2011 	TREE_OPERAND (c->stride, 1), wide_int_to_tree (sizetype, c->index) };
2012 
2013   return
2014     valid_mem_ref_p (TYPE_MODE (c->cand_type), TYPE_ADDR_SPACE (c->cand_type),
2015 		     &addr);
2016 }
2017 
2018 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
2019    dependent of candidate C with an equivalent strength-reduced data
2020    reference.  */
2021 
2022 static void
2023 replace_refs (slsr_cand_t c)
2024 {
2025   /* Replacing a chain of only two candidates that are valid memory references
2026      is generally counter-productive, because the cost of the additional
2027      address calculation inserted ahead of them cannot be recouped.  */
2028   if (c->basis == 0
2029       && c->dependent
2030       && !lookup_cand (c->dependent)->dependent
2031       && valid_mem_ref_cand_p (c)
2032       && valid_mem_ref_cand_p (lookup_cand (c->dependent)))
2033     return;
2034 
2035   if (dump_file && (dump_flags & TDF_DETAILS))
2036     {
2037       fputs ("Replacing reference: ", dump_file);
2038       print_gimple_stmt (dump_file, c->cand_stmt, 0);
2039     }
2040 
2041   if (gimple_vdef (c->cand_stmt))
2042     {
2043       tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
2044       replace_ref (lhs, c);
2045     }
2046   else
2047     {
2048       tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
2049       replace_ref (rhs, c);
2050     }
2051 
2052   if (dump_file && (dump_flags & TDF_DETAILS))
2053     {
2054       fputs ("With: ", dump_file);
2055       print_gimple_stmt (dump_file, c->cand_stmt, 0);
2056       fputs ("\n", dump_file);
2057     }
2058 
2059   if (c->sibling)
2060     replace_refs (lookup_cand (c->sibling));
2061 
2062   if (c->dependent)
2063     replace_refs (lookup_cand (c->dependent));
2064 }
2065 
2066 /* Return TRUE if candidate C is dependent upon a PHI.  */
2067 
2068 static bool
2069 phi_dependent_cand_p (slsr_cand_t c)
2070 {
2071   /* A candidate is not necessarily dependent upon a PHI just because
2072      it has a phi definition for its base name.  It may have a basis
2073      that relies upon the same phi definition, in which case the PHI
2074      is irrelevant to this candidate.  */
2075   return (c->def_phi
2076 	  && c->basis
2077 	  && lookup_cand (c->basis)->def_phi != c->def_phi);
2078 }
2079 
2080 /* Calculate the increment required for candidate C relative to
2081    its basis.  */
2082 
2083 static widest_int
2084 cand_increment (slsr_cand_t c)
2085 {
2086   slsr_cand_t basis;
2087 
2088   /* If the candidate doesn't have a basis, just return its own
2089      index.  This is useful in record_increments to help us find
2090      an existing initializer.  Also, if the candidate's basis is
2091      hidden by a phi, then its own index will be the increment
2092      from the newly introduced phi basis.  */
2093   if (!c->basis || phi_dependent_cand_p (c))
2094     return c->index;
2095 
2096   basis = lookup_cand (c->basis);
2097   gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
2098   return c->index - basis->index;
2099 }
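
/* A worked example (values hypothetical): if C is X = (B + 7) * S
   and its basis is Y = (B + 2) * S, cand_increment returns
   7 - 2 = 5.  If C has no basis, or its basis is hidden behind a
   phi, the result is just C's own index, 7.  */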
2100 
2101 /* Calculate the increment required for candidate C relative to
2102    its basis.  If we aren't going to generate pointer arithmetic
2103    for this candidate, return the absolute value of that increment
2104    instead.  */
2105 
2106 static inline widest_int
2107 cand_abs_increment (slsr_cand_t c)
2108 {
2109   widest_int increment = cand_increment (c);
2110 
2111   if (!address_arithmetic_p && wi::neg_p (increment))
2112     increment = -increment;
2113 
2114   return increment;
2115 }
2116 
2117 /* Return TRUE iff candidate C has already been replaced under
2118    another interpretation.  */
2119 
2120 static inline bool
2121 cand_already_replaced (slsr_cand_t c)
2122 {
2123   return (gimple_bb (c->cand_stmt) == 0);
2124 }
2125 
2126 /* Common logic used by replace_unconditional_candidate and
2127    replace_conditional_candidate.  */
2128 
2129 static void
2130 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2131 {
2132   tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2133   enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2134 
2135   /* It is not useful to replace casts, copies, negates, or adds of
2136      an SSA name and a constant.  */
2137   if (cand_code == SSA_NAME
2138       || CONVERT_EXPR_CODE_P (cand_code)
2139       || cand_code == PLUS_EXPR
2140       || cand_code == POINTER_PLUS_EXPR
2141       || cand_code == MINUS_EXPR
2142       || cand_code == NEGATE_EXPR)
2143     return;
2144 
2145   enum tree_code code = PLUS_EXPR;
2146   tree bump_tree;
2147   gimple *stmt_to_print = NULL;
2148 
2149   if (wi::neg_p (bump))
2150     {
2151       code = MINUS_EXPR;
2152       bump = -bump;
2153     }
2154 
2155   /* It is possible that the resulting bump doesn't fit in target_type.
2156      Abandon the replacement in this case.  This does not affect
2157      siblings or dependents of C.  */
2158   if (bump != wi::ext (bump, TYPE_PRECISION (target_type),
2159 		       TYPE_SIGN (target_type)))
2160     return;
2161 
2162   bump_tree = wide_int_to_tree (target_type, bump);
2163 
2164   /* If the basis name and the candidate's LHS have incompatible types,
2165      introduce a cast.  */
2166   if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2167     basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2168 
2169   if (dump_file && (dump_flags & TDF_DETAILS))
2170     {
2171       fputs ("Replacing: ", dump_file);
2172       print_gimple_stmt (dump_file, c->cand_stmt, 0);
2173     }
2174 
2175   if (bump == 0)
2176     {
2177       tree lhs = gimple_assign_lhs (c->cand_stmt);
2178       gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
2179       gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2180       slsr_cand_t cc = lookup_cand (c->first_interp);
2181       gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2182       gsi_replace (&gsi, copy_stmt, false);
2183       while (cc)
2184 	{
2185 	  cc->cand_stmt = copy_stmt;
2186 	  cc = lookup_cand (cc->next_interp);
2187 	}
2188       if (dump_file && (dump_flags & TDF_DETAILS))
2189 	stmt_to_print = copy_stmt;
2190     }
2191   else
2192     {
2193       tree rhs1, rhs2;
2194       if (cand_code != NEGATE_EXPR) {
2195 	rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2196 	rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2197       }
2198       if (cand_code != NEGATE_EXPR
2199 	  && ((operand_equal_p (rhs1, basis_name, 0)
2200 	       && operand_equal_p (rhs2, bump_tree, 0))
2201 	      || (operand_equal_p (rhs1, bump_tree, 0)
2202 		  && operand_equal_p (rhs2, basis_name, 0))))
2203 	{
2204 	  if (dump_file && (dump_flags & TDF_DETAILS))
2205 	    {
2206 	      fputs ("(duplicate, not actually replacing)", dump_file);
2207 	      stmt_to_print = c->cand_stmt;
2208 	    }
2209 	}
2210       else
2211 	{
2212 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2213 	  slsr_cand_t cc = lookup_cand (c->first_interp);
2214 	  gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
2215 	  update_stmt (gsi_stmt (gsi));
2216 	  while (cc)
2217 	    {
2218 	      cc->cand_stmt = gsi_stmt (gsi);
2219 	      cc = lookup_cand (cc->next_interp);
2220 	    }
2221 	  if (dump_file && (dump_flags & TDF_DETAILS))
2222 	    stmt_to_print = gsi_stmt (gsi);
2223 	}
2224     }
2225 
2226   if (dump_file && (dump_flags & TDF_DETAILS))
2227     {
2228       fputs ("With: ", dump_file);
2229       print_gimple_stmt (dump_file, stmt_to_print, 0);
2230       fputs ("\n", dump_file);
2231     }
2232 }
2233 
2234 /* Replace candidate C with an add or subtract.   Note that we only
2235    operate on CAND_MULTs with known strides, so we will never generate
2236    a POINTER_PLUS_EXPR.  Each candidate X = (B + i) * S is replaced by
2237    X = Y + ((i - i') * S), as described in the module commentary.  The
2238    folded value ((i - i') * S) is referred to here as the "bump."  */
2239 
2240 static void
2241 replace_unconditional_candidate (slsr_cand_t c)
2242 {
2243   slsr_cand_t basis;
2244 
2245   if (cand_already_replaced (c))
2246     return;
2247 
2248   basis = lookup_cand (c->basis);
2249   widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2250 
2251   replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2252 }
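
/* Continuing the hypothetical example from cand_increment: with a
   known stride of 4, the bump for X = (B + 7) * 4 relative to the
   basis Y = (B + 2) * 4 is (7 - 2) * 4 = 20, so the statement
   defining X is rewritten as X = Y + 20.  */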
2253 
2254 /* Return the index in the increment vector of the given INCREMENT,
2255    or -1 if not found.  The latter can occur if more than
2256    MAX_INCR_VEC_LEN increments have been found.  */
2257 
2258 static inline int
2259 incr_vec_index (const widest_int &increment)
2260 {
2261   unsigned i;
2262 
2263   for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2264     ;
2265 
2266   if (i < incr_vec_len)
2267     return i;
2268   else
2269     return -1;
2270 }
2271 
2272 /* Create a new statement along edge E to add BASIS_NAME to the product
2273    of INCREMENT and the stride of candidate C.  Create and return a new
2274    SSA name from *VAR to be used as the LHS of the new statement.
2275    KNOWN_STRIDE is true iff C's stride is a constant.  */
2276 
2277 static tree
2278 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2279 			     widest_int increment, edge e, location_t loc,
2280 			     bool known_stride)
2281 {
2282   tree lhs, basis_type;
2283   gassign *new_stmt, *cast_stmt = NULL;
2284 
2285   /* If the add candidate along this incoming edge has the same
2286      index as C's hidden basis, the hidden basis represents this
2287      edge correctly.  */
2288   if (increment == 0)
2289     return basis_name;
2290 
2291   basis_type = TREE_TYPE (basis_name);
2292   lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2293 
2294   /* Occasionally people convert integers to pointers without a
2295      cast, leading us into trouble if we aren't careful.  */
2296   enum tree_code plus_code
2297     = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR;
2298 
2299   if (known_stride)
2300     {
2301       tree bump_tree;
2302       enum tree_code code = plus_code;
2303       widest_int bump = increment * wi::to_widest (c->stride);
2304       if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type))
2305 	{
2306 	  code = MINUS_EXPR;
2307 	  bump = -bump;
2308 	}
2309 
2310       tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type;
2311       bump_tree = wide_int_to_tree (stride_type, bump);
2312       new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
2313     }
2314   else
2315     {
2316       int i;
2317       bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment);
2318       i = incr_vec_index (negate_incr ? -increment : increment);
2319       gcc_assert (i >= 0);
2320 
2321       if (incr_vec[i].initializer)
2322 	{
2323 	  enum tree_code code = negate_incr ? MINUS_EXPR : plus_code;
2324 	  new_stmt = gimple_build_assign (lhs, code, basis_name,
2325 					  incr_vec[i].initializer);
2326 	}
2327       else {
2328 	tree stride;
2329 
2330 	if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
2331 	  {
2332 	    tree cast_stride = make_temp_ssa_name (c->stride_type, NULL,
2333 						   "slsr");
2334 	    cast_stmt = gimple_build_assign (cast_stride, NOP_EXPR,
2335 					     c->stride);
2336 	    stride = cast_stride;
2337 	  }
2338 	else
2339 	  stride = c->stride;
2340 
2341 	if (increment == 1)
2342 	  new_stmt = gimple_build_assign (lhs, plus_code, basis_name, stride);
2343 	else if (increment == -1)
2344 	  new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, stride);
2345 	else
2346 	  gcc_unreachable ();
2347       }
2348     }
2349 
2350   if (cast_stmt)
2351     {
2352       gimple_set_location (cast_stmt, loc);
2353       gsi_insert_on_edge (e, cast_stmt);
2354     }
2355 
2356   gimple_set_location (new_stmt, loc);
2357   gsi_insert_on_edge (e, new_stmt);
2358 
2359   if (dump_file && (dump_flags & TDF_DETAILS))
2360     {
2361       if (cast_stmt)
2362 	{
2363 	  fprintf (dump_file, "Inserting cast on edge %d->%d: ",
2364 		   e->src->index, e->dest->index);
2365 	  print_gimple_stmt (dump_file, cast_stmt, 0);
2366 	}
2367       fprintf (dump_file, "Inserting on edge %d->%d: ", e->src->index,
2368 	       e->dest->index);
2369       print_gimple_stmt (dump_file, new_stmt, 0);
2370     }
2371 
2372   return lhs;
2373 }
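
/* A sketch (not taken from real output): with a known stride of 4
   and an increment of 3 along edge E, this inserts
   slsr_N = basis_name + 12 on E.  With an unknown stride, a
   recorded initializer T_0 = stride * increment is added or
   subtracted if one exists; otherwise the increment must be 1 or
   -1, and the stride itself is added or subtracted.  */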
2374 
2375 /* Clear the visited field for a tree of PHI candidates.  */
2376 
2377 static void
2378 clear_visited (gphi *phi)
2379 {
2380   unsigned i;
2381   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2382 
2383   if (phi_cand->visited)
2384     {
2385       phi_cand->visited = 0;
2386 
2387       for (i = 0; i < gimple_phi_num_args (phi); i++)
2388 	{
2389 	  tree arg = gimple_phi_arg_def (phi, i);
2390 	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2391 	  if (gimple_code (arg_def) == GIMPLE_PHI)
2392 	    clear_visited (as_a <gphi *> (arg_def));
2393 	}
2394     }
2395 }
2396 
2397 /* Recursive helper function for create_phi_basis.  */
2398 
2399 static tree
2400 create_phi_basis_1 (slsr_cand_t c, gimple *from_phi, tree basis_name,
2401 		    location_t loc, bool known_stride)
2402 {
2403   int i;
2404   tree name, phi_arg;
2405   gphi *phi;
2406   slsr_cand_t basis = lookup_cand (c->basis);
2407   int nargs = gimple_phi_num_args (from_phi);
2408   basic_block phi_bb = gimple_bb (from_phi);
2409   slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi);
2410   auto_vec<tree> phi_args (nargs);
2411 
2412   if (phi_cand->visited)
2413     return phi_cand->cached_basis;
2414   phi_cand->visited = 1;
2415 
2416   /* Process each argument of the existing phi that represents
2417      conditionally-executed add candidates.  */
2418   for (i = 0; i < nargs; i++)
2419     {
2420       edge e = (*phi_bb->preds)[i];
2421       tree arg = gimple_phi_arg_def (from_phi, i);
2422       tree feeding_def;
2423 
2424       /* If the phi argument is the base name of the CAND_PHI, then
2425 	 this incoming arc should use the hidden basis.  */
2426       if (operand_equal_p (arg, phi_cand->base_expr, 0))
2427 	if (basis->index == 0)
2428 	  feeding_def = gimple_assign_lhs (basis->cand_stmt);
2429 	else
2430 	  {
2431 	    widest_int incr = -basis->index;
2432 	    feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2433 						       e, loc, known_stride);
2434 	  }
2435       else
2436 	{
2437 	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2438 
2439 	  /* If there is another phi along this incoming edge, we must
2440 	     process it in the same fashion to ensure that all basis
2441 	     adjustments are made along its incoming edges.  */
2442 	  if (gimple_code (arg_def) == GIMPLE_PHI)
2443 	    feeding_def = create_phi_basis_1 (c, arg_def, basis_name,
2444 					      loc, known_stride);
2445 	  else
2446 	    {
2447 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
2448 	      widest_int diff = arg_cand->index - basis->index;
2449 	      feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2450 							 e, loc, known_stride);
2451 	    }
2452 	}
2453 
2454       /* Because of recursion, we need to save the arguments in a vector
2455 	 so we can create the PHI statement all at once.  Otherwise the
2456 	 storage for the half-created PHI can be reclaimed.  */
2457       phi_args.safe_push (feeding_def);
2458     }
2459 
2460   /* Create the new phi basis.  */
2461   name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2462   phi = create_phi_node (name, phi_bb);
2463   SSA_NAME_DEF_STMT (name) = phi;
2464 
2465   FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2466     {
2467       edge e = (*phi_bb->preds)[i];
2468       add_phi_arg (phi, phi_arg, e, loc);
2469     }
2470 
2471   update_stmt (phi);
2472 
2473   if (dump_file && (dump_flags & TDF_DETAILS))
2474     {
2475       fputs ("Introducing new phi basis: ", dump_file);
2476       print_gimple_stmt (dump_file, phi, 0);
2477     }
2478 
2479   phi_cand->cached_basis = name;
2480   return name;
2481 }
2482 
2483 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2484    is hidden by the phi node FROM_PHI, create a new phi node in the same
2485    block as FROM_PHI.  The new phi is suitable for use as a basis by C,
2486    with its phi arguments representing conditional adjustments to the
2487    hidden basis along conditional incoming paths.  Those adjustments are
2488    made by creating add statements (and sometimes recursively creating
2489    phis) along those incoming paths.  LOC is the location to attach to
2490    the introduced statements.  KNOWN_STRIDE is true iff C's stride is a
2491    constant.  */
2492 
2493 static tree
2494 create_phi_basis (slsr_cand_t c, gimple *from_phi, tree basis_name,
2495 		  location_t loc, bool known_stride)
2496 {
2497   tree retval = create_phi_basis_1 (c, from_phi, basis_name, loc,
2498 				    known_stride);
2499   gcc_assert (retval);
2500   clear_visited (as_a <gphi *> (from_phi));
2501   return retval;
2502 }
2503 
2504 /* Given a candidate C whose basis is hidden by at least one intervening
2505    phi, introduce a matching number of new phis to represent its basis
2506    adjusted by conditional increments along possible incoming paths.  Then
2507    replace C as though it were an unconditional candidate, using the new
2508    basis.  */
2509 
2510 static void
2511 replace_conditional_candidate (slsr_cand_t c)
2512 {
2513   tree basis_name, name;
2514   slsr_cand_t basis;
2515   location_t loc;
2516 
2517   /* Look up the LHS SSA name from C's basis.  This will be the
2518      RHS1 of the adds we will introduce to create new phi arguments.  */
2519   basis = lookup_cand (c->basis);
2520   basis_name = gimple_assign_lhs (basis->cand_stmt);
2521 
2522   /* Create a new phi statement which will represent C's true basis
2523      after the transformation is complete.  */
2524   loc = gimple_location (c->cand_stmt);
2525   name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2526 			   basis_name, loc, KNOWN_STRIDE);
2527 
2528   /* Replace C with an add of the new basis phi and a constant.  */
2529   widest_int bump = c->index * wi::to_widest (c->stride);
2530 
2531   replace_mult_candidate (c, name, bump);
2532 }
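
/* A small made-up flow to illustrate the above:

     if (...)
       a_1 = x_0 + 8;
     a_2 = PHI <a_1, x_0>;
     r_3 = a_2 * 4;

   where r_3's hidden basis is y_9 = x_0 * 4.  A new phi such as
   slsr_10 = PHI <t_11, y_9> is introduced, with t_11 = y_9 + 32
   inserted on the edge carrying a_1, and r_3 is then replaced in
   terms of the new phi (here, since the remaining bump is zero,
   simply by the copy r_3 = slsr_10).  */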
2533 
2534 /* Recursive helper function for phi_add_costs.  SPREAD is a measure of
2535    how many PHI nodes we have visited at this point in the tree walk.  */
2536 
2537 static int
2538 phi_add_costs_1 (gimple *phi, slsr_cand_t c, int one_add_cost, int *spread)
2539 {
2540   unsigned i;
2541   int cost = 0;
2542   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2543 
2544   if (phi_cand->visited)
2545     return 0;
2546 
2547   phi_cand->visited = 1;
2548   (*spread)++;
2549 
2550   /* If we work our way back to a phi that isn't dominated by the hidden
2551      basis, this isn't a candidate for replacement.  Indicate this by
2552      returning an unreasonably high cost.  It's not easy to detect
2553      these situations when determining the basis, so we defer the
2554      decision until now.  */
2555   basic_block phi_bb = gimple_bb (phi);
2556   slsr_cand_t basis = lookup_cand (c->basis);
2557   basic_block basis_bb = gimple_bb (basis->cand_stmt);
2558 
2559   if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2560     return COST_INFINITE;
2561 
2562   for (i = 0; i < gimple_phi_num_args (phi); i++)
2563     {
2564       tree arg = gimple_phi_arg_def (phi, i);
2565 
2566       if (arg != phi_cand->base_expr)
2567 	{
2568 	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2569 
2570 	  if (gimple_code (arg_def) == GIMPLE_PHI)
2571 	    {
2572 	      cost += phi_add_costs_1 (arg_def, c, one_add_cost, spread);
2573 
2574 	      if (cost >= COST_INFINITE || *spread > MAX_SPREAD)
2575 		return COST_INFINITE;
2576 	    }
2577 	  else
2578 	    {
2579 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
2580 
2581 	      if (arg_cand->index != c->index)
2582 		cost += one_add_cost;
2583 	    }
2584 	}
2585     }
2586 
2587   return cost;
2588 }
2589 
2590 /* Compute the expected costs of inserting basis adjustments for
2591    candidate C with phi-definition PHI.  The cost of inserting
2592    one adjustment is given by ONE_ADD_COST.  If PHI has arguments
2593    which are themselves phi results, recursively calculate costs
2594    for those phis as well.  */
2595 
2596 static int
2597 phi_add_costs (gimple *phi, slsr_cand_t c, int one_add_cost)
2598 {
2599   int spread = 0;
2600   int retval = phi_add_costs_1 (phi, c, one_add_cost, &spread);
2601   clear_visited (as_a <gphi *> (phi));
2602   return retval;
2603 }
2604 /* For candidate C, each sibling of candidate C, and each dependent of
2605    candidate C, determine whether the candidate is dependent upon a
2606    phi that hides its basis.  If not, replace the candidate unconditionally.
2607    Otherwise, determine whether the cost of introducing compensation code
2608    for the candidate is offset by the gains from strength reduction.  If
2609    so, replace the candidate and introduce the compensation code.  */
2610 
2611 static void
2612 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2613 {
2614   if (phi_dependent_cand_p (c))
2615     {
2616       /* A multiply candidate with a stride of 1 is just an artifice
2617 	 of a copy or cast; there is no value in replacing it.  */
2618       if (c->kind == CAND_MULT && wi::to_widest (c->stride) != 1)
2619 	{
2620 	  /* A candidate dependent upon a phi will replace a multiply by
2621 	     a constant with an add, and will insert at most one add for
2622 	     each phi argument.  Add these costs with the potential dead-code
2623 	     savings to determine profitability.  */
2624 	  bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2625 	  int mult_savings = stmt_cost (c->cand_stmt, speed);
2626 	  gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2627 	  tree phi_result = gimple_phi_result (phi);
2628 	  int one_add_cost = add_cost (speed,
2629 				       TYPE_MODE (TREE_TYPE (phi_result)));
2630 	  int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2631 	  int cost = add_costs - mult_savings - c->dead_savings;
2632 
2633 	  if (dump_file && (dump_flags & TDF_DETAILS))
2634 	    {
2635 	      fprintf (dump_file, "  Conditional candidate %d:\n", c->cand_num);
2636 	      fprintf (dump_file, "    add_costs = %d\n", add_costs);
2637 	      fprintf (dump_file, "    mult_savings = %d\n", mult_savings);
2638 	      fprintf (dump_file, "    dead_savings = %d\n", c->dead_savings);
2639 	      fprintf (dump_file, "    cost = %d\n", cost);
2640 	      if (cost <= COST_NEUTRAL)
2641 		fputs ("  Replacing...\n", dump_file);
2642 	      else
2643 		fputs ("  Not replaced.\n", dump_file);
2644 	    }
2645 
2646 	  if (cost <= COST_NEUTRAL)
2647 	    replace_conditional_candidate (c);
2648 	}
2649     }
2650   else
2651     replace_unconditional_candidate (c);
2652 
2653   if (c->sibling)
2654     replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2655 
2656   if (c->dependent)
2657     replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2658 }
2659 
2660 /* Count the number of candidates in the tree rooted at C that have
2661    not already been replaced under other interpretations.  */
2662 
2663 static int
2664 count_candidates (slsr_cand_t c)
2665 {
2666   unsigned count = cand_already_replaced (c) ? 0 : 1;
2667 
2668   if (c->sibling)
2669     count += count_candidates (lookup_cand (c->sibling));
2670 
2671   if (c->dependent)
2672     count += count_candidates (lookup_cand (c->dependent));
2673 
2674   return count;
2675 }
2676 
2677 /* Increase the count of INCREMENT by one in the increment vector.
2678    INCREMENT is associated with candidate C.  If INCREMENT is to be
2679    conditionally executed as part of a conditional candidate replacement,
2680    IS_PHI_ADJUST is true, otherwise false.  If an initializer
2681    T_0 = stride * I is provided by a candidate that dominates all
2682    candidates with the same increment, also record T_0 for subsequent use.  */
2683 
2684 static void
2685 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
2686 {
2687   bool found = false;
2688   unsigned i;
2689 
2690   /* Treat increments that differ only in sign as identical so as to
2691      share initializers, unless we are generating pointer arithmetic.  */
2692   if (!address_arithmetic_p && wi::neg_p (increment))
2693     increment = -increment;
2694 
2695   for (i = 0; i < incr_vec_len; i++)
2696     {
2697       if (incr_vec[i].incr == increment)
2698 	{
2699 	  incr_vec[i].count++;
2700 	  found = true;
2701 
2702 	  /* If we previously recorded an initializer that doesn't
2703 	     dominate this candidate, it's not going to be useful to
2704 	     us after all.  */
2705 	  if (incr_vec[i].initializer
2706 	      && !dominated_by_p (CDI_DOMINATORS,
2707 				  gimple_bb (c->cand_stmt),
2708 				  incr_vec[i].init_bb))
2709 	    {
2710 	      incr_vec[i].initializer = NULL_TREE;
2711 	      incr_vec[i].init_bb = NULL;
2712 	    }
2713 
2714 	  break;
2715 	}
2716     }
2717 
2718   if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2719     {
2720       /* The first time we see an increment, create the entry for it.
2721 	 If this is the root candidate which doesn't have a basis, set
2722 	 the count to zero.  We're only processing it so it can possibly
2723 	 provide an initializer for other candidates.  */
2724       incr_vec[incr_vec_len].incr = increment;
2725       incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2726       incr_vec[incr_vec_len].cost = COST_INFINITE;
2727 
2728       /* Optimistically record the first occurrence of this increment
2729 	 as providing an initializer (if it does); we will revise this
2730 	 opinion later if it doesn't dominate all other occurrences.
2731          Exception:  increments of 0, 1 never need initializers;
2732 	 and phi adjustments don't ever provide initializers.  */
2733       if (c->kind == CAND_ADD
2734 	  && !is_phi_adjust
2735 	  && c->index == increment
2736 	  && (increment > 1 || increment < 0)
2737 	  && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2738 	      || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2739 	{
2740 	  tree t0 = NULL_TREE;
2741 	  tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2742 	  tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2743 	  if (operand_equal_p (rhs1, c->base_expr, 0))
2744 	    t0 = rhs2;
2745 	  else if (operand_equal_p (rhs2, c->base_expr, 0))
2746 	    t0 = rhs1;
2747 	  if (t0
2748 	      && SSA_NAME_DEF_STMT (t0)
2749 	      && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2750 	    {
2751 	      incr_vec[incr_vec_len].initializer = t0;
2752 	      incr_vec[incr_vec_len++].init_bb
2753 		= gimple_bb (SSA_NAME_DEF_STMT (t0));
2754 	    }
2755 	  else
2756 	    {
2757 	      incr_vec[incr_vec_len].initializer = NULL_TREE;
2758 	      incr_vec[incr_vec_len++].init_bb = NULL;
2759 	    }
2760 	}
2761       else
2762 	{
2763 	  incr_vec[incr_vec_len].initializer = NULL_TREE;
2764 	  incr_vec[incr_vec_len++].init_bb = NULL;
2765 	}
2766     }
2767 }
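
/* A hypothetical example of the initializer logic above: for a
   candidate t_4 = x_2 + t_3 recorded as ADD x_2 + (5 * s_1), whose
   index 5 equals the increment being recorded, t_3 already holds
   s_1 * 5 and can serve as T_0 for any later candidate with
   increment 5 that is dominated by t_4's block.  */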
2768 
2769 /* Recursive helper function for record_phi_increments.  */
2770 
2771 static void
2772 record_phi_increments_1 (slsr_cand_t basis, gimple *phi)
2773 {
2774   unsigned i;
2775   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2776 
2777   if (phi_cand->visited)
2778     return;
2779   phi_cand->visited = 1;
2780 
2781   for (i = 0; i < gimple_phi_num_args (phi); i++)
2782     {
2783       tree arg = gimple_phi_arg_def (phi, i);
2784       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2785 
2786       if (gimple_code (arg_def) == GIMPLE_PHI)
2787 	record_phi_increments_1 (basis, arg_def);
2788       else
2789 	{
2790 	  widest_int diff;
2791 
2792 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
2793 	    {
2794 	      diff = -basis->index;
2795 	      record_increment (phi_cand, diff, PHI_ADJUST);
2796 	    }
2797 	  else
2798 	    {
2799 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
2800 	      diff = arg_cand->index - basis->index;
2801 	      record_increment (arg_cand, diff, PHI_ADJUST);
2802 	    }
2803 	}
2804     }
2805 }
2806 
2807 /* Given phi statement PHI that hides a candidate from its BASIS, find
2808    the increments along each incoming arc (recursively handling additional
2809    phis that may be present) and record them.  These increments are the
2810    difference in index between the index-adjusting statements and the
2811    index of the basis.  */
2812 
2813 static void
2814 record_phi_increments (slsr_cand_t basis, gimple *phi)
2815 {
2816   record_phi_increments_1 (basis, phi);
2817   clear_visited (as_a <gphi *> (phi));
2818 }
2819 
2820 /* Determine how many times each unique increment occurs in the set
2821    of candidates rooted at C's parent, recording the data in the
2822    increment vector.  For each unique increment I, if an initializer
2823    T_0 = stride * I is provided by a candidate that dominates all
2824    candidates with the same increment, also record T_0 for subsequent
2825    use.  */
2826 
2827 static void
2828 record_increments (slsr_cand_t c)
2829 {
2830   if (!cand_already_replaced (c))
2831     {
2832       if (!phi_dependent_cand_p (c))
2833 	record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2834       else
2835 	{
2836 	  /* A candidate with a basis hidden by a phi will have one
2837 	     increment for its relationship to the index represented by
2838 	     the phi, and potentially additional increments along each
2839 	     incoming edge.  For the root of the dependency tree (which
2840 	     has no basis), process just the initial index in case it has
2841 	     an initializer that can be used by subsequent candidates.  */
2842 	  record_increment (c, c->index, NOT_PHI_ADJUST);
2843 
2844 	  if (c->basis)
2845 	    record_phi_increments (lookup_cand (c->basis),
2846 				   lookup_cand (c->def_phi)->cand_stmt);
2847 	}
2848     }
2849 
2850   if (c->sibling)
2851     record_increments (lookup_cand (c->sibling));
2852 
2853   if (c->dependent)
2854     record_increments (lookup_cand (c->dependent));
2855 }
2856 
2857 /* Recursive helper function for phi_incr_cost.  */
2858 
2859 static int
2860 phi_incr_cost_1 (slsr_cand_t c, const widest_int &incr, gimple *phi,
2861 		 int *savings)
2862 {
2863   unsigned i;
2864   int cost = 0;
2865   slsr_cand_t basis = lookup_cand (c->basis);
2866   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2867 
2868   if (phi_cand->visited)
2869     return 0;
2870   phi_cand->visited = 1;
2871 
2872   for (i = 0; i < gimple_phi_num_args (phi); i++)
2873     {
2874       tree arg = gimple_phi_arg_def (phi, i);
2875       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2876 
2877       if (gimple_code (arg_def) == GIMPLE_PHI)
2878 	{
2879 	  int feeding_savings = 0;
2880 	  tree feeding_var = gimple_phi_result (arg_def);
2881 	  cost += phi_incr_cost_1 (c, incr, arg_def, &feeding_savings);
2882 	  if (uses_consumed_by_stmt (feeding_var, phi))
2883 	    *savings += feeding_savings;
2884 	}
2885       else
2886 	{
2887 	  widest_int diff;
2888 	  slsr_cand_t arg_cand;
2889 
2890 	  /* When the PHI argument is just a pass-through to the base
2891 	     expression of the hidden basis, the difference is zero minus
2892 	     the index of the basis.  There is no potential savings by
2893 	     eliminating a statement in this case.  */
2894 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
2895 	    {
2896 	      arg_cand = (slsr_cand_t)NULL;
2897 	      diff = -basis->index;
2898 	    }
2899 	  else
2900 	    {
2901 	      arg_cand = base_cand_from_table (arg);
2902 	      diff = arg_cand->index - basis->index;
2903 	    }
2904 
2905 	  if (incr == diff)
2906 	    {
2907 	      tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2908 	      cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2909 	      if (arg_cand)
2910 		{
2911 		  tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2912 		  if (uses_consumed_by_stmt (lhs, phi))
2913 		    *savings += stmt_cost (arg_cand->cand_stmt, true);
2914 		}
2915 	    }
2916 	}
2917     }
2918 
2919   return cost;
2920 }
2921 
2922 /* Add up and return the costs of introducing add statements that
2923    require the increment INCR on behalf of candidate C and phi
2924    statement PHI.  Accumulate into *SAVINGS the potential savings
2925    from removing existing statements that feed PHI and have no other
2926    uses.  */
2927 
2928 static int
2929 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple *phi,
2930 	       int *savings)
2931 {
2932   int retval = phi_incr_cost_1 (c, incr, phi, savings);
2933   clear_visited (as_a <gphi *> (phi));
2934   return retval;
2935 }
2936 
2937 /* Return the first candidate in the tree rooted at C that has not
2938    already been replaced, favoring siblings over dependents.  */
2939 
2940 static slsr_cand_t
2941 unreplaced_cand_in_tree (slsr_cand_t c)
2942 {
2943   if (!cand_already_replaced (c))
2944     return c;
2945 
2946   if (c->sibling)
2947     {
2948       slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2949       if (sib)
2950 	return sib;
2951     }
2952 
2953   if (c->dependent)
2954     {
2955       slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2956       if (dep)
2957 	return dep;
2958     }
2959 
2960   return NULL;
2961 }
2962 
2963 /* Return TRUE if the candidates in the tree rooted at C should be
2964    optimized for speed, else FALSE.  We estimate this based on the block
2965    containing the most dominant candidate in the tree that has not yet
2966    been replaced.  */
2967 
2968 static bool
2969 optimize_cands_for_speed_p (slsr_cand_t c)
2970 {
2971   slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2972   gcc_assert (c2);
2973   return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2974 }
2975 
2976 /* Add COST_IN to the lowest cost of any dependent path starting at
2977    candidate C or any of its siblings, counting only candidates along
2978    such paths with increment INCR.  Assume that replacing a candidate
2979    reduces cost by REPL_SAVINGS.  Also account for savings from any
2980    statements that would go dead.  If COUNT_PHIS is true, include
2981    costs of introducing feeding statements for conditional candidates.  */
2982 
2983 static int
2984 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2985 		  const widest_int &incr, bool count_phis)
2986 {
2987   int local_cost, sib_cost, savings = 0;
2988   widest_int cand_incr = cand_abs_increment (c);
2989 
2990   if (cand_already_replaced (c))
2991     local_cost = cost_in;
2992   else if (incr == cand_incr)
2993     local_cost = cost_in - repl_savings - c->dead_savings;
2994   else
2995     local_cost = cost_in - c->dead_savings;
2996 
2997   if (count_phis
2998       && phi_dependent_cand_p (c)
2999       && !cand_already_replaced (c))
3000     {
3001       gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
3002       local_cost += phi_incr_cost (c, incr, phi, &savings);
3003 
3004       if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
3005 	local_cost -= savings;
3006     }
3007 
3008   if (c->dependent)
3009     local_cost = lowest_cost_path (local_cost, repl_savings,
3010 				   lookup_cand (c->dependent), incr,
3011 				   count_phis);
3012 
3013   if (c->sibling)
3014     {
3015       sib_cost = lowest_cost_path (cost_in, repl_savings,
3016 				   lookup_cand (c->sibling), incr,
3017 				   count_phis);
3018       local_cost = MIN (local_cost, sib_cost);
3019     }
3020 
3021   return local_cost;
3022 }
3023 
3024 /* Compute the total savings that would accrue from all replacements
3025    in the candidate tree rooted at C, counting only candidates with
3026    increment INCR.  Assume that replacing a candidate reduces cost
3027    by REPL_SAVINGS.  Also account for savings from statements that
3028    would go dead.  */
3029 
3030 static int
3031 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
3032 	       bool count_phis)
3033 {
3034   int savings = 0;
3035   widest_int cand_incr = cand_abs_increment (c);
3036 
3037   if (incr == cand_incr && !cand_already_replaced (c))
3038     savings += repl_savings + c->dead_savings;
3039 
3040   if (count_phis
3041       && phi_dependent_cand_p (c)
3042       && !cand_already_replaced (c))
3043     {
3044       int phi_savings = 0;
3045       gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
3046       savings -= phi_incr_cost (c, incr, phi, &phi_savings);
3047 
3048       if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
3049 	savings += phi_savings;
3050     }
3051 
3052   if (c->dependent)
3053     savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
3054 			      count_phis);
3055 
3056   if (c->sibling)
3057     savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
3058 			      count_phis);
3059 
3060   return savings;
3061 }
3062 
3063 /* Use target-specific costs to determine and record which increments
3064    in the current candidate tree are profitable to replace, assuming
3065    MODE and SPEED.  FIRST_DEP is the first dependent of the root of
3066    the candidate tree.
3067 
3068    One slight limitation here is that we don't account for the possible
3069    introduction of casts in some cases.  See replace_one_candidate for
3070    the cases where these are introduced.  This should probably be cleaned
3071    up sometime.  */
3072 
3073 static void
3074 analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
3075 {
3076   unsigned i;
3077 
3078   for (i = 0; i < incr_vec_len; i++)
3079     {
3080       HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
3081 
3082       /* If somehow this increment is bigger than a HWI, we won't
3083 	 be optimizing candidates that use it.  And if the increment
3084 	 has a count of zero, nothing will be done with it.  */
3085       if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
3086 	incr_vec[i].cost = COST_INFINITE;
3087 
3088       /* Increments of 0, 1, and -1 are always profitable to replace,
3089 	 because they always replace a multiply or add with an add or
3090 	 copy, and may cause one or more existing instructions to go
3091 	 dead.  Exception:  -1 can't be assumed to be profitable for
3092 	 pointer addition.  */
3093       else if (incr == 0
3094 	       || incr == 1
3095 	       || (incr == -1
3096 		   && !POINTER_TYPE_P (first_dep->cand_type)))
3097 	incr_vec[i].cost = COST_NEUTRAL;
3098 
3099       /* If we need to add an initializer, give up if a cast from the
3100 	 candidate's type to its stride's type can lose precision.
3101 	 Note that this already takes into account that the stride may
3102 	 have been cast to a wider type, in which case this test won't
3103 	 fire.  Example:
3104 
3105            short int _1;
3106 	   _2 = (int) _1;
3107 	   _3 = _2 * 10;
3108 	   _4 = x + _3;    ADD: x + (10 * (int)_1) : int
3109 	   _5 = _2 * 15;
3110 	   _6 = x + _5;    ADD: x + (15 * (int)_1) : int
3111 
3112 	 Although the stride was a short int initially, the stride
3113 	 used in the analysis has been widened to an int, and such
3114 	 widening will be done in the initializer as well.  */
3115       else if (!incr_vec[i].initializer
3116 	       && TREE_CODE (first_dep->stride) != INTEGER_CST
3117 	       && !legal_cast_p_1 (first_dep->stride_type,
3118 				   TREE_TYPE (gimple_assign_lhs
3119 					      (first_dep->cand_stmt))))
3120 	incr_vec[i].cost = COST_INFINITE;
3121 
3122       /* If we need to add an initializer, make sure we don't introduce
3123 	 a multiply by a pointer type, which can happen in certain cast
3124 	 scenarios.  */
3125       else if (!incr_vec[i].initializer
3126 	       && TREE_CODE (first_dep->stride) != INTEGER_CST
3127 	       && POINTER_TYPE_P (first_dep->stride_type))
3128 	incr_vec[i].cost = COST_INFINITE;
3129 
3130       /* For any other increment, if this is a multiply candidate, we
3131 	 must introduce a temporary T and initialize it with
3132 	 T_0 = stride * increment.  When optimizing for speed, walk the
3133 	 candidate tree to calculate the best cost reduction along any
3134 	 path; if it offsets the fixed cost of inserting the initializer,
3135 	 replacing the increment is profitable.  When optimizing for
3136          size, instead calculate the total cost reduction from replacing
3137 	 all candidates with this increment.  */
3138       else if (first_dep->kind == CAND_MULT)
3139 	{
3140 	  int cost = mult_by_coeff_cost (incr, mode, speed);
3141 	  int repl_savings;
3142 
3143 	  if (tree_fits_shwi_p (first_dep->stride))
3144 	    {
3145 	      HOST_WIDE_INT hwi_stride = tree_to_shwi (first_dep->stride);
3146 	      repl_savings = mult_by_coeff_cost (hwi_stride, mode, speed);
3147 	    }
3148 	  else
3149 	    repl_savings = mul_cost (speed, mode);
3150 	  repl_savings -= add_cost (speed, mode);
3151 
3152 	  if (speed)
3153 	    cost = lowest_cost_path (cost, repl_savings, first_dep,
3154 				     incr_vec[i].incr, COUNT_PHIS);
3155 	  else
3156 	    cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
3157 				   COUNT_PHIS);
3158 
3159 	  incr_vec[i].cost = cost;
3160 	}
3161 
3162       /* If this is an add candidate, the initializer may already
3163 	 exist, so only calculate the cost of the initializer if it
3164 	 doesn't.  We are replacing one add with another here, so the
3165 	 known replacement savings is zero.  We will account for removal
3166 	 of dead instructions in lowest_cost_path or total_savings.  */
3167       else
3168 	{
3169 	  int cost = 0;
3170 	  if (!incr_vec[i].initializer)
3171 	    cost = mult_by_coeff_cost (incr, mode, speed);
3172 
3173 	  if (speed)
3174 	    cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
3175 				     DONT_COUNT_PHIS);
3176 	  else
3177 	    cost -= total_savings (0, first_dep, incr_vec[i].incr,
3178 				   DONT_COUNT_PHIS);
3179 
3180 	  incr_vec[i].cost = cost;
3181 	}
3182     }
3183 }
3184 
3185 /* Return the nearest common dominator of BB1 and BB2.  If the blocks
3186    are identical, return the earlier of C1 and C2 in *WHERE.  Otherwise,
3187    if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
3188    return C2 in *WHERE; and if the NCD matches neither, return NULL in
3189    *WHERE.  Note: It is possible for one of C1 and C2 to be NULL.  */
3190 
3191 static basic_block
3192 ncd_for_two_cands (basic_block bb1, basic_block bb2,
3193 		   slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
3194 {
3195   basic_block ncd;
3196 
3197   if (!bb1)
3198     {
3199       *where = c2;
3200       return bb2;
3201     }
3202 
3203   if (!bb2)
3204     {
3205       *where = c1;
3206       return bb1;
3207     }
3208 
3209   ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
3210 
3211   /* If both candidates are in the same block, the earlier
3212      candidate wins.  */
3213   if (bb1 == ncd && bb2 == ncd)
3214     {
3215       if (!c1 || (c2 && c2->cand_num < c1->cand_num))
3216 	*where = c2;
3217       else
3218 	*where = c1;
3219     }
3220 
3221   /* Otherwise, if one of them produced a candidate in the
3222      dominator, that one wins.  */
3223   else if (bb1 == ncd)
3224     *where = c1;
3225 
3226   else if (bb2 == ncd)
3227     *where = c2;
3228 
3229   /* If neither matches the dominator, neither wins.  */
3230   else
3231     *where = NULL;
3232 
3233   return ncd;
3234 }
3235 
3236 /* Consider all candidates that feed PHI.  Find the nearest common
3237    dominator of those candidates requiring the given increment INCR.
3238    Further find and return the nearest common dominator of this result
3239    with block NCD.  If the returned block contains one or more of the
3240    candidates, return the earliest candidate in the block in *WHERE.  */
3241 
3242 static basic_block
3243 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi,
3244 	      basic_block ncd, slsr_cand_t *where)
3245 {
3246   unsigned i;
3247   slsr_cand_t basis = lookup_cand (c->basis);
3248   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3249 
3250   for (i = 0; i < gimple_phi_num_args (phi); i++)
3251     {
3252       tree arg = gimple_phi_arg_def (phi, i);
3253       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3254 
3255       if (gimple_code (arg_def) == GIMPLE_PHI)
3256 	ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd, where);
3257       else
3258 	{
3259 	  widest_int diff;
3260 
3261 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
3262 	    diff = -basis->index;
3263 	  else
3264 	    {
3265 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
3266 	      diff = arg_cand->index - basis->index;
3267 	    }
3268 
3269 	  basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3270 
3271 	  if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3272 	    ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3273 	}
3274     }
3275 
3276   return ncd;
3277 }
3278 
3279 /* Consider the candidate C together with any candidates that feed
3280    C's phi dependence (if any).  Find and return the nearest common
3281    dominator of those candidates requiring the given increment INCR.
3282    If the returned block contains one or more of the candidates,
3283    return the earliest candidate in the block in *WHERE.  */
3284 
3285 static basic_block
3286 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3287 {
3288   basic_block ncd = NULL;
3289 
3290   if (cand_abs_increment (c) == incr)
3291     {
3292       ncd = gimple_bb (c->cand_stmt);
3293       *where = c;
3294     }
3295 
3296   if (phi_dependent_cand_p (c))
3297     ncd = ncd_with_phi (c, incr,
3298 			as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
3299 			ncd, where);
3300 
3301   return ncd;
3302 }
3303 
3304 /* Consider all candidates in the tree rooted at C for which INCR
3305    represents the required increment of C relative to its basis.
3306    Find and return the basic block that most nearly dominates all
3307    such candidates.  If the returned block contains one or more of
3308    the candidates, return the earliest candidate in the block in
3309    *WHERE.  */
3310 
3311 static basic_block
3312 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3313 				    slsr_cand_t *where)
3314 {
3315   basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3316   slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3317 
3318   /* First find the NCD of all siblings and dependents.  */
3319   if (c->sibling)
3320     sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3321 						  incr, &sib_where);
3322   if (c->dependent)
3323     dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3324 						  incr, &dep_where);
3325   if (!sib_ncd && !dep_ncd)
3326     {
3327       new_where = NULL;
3328       ncd = NULL;
3329     }
3330   else if (sib_ncd && !dep_ncd)
3331     {
3332       new_where = sib_where;
3333       ncd = sib_ncd;
3334     }
3335   else if (dep_ncd && !sib_ncd)
3336     {
3337       new_where = dep_where;
3338       ncd = dep_ncd;
3339     }
3340   else
3341     ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3342 			     dep_where, &new_where);
3343 
3344   /* If the candidate's increment doesn't match the one we're interested
3345      in (nor do any increments for feeding defs of a phi-dependence),
3346      then the result depends only on siblings and dependents.  */
3347   this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3348 
3349   if (!this_ncd || cand_already_replaced (c))
3350     {
3351       *where = new_where;
3352       return ncd;
3353     }
3354 
3355   /* Otherwise, compare this candidate with the result from all siblings
3356      and dependents.  */
3357   ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3358 
3359   return ncd;
3360 }
3361 
3362 /* Return TRUE if the increment indexed by INDEX is profitable to replace.  */
3363 
3364 static inline bool
3365 profitable_increment_p (unsigned index)
3366 {
3367   return (incr_vec[index].cost <= COST_NEUTRAL);
3368 }
3369 
3370 /* For each profitable increment in the increment vector not equal to
3371    0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3372    dominator of all statements in the candidate chain rooted at C
3373    that require that increment, and insert an initializer
3374    T_0 = stride * increment at that location.  Record T_0 with the
3375    increment record.  */
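/* As a purely illustrative sketch (SSA names invented for exposition):
   if two candidates in different blocks each need to add 5 * _s to
   their respective basis names, a single initializer

     slsr_1 = _s * 5;

   inserted at their nearest common dominator can be reused by both
   replacements, so its cost is paid only once.  */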
3376 
3377 static void
3378 insert_initializers (slsr_cand_t c)
3379 {
3380   unsigned i;
3381 
3382   for (i = 0; i < incr_vec_len; i++)
3383     {
3384       basic_block bb;
3385       slsr_cand_t where = NULL;
3386       gassign *init_stmt;
3387       gassign *cast_stmt = NULL;
3388       tree new_name, incr_tree, init_stride;
3389       widest_int incr = incr_vec[i].incr;
3390 
3391       if (!profitable_increment_p (i)
3392 	  || incr == 1
3393 	  || (incr == -1
3394 	      && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type)))
3395 	  || incr == 0)
3396 	continue;
3397 
3398       /* We may have already identified an existing initializer that
3399 	 will suffice.  */
3400       if (incr_vec[i].initializer)
3401 	{
3402 	  if (dump_file && (dump_flags & TDF_DETAILS))
3403 	    {
3404 	      fputs ("Using existing initializer: ", dump_file);
3405 	      print_gimple_stmt (dump_file,
3406 				 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3407 				 0, TDF_NONE);
3408 	    }
3409 	  continue;
3410 	}
3411 
3412       /* Find the block that most closely dominates all candidates
3413 	 with this increment.  If there is at least one candidate in
3414 	 that block, the earliest one will be returned in WHERE.  */
3415       bb = nearest_common_dominator_for_cands (c, incr, &where);
3416 
3417       /* If the NCD is not dominated by the block containing the
3418 	 definition of the stride, we can't legally insert a
3419 	 single initializer.  Mark the increment as unprofitable
3420 	 so we don't make any replacements.  FIXME: Multiple
3421 	 initializers could be placed with more analysis.  */
3422       gimple *stride_def = SSA_NAME_DEF_STMT (c->stride);
3423       basic_block stride_bb = gimple_bb (stride_def);
3424 
3425       if (stride_bb && !dominated_by_p (CDI_DOMINATORS, bb, stride_bb))
3426 	{
3427 	  if (dump_file && (dump_flags & TDF_DETAILS))
3428 	    fprintf (dump_file,
3429 		     "Initializer #%u cannot be legally placed\n", i);
3430 	  incr_vec[i].cost = COST_INFINITE;
3431 	  continue;
3432 	}
3433 
3434       /* If the nominal stride has a different type than the recorded
3435 	 stride type, build a cast from the nominal stride to that type.  */
3436       if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
3437 	{
3438 	  init_stride = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3439 	  cast_stmt = gimple_build_assign (init_stride, NOP_EXPR, c->stride);
3440 	}
3441       else
3442 	init_stride = c->stride;
3443 
3444       /* Create a new SSA name to hold the initializer's value.  */
3445       new_name = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3446       incr_vec[i].initializer = new_name;
3447 
3448       /* Create the initializer and insert it in the latest possible
3449 	 dominating position.  */
3450       incr_tree = wide_int_to_tree (c->stride_type, incr);
3451       init_stmt = gimple_build_assign (new_name, MULT_EXPR,
3452 				       init_stride, incr_tree);
3453       if (where)
3454 	{
3455 	  gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3456 	  location_t loc = gimple_location (where->cand_stmt);
3457 
3458 	  if (cast_stmt)
3459 	    {
3460 	      gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3461 	      gimple_set_location (cast_stmt, loc);
3462 	    }
3463 
3464 	  gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3465 	  gimple_set_location (init_stmt, loc);
3466 	}
3467       else
3468 	{
3469 	  gimple_stmt_iterator gsi = gsi_last_bb (bb);
3470 	  gimple *basis_stmt = lookup_cand (c->basis)->cand_stmt;
3471 	  location_t loc = gimple_location (basis_stmt);
3472 
3473 	  if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
3474 	    {
3475 	      if (cast_stmt)
3476 		{
3477 		  gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3478 		  gimple_set_location (cast_stmt, loc);
3479 		}
3480 	      gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3481 	    }
3482 	  else
3483 	    {
3484 	      if (cast_stmt)
3485 		{
3486 		  gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
3487 		  gimple_set_location (cast_stmt, loc);
3488 		}
3489 	      gsi_insert_after (&gsi, init_stmt, GSI_NEW_STMT);
3490 	    }
3491 
3492 	  gimple_set_location (init_stmt, gimple_location (basis_stmt));
3493 	}
3494 
3495       if (dump_file && (dump_flags & TDF_DETAILS))
3496 	{
3497 	  if (cast_stmt)
3498 	    {
3499 	      fputs ("Inserting stride cast: ", dump_file);
3500 	      print_gimple_stmt (dump_file, cast_stmt, 0);
3501 	    }
3502 	  fputs ("Inserting initializer: ", dump_file);
3503 	  print_gimple_stmt (dump_file, init_stmt, 0);
3504 	}
3505     }
3506 }
3507 
3508 /* Recursive helper function for all_phi_incrs_profitable.  */
3509 
3510 static bool
3511 all_phi_incrs_profitable_1 (slsr_cand_t c, gphi *phi, int *spread)
3512 {
3513   unsigned i;
3514   slsr_cand_t basis = lookup_cand (c->basis);
3515   slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3516 
3517   if (phi_cand->visited)
3518     return true;
3519 
3520   phi_cand->visited = 1;
3521   (*spread)++;
3522 
3523   /* If the basis doesn't dominate the PHI (including when the PHI is
3524      in the same block as the basis), we won't be able to create a PHI
3525      using the basis here.  */
3526   basic_block basis_bb = gimple_bb (basis->cand_stmt);
3527   basic_block phi_bb = gimple_bb (phi);
3528 
3529   if (phi_bb == basis_bb
3530       || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
3531     return false;
3532 
3533   for (i = 0; i < gimple_phi_num_args (phi); i++)
3534     {
3535       /* If the PHI arg resides in a block not dominated by the basis,
3536 	 we won't be able to create a PHI using the basis here.  */
3537       basic_block pred_bb = gimple_phi_arg_edge (phi, i)->src;
3538 
3539       if (!dominated_by_p (CDI_DOMINATORS, pred_bb, basis_bb))
3540 	return false;
3541 
3542       tree arg = gimple_phi_arg_def (phi, i);
3543       gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3544 
3545       if (gimple_code (arg_def) == GIMPLE_PHI)
3546 	{
3547 	  if (!all_phi_incrs_profitable_1 (c, as_a <gphi *> (arg_def), spread)
3548 	      || *spread > MAX_SPREAD)
3549 	    return false;
3550 	}
3551       else
3552 	{
3553 	  int j;
3554 	  widest_int increment;
3555 
3556 	  if (operand_equal_p (arg, phi_cand->base_expr, 0))
3557 	    increment = -basis->index;
3558 	  else
3559 	    {
3560 	      slsr_cand_t arg_cand = base_cand_from_table (arg);
3561 	      increment = arg_cand->index - basis->index;
3562 	    }
3563 
3564 	  if (!address_arithmetic_p && wi::neg_p (increment))
3565 	    increment = -increment;
3566 
3567 	  j = incr_vec_index (increment);
3568 
3569 	  if (dump_file && (dump_flags & TDF_DETAILS))
3570 	    {
3571 	      fprintf (dump_file, "  Conditional candidate %d, phi: ",
3572 		       c->cand_num);
3573 	      print_gimple_stmt (dump_file, phi, 0);
3574 	      fputs ("    increment: ", dump_file);
3575 	      print_decs (increment, dump_file);
3576 	      if (j < 0)
3577 		fprintf (dump_file,
3578 			 "\n  Not replaced; incr_vec overflow.\n");
3579 	      else {
3580 		fprintf (dump_file, "\n    cost: %d\n", incr_vec[j].cost);
3581 		if (profitable_increment_p (j))
3582 		  fputs ("  Replacing...\n", dump_file);
3583 		else
3584 		  fputs ("  Not replaced.\n", dump_file);
3585 	      }
3586 	    }
3587 
3588 	  if (j < 0 || !profitable_increment_p (j))
3589 	    return false;
3590 	}
3591     }
3592 
3593   return true;
3594 }
3595 
3596 /* Return TRUE iff all required increments for candidates feeding PHI
3597    are profitable (and legal!) to replace on behalf of candidate C.  */
3598 
3599 static bool
3600 all_phi_incrs_profitable (slsr_cand_t c, gphi *phi)
3601 {
3602   int spread = 0;
3603   bool retval = all_phi_incrs_profitable_1 (c, phi, &spread);
3604   clear_visited (phi);
3605   return retval;
3606 }
3607 
3608 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3609    type TO_TYPE, and insert it in front of the statement represented
3610    by candidate C.  Return the new SSA name, which is given the
3611    "slsr" temporary prefix.  */
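/* For instance (illustrative only): with TO_TYPE == sizetype and
   FROM_EXPR == _s, this emits a statement along the lines of

     slsr_2 = (sizetype) _s;

   immediately before C's statement and returns slsr_2.  */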
3612 
3613 static tree
3614 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3615 {
3616   tree cast_lhs;
3617   gassign *cast_stmt;
3618   gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3619 
3620   cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3621   cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
3622   gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3623   gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3624 
3625   if (dump_file && (dump_flags & TDF_DETAILS))
3626     {
3627       fputs ("  Inserting: ", dump_file);
3628       print_gimple_stmt (dump_file, cast_stmt, 0);
3629     }
3630 
3631   return cast_lhs;
3632 }
3633 
3634 /* Replace the RHS of the statement represented by candidate C with
3635    NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3636    leave C unchanged or just interchange its operands.  The original
3637    operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3638    If the replacement was made and we are doing a details dump,
3639    return the revised statement, else NULL.  */
3640 
3641 static gimple *
3642 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3643 			enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3644 			slsr_cand_t c)
3645 {
3646   if (new_code != old_code
3647       || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3648 	   || !operand_equal_p (new_rhs2, old_rhs2, 0))
3649 	  && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3650 	      || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3651     {
3652       gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3653       slsr_cand_t cc = lookup_cand (c->first_interp);
3654       gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3655       update_stmt (gsi_stmt (gsi));
3656       while (cc)
3657 	{
3658 	  cc->cand_stmt = gsi_stmt (gsi);
3659 	  cc = lookup_cand (cc->next_interp);
3660 	}
3661 
3662       if (dump_file && (dump_flags & TDF_DETAILS))
3663 	return gsi_stmt (gsi);
3664     }
3665 
3666   else if (dump_file && (dump_flags & TDF_DETAILS))
3667     fputs ("  (duplicate, not actually replacing)\n", dump_file);
3668 
3669   return NULL;
3670 }
3671 
3672 /* Strength-reduce the statement represented by candidate C by replacing
3673    it with an equivalent addition or subtraction.  I is the index into
3674    the increment vector identifying C's increment.  A new SSA name is
3675    created if a cast needs to be introduced.  BASIS_NAME is the rhs1
3676    to use in creating the add/subtract.  */
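/* An illustrative example (not taken from real dump output): if the
   increment record for I carries an initializer slsr_1 = _s * 5, then
   a candidate statement

     x_3 = _b * _s;    (basis y_2, increment 5)

   has its right-hand side rewritten in place as

     x_3 = y_2 + slsr_1;  */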
3677 
3678 static void
3679 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3680 {
3681   gimple *stmt_to_print = NULL;
3682   tree orig_rhs1, orig_rhs2;
3683   tree rhs2;
3684   enum tree_code orig_code, repl_code;
3685   widest_int cand_incr;
3686 
3687   orig_code = gimple_assign_rhs_code (c->cand_stmt);
3688   orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3689   orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3690   cand_incr = cand_increment (c);
3691 
3692   /* If orig_rhs2 is NULL, we have already replaced this in situ with
3693      a copy statement under another interpretation.  */
3694   if (!orig_rhs2)
3695     return;
3696 
3697   if (dump_file && (dump_flags & TDF_DETAILS))
3698     {
3699       fputs ("Replacing: ", dump_file);
3700       print_gimple_stmt (dump_file, c->cand_stmt, 0);
3701       stmt_to_print = c->cand_stmt;
3702     }
3703 
3704   if (address_arithmetic_p)
3705     repl_code = POINTER_PLUS_EXPR;
3706   else
3707     repl_code = PLUS_EXPR;
3708 
3709   /* If the increment has an initializer T_0, replace the candidate
3710      statement with an add of the basis name and the initializer.  */
3711   if (incr_vec[i].initializer)
3712     {
3713       tree init_type = TREE_TYPE (incr_vec[i].initializer);
3714       tree orig_type = TREE_TYPE (orig_rhs2);
3715 
3716       if (types_compatible_p (orig_type, init_type))
3717 	rhs2 = incr_vec[i].initializer;
3718       else
3719 	rhs2 = introduce_cast_before_cand (c, orig_type,
3720 					   incr_vec[i].initializer);
3721 
3722       if (incr_vec[i].incr != cand_incr)
3723 	{
3724 	  gcc_assert (repl_code == PLUS_EXPR);
3725 	  repl_code = MINUS_EXPR;
3726 	}
3727 
3728       stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3729 					      orig_code, orig_rhs1, orig_rhs2,
3730 					      c);
3731     }
3732 
3733   /* Otherwise, the increment is one of -1, 0, and 1.  Replace
3734      with a subtract of the stride from the basis name, a copy
3735      from the basis name, or an add of the stride to the basis
3736      name, respectively.  It may be necessary to introduce a
3737      cast (or reuse an existing cast).  */
3738   else if (cand_incr == 1)
3739     {
3740       tree stride_type = TREE_TYPE (c->stride);
3741       tree orig_type = TREE_TYPE (orig_rhs2);
3742 
3743       if (types_compatible_p (orig_type, stride_type))
3744 	rhs2 = c->stride;
3745       else
3746 	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3747 
3748       stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3749 					      orig_code, orig_rhs1, orig_rhs2,
3750 					      c);
3751     }
3752 
3753   else if (cand_incr == -1)
3754     {
3755       tree stride_type = TREE_TYPE (c->stride);
3756       tree orig_type = TREE_TYPE (orig_rhs2);
3757       gcc_assert (repl_code != POINTER_PLUS_EXPR);
3758 
3759       if (types_compatible_p (orig_type, stride_type))
3760 	rhs2 = c->stride;
3761       else
3762 	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3763 
3764       if (orig_code != MINUS_EXPR
3765 	  || !operand_equal_p (basis_name, orig_rhs1, 0)
3766 	  || !operand_equal_p (rhs2, orig_rhs2, 0))
3767 	{
3768 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3769 	  slsr_cand_t cc = lookup_cand (c->first_interp);
3770 	  gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3771 	  update_stmt (gsi_stmt (gsi));
3772 	  while (cc)
3773 	    {
3774 	      cc->cand_stmt = gsi_stmt (gsi);
3775 	      cc = lookup_cand (cc->next_interp);
3776 	    }
3777 
3778 	  if (dump_file && (dump_flags & TDF_DETAILS))
3779 	    stmt_to_print = gsi_stmt (gsi);
3780 	}
3781       else if (dump_file && (dump_flags & TDF_DETAILS))
3782 	fputs ("  (duplicate, not actually replacing)\n", dump_file);
3783     }
3784 
3785   else if (cand_incr == 0)
3786     {
3787       tree lhs = gimple_assign_lhs (c->cand_stmt);
3788       tree lhs_type = TREE_TYPE (lhs);
3789       tree basis_type = TREE_TYPE (basis_name);
3790 
3791       if (types_compatible_p (lhs_type, basis_type))
3792 	{
3793 	  gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
3794 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3795 	  slsr_cand_t cc = lookup_cand (c->first_interp);
3796 	  gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3797 	  gsi_replace (&gsi, copy_stmt, false);
3798 	  while (cc)
3799 	    {
3800 	      cc->cand_stmt = copy_stmt;
3801 	      cc = lookup_cand (cc->next_interp);
3802 	    }
3803 
3804 	  if (dump_file && (dump_flags & TDF_DETAILS))
3805 	    stmt_to_print = copy_stmt;
3806 	}
3807       else
3808 	{
3809 	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3810 	  gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
3811 	  slsr_cand_t cc = lookup_cand (c->first_interp);
3812 	  gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3813 	  gsi_replace (&gsi, cast_stmt, false);
3814 	  while (cc)
3815 	    {
3816 	      cc->cand_stmt = cast_stmt;
3817 	      cc = lookup_cand (cc->next_interp);
3818 	    }
3819 
3820 	  if (dump_file && (dump_flags & TDF_DETAILS))
3821 	    stmt_to_print = cast_stmt;
3822 	}
3823     }
3824   else
3825     gcc_unreachable ();
3826 
3827   if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3828     {
3829       fputs ("With: ", dump_file);
3830       print_gimple_stmt (dump_file, stmt_to_print, 0);
3831       fputs ("\n", dump_file);
3832     }
3833 }
3834 
3835 /* For each candidate in the tree rooted at C, replace it with
3836    an increment if such has been shown to be profitable.  */
3837 
3838 static void
3839 replace_profitable_candidates (slsr_cand_t c)
3840 {
3841   if (!cand_already_replaced (c))
3842     {
3843       widest_int increment = cand_abs_increment (c);
3844       enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3845       int i;
3846 
3847       i = incr_vec_index (increment);
3848 
3849       /* Only process profitable increments.  Nothing useful can be done
3850 	 to a cast or copy.  */
3851       if (i >= 0
3852 	  && profitable_increment_p (i)
3853 	  && orig_code != SSA_NAME
3854 	  && !CONVERT_EXPR_CODE_P (orig_code))
3855 	{
3856 	  if (phi_dependent_cand_p (c))
3857 	    {
3858 	      gphi *phi = as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt);
3859 
3860 	      if (all_phi_incrs_profitable (c, phi))
3861 		{
3862 		  /* Look up the LHS SSA name from C's basis.  This will be
3863 		     the RHS1 of the adds we will introduce to create new
3864 		     phi arguments.  */
3865 		  slsr_cand_t basis = lookup_cand (c->basis);
3866 		  tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3867 
3868 		  /* Create a new phi statement that will represent C's true
3869 		     basis after the transformation is complete.  */
3870 		  location_t loc = gimple_location (c->cand_stmt);
3871 		  tree name = create_phi_basis (c, phi, basis_name,
3872 						loc, UNKNOWN_STRIDE);
3873 
3874 		  /* Replace C with an add of the new basis phi and the
3875 		     increment.  */
3876 		  replace_one_candidate (c, i, name);
3877 		}
3878 	    }
3879 	  else
3880 	    {
3881 	      slsr_cand_t basis = lookup_cand (c->basis);
3882 	      tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3883 	      replace_one_candidate (c, i, basis_name);
3884 	    }
3885 	}
3886     }
3887 
3888   if (c->sibling)
3889     replace_profitable_candidates (lookup_cand (c->sibling));
3890 
3891   if (c->dependent)
3892     replace_profitable_candidates (lookup_cand (c->dependent));
3893 }
3894 
3895 /* Analyze costs of related candidates in the candidate vector,
3896    and make beneficial replacements.  */
3897 
3898 static void
3899 analyze_candidates_and_replace (void)
3900 {
3901   unsigned i;
3902   slsr_cand_t c;
3903 
3904   /* Each candidate that has a null basis and a non-null
3905      dependent is the root of a tree of related statements.
3906      Analyze each tree to determine a subset of those
3907      statements that can be replaced with maximum benefit.
3908 
3909      Note the first NULL element is skipped.  */
3910   FOR_EACH_VEC_ELT_FROM (cand_vec, i, c, 1)
3911     {
3912       slsr_cand_t first_dep;
3913 
3914       if (c->basis != 0 || c->dependent == 0)
3915 	continue;
3916 
3917       if (dump_file && (dump_flags & TDF_DETAILS))
3918 	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3919 		 c->cand_num);
3920 
3921       first_dep = lookup_cand (c->dependent);
3922 
3923       /* If this is a chain of CAND_REFs, unconditionally replace
3924 	 each of them with a strength-reduced data reference.  */
3925       if (c->kind == CAND_REF)
3926 	replace_refs (c);
3927 
3928       /* If the common stride of all related candidates is a known
3929 	 constant, each candidate without a phi-dependence can be
3930 	 profitably replaced.  Each replaces a multiply by a single
3931 	 add, with the possibility that a feeding add also goes dead.
3932 	 A candidate with a phi-dependence is replaced only if the
3933 	 compensation code it requires is offset by the strength
3934 	 reduction savings.  */
3935       else if (TREE_CODE (c->stride) == INTEGER_CST)
3936 	replace_uncond_cands_and_profitable_phis (first_dep);
3937 
3938       /* When the stride is an SSA name, it may still be profitable
3939 	 to replace some or all of the dependent candidates, depending
3940 	 on whether the introduced increments can be reused, or are
3941 	 less expensive to calculate than the replaced statements.  */
3942       else
3943 	{
3944 	  machine_mode mode;
3945 	  bool speed;
3946 
3947 	  /* Determine whether we'll be generating pointer arithmetic
3948 	     when replacing candidates.  */
3949 	  address_arithmetic_p = (c->kind == CAND_ADD
3950 				  && POINTER_TYPE_P (c->cand_type));
3951 
3952 	  /* If all candidates have already been replaced under other
3953 	     interpretations, nothing remains to be done.  */
3954 	  if (!count_candidates (c))
3955 	    continue;
3956 
3957 	  /* Construct an array of increments for this candidate chain.  */
3958 	  incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3959 	  incr_vec_len = 0;
3960 	  record_increments (c);
3961 
3962 	  /* Determine which increments are profitable to replace.  */
3963 	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3964 	  speed = optimize_cands_for_speed_p (c);
3965 	  analyze_increments (first_dep, mode, speed);
3966 
3967 	  /* Insert initializers of the form T_0 = stride * increment
3968 	     for use in profitable replacements.  */
3969 	  insert_initializers (first_dep);
3970 	  dump_incr_vec ();
3971 
3972 	  /* Perform the replacements.  */
3973 	  replace_profitable_candidates (first_dep);
3974 	  free (incr_vec);
3975 	}
3976     }
3977 
3978   /* For conditional candidates, we may have uncommitted insertions
3979      on edges to clean up.  */
3980   gsi_commit_edge_inserts ();
3981 }
3982 
3983 namespace {
3984 
3985 const pass_data pass_data_strength_reduction =
3986 {
3987   GIMPLE_PASS, /* type */
3988   "slsr", /* name */
3989   OPTGROUP_NONE, /* optinfo_flags */
3990   TV_GIMPLE_SLSR, /* tv_id */
3991   ( PROP_cfg | PROP_ssa ), /* properties_required */
3992   0, /* properties_provided */
3993   0, /* properties_destroyed */
3994   0, /* todo_flags_start */
3995   0, /* todo_flags_finish */
3996 };
3997 
3998 class pass_strength_reduction : public gimple_opt_pass
3999 {
4000 public:
4001   pass_strength_reduction (gcc::context *ctxt)
4002     : gimple_opt_pass (pass_data_strength_reduction, ctxt)
4003   {}
4004 
4005   /* opt_pass methods: */
4006   virtual bool gate (function *) { return flag_tree_slsr; }
4007   virtual unsigned int execute (function *);
4008 
4009 }; // class pass_strength_reduction
4010 
4011 unsigned
4012 pass_strength_reduction::execute (function *fun)
4013 {
4014   /* Create the obstack where candidates will reside.  */
4015   gcc_obstack_init (&cand_obstack);
4016 
4017   /* Allocate the candidate vector and initialize the first NULL element.  */
4018   cand_vec.create (128);
4019   cand_vec.safe_push (NULL);
4020 
4021   /* Allocate the mapping from statements to candidate indices.  */
4022   stmt_cand_map = new hash_map<gimple *, slsr_cand_t>;
4023 
4024   /* Create the obstack where candidate chains will reside.  */
4025   gcc_obstack_init (&chain_obstack);
4026 
4027   /* Allocate the mapping from base expressions to candidate chains.  */
4028   base_cand_map = new hash_table<cand_chain_hasher> (500);
4029 
4030   /* Allocate the mapping from bases to alternative bases.  */
4031   alt_base_map = new hash_map<tree, tree>;
4032 
4033   /* Initialize the loop optimizer.  We need to detect flow across
4034      back edges, and this gives us dominator information as well.  */
4035   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4036 
4037   /* Walk the CFG in predominator order looking for strength reduction
4038      candidates.  */
4039   find_candidates_dom_walker (CDI_DOMINATORS)
4040     .walk (fun->cfg->x_entry_block_ptr);
4041 
4042   if (dump_file && (dump_flags & TDF_DETAILS))
4043     {
4044       dump_cand_vec ();
4045       dump_cand_chains ();
4046     }
4047 
4048   delete alt_base_map;
4049   free_affine_expand_cache (&name_expansions);
4050 
4051   /* Analyze costs and make appropriate replacements.  */
4052   analyze_candidates_and_replace ();
4053 
4054   loop_optimizer_finalize ();
4055   delete base_cand_map;
4056   base_cand_map = NULL;
4057   obstack_free (&chain_obstack, NULL);
4058   delete stmt_cand_map;
4059   cand_vec.release ();
4060   obstack_free (&cand_obstack, NULL);
4061 
4062   return 0;
4063 }
4064 
4065 } // anon namespace
4066 
4067 gimple_opt_pass *
4068 make_pass_strength_reduction (gcc::context *ctxt)
4069 {
4070   return new pass_strength_reduction (ctxt);
4071 }
4072