1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
26
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
31
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
35
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree.h"
40 #include "pointer-set.h"
41 #include "hash-table.h"
42 #include "basic-block.h"
43 #include "tree-ssa-alias.h"
44 #include "internal-fn.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimple-iterator.h"
49 #include "gimplify-me.h"
50 #include "stor-layout.h"
51 #include "expr.h"
52 #include "tree-pass.h"
53 #include "cfgloop.h"
54 #include "gimple-pretty-print.h"
55 #include "gimple-ssa.h"
56 #include "tree-cfg.h"
57 #include "tree-phinodes.h"
58 #include "ssa-iterators.h"
59 #include "stringpool.h"
60 #include "tree-ssanames.h"
61 #include "domwalk.h"
62 #include "expmed.h"
63 #include "params.h"
64 #include "tree-ssa-address.h"
65 #include "tree-affine.h"
66
67 /* Information about a strength reduction candidate. Each statement
68 in the candidate table represents an expression of one of the
69 following forms (the special case of CAND_REF will be described
70 later):
71
72 (CAND_MULT) S1: X = (B + i) * S
73 (CAND_ADD) S1: X = B + (i * S)
74
75 Here X and B are SSA names, i is an integer constant, and S is
76 either an SSA name or a constant. We call B the "base," i the
77 "index", and S the "stride."
78
79 Any statement S0 that dominates S1 and is of the form:
80
81 (CAND_MULT) S0: Y = (B + i') * S
82 (CAND_ADD) S0: Y = B + (i' * S)
83
84 is called a "basis" for S1. In both cases, S1 may be replaced by
85
86 S1': X = Y + (i - i') * S,
87
88 where (i - i') * S is folded to the extent possible.
89
90 All gimple statements are visited in dominator order, and each
91 statement that may contribute to one of the forms of S1 above is
92 given at least one entry in the candidate table. Such statements
93 include addition, pointer addition, subtraction, multiplication,
94 negation, copies, and nontrivial type casts. If a statement may
95 represent more than one expression of the forms of S1 above,
96 multiple "interpretations" are stored in the table and chained
97 together. Examples:
98
99 * An add of two SSA names may treat either operand as the base.
100 * A multiply of two SSA names, likewise.
101 * A copy or cast may be thought of as either a CAND_MULT with
102 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
103
104 Candidate records are allocated from an obstack. They are addressed
105 both from a hash table keyed on S1, and from a vector of candidate
106 pointers arranged in predominator order.
107
108 Opportunity note
109 ----------------
110 Currently we don't recognize:
111
112 S0: Y = (S * i') - B
113 S1: X = (S * i) - B
114
115 as a strength reduction opportunity, even though this S1 would
116 also be replaceable by the S1' above. This can be added if it
117 comes up in practice.
118
119 Strength reduction in addressing
120 --------------------------------
121 There is another kind of candidate known as CAND_REF. A CAND_REF
122 describes a statement containing a memory reference having
123 complex addressing that might benefit from strength reduction.
124 Specifically, we are interested in references for which
125 get_inner_reference returns a base address, offset, and bitpos as
126 follows:
127
128 base: MEM_REF (T1, C1)
129 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
130 bitpos: C4 * BITS_PER_UNIT
131
132 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
133 arbitrary integer constants. Note that C2 may be zero, in which
134 case the offset will be MULT_EXPR (T2, C3).
135
136 When this pattern is recognized, the original memory reference
137 can be replaced with:
138
139 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
140 C1 + (C2 * C3) + C4)
141
142 which distributes the multiply to allow constant folding. When
143 two or more addressing expressions can be represented by MEM_REFs
144 of this form, differing only in the constants C1, C2, and C4,
145 making this substitution produces more efficient addressing during
146 the RTL phases. When there are not at least two expressions with
147 the same values of T1, T2, and C3, there is nothing to be gained
148 by the replacement.
149
150 Strength reduction of CAND_REFs uses the same infrastructure as
151 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
152 field, MULT_EXPR (T2, C3) in the stride (S) field, and
153 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
154 is thus another CAND_REF with the same B and S values. When at
155 least two CAND_REFs are chained together using the basis relation,
156 each of them is replaced as above, resulting in improved code
157 generation for addressing.
158
159 Conditional candidates
160 ======================
161
162 Conditional candidates are best illustrated with an example.
163 Consider the code sequence:
164
165 (1) x_0 = ...;
166 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
167 if (...)
168 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
169 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
170 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
171 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
172
173 Here strength reduction is complicated by the uncertain value of x_2.
174 A legitimate transformation is:
175
176 (1) x_0 = ...;
177 (2) a_0 = x_0 * 5;
178 if (...)
179 {
180 (3) [x_1 = x_0 + 1;]
181 (3a) t_1 = a_0 + 5;
182 }
183 (4) [x_2 = PHI <x_0, x_1>;]
184 (4a) t_2 = PHI <a_0, t_1>;
185 (5) [x_3 = x_2 + 1;]
186 (6r) a_1 = t_2 + 5;
187
188 where the bracketed instructions may go dead.
189
190 To recognize this opportunity, we have to observe that statement (6)
191 has a "hidden basis" (2). The hidden basis is unlike a normal basis
192 in that the statement and the hidden basis have different base SSA
193 names (x_2 and x_0, respectively). The relationship is established
194 when a statement's base name (x_2) is defined by a phi statement (4),
195 each argument of which (x_0, x_1) has an identical "derived base name."
196 If the argument is defined by a candidate (as x_1 is by (3)) that is a
197 CAND_ADD having a stride of 1, the derived base name of the argument is
198 the base name of the candidate (x_0). Otherwise, the argument itself
199 is its derived base name (as is the case with argument x_0).
200
201 The hidden basis for statement (6) is the nearest dominating candidate
202 whose base name is the derived base name (x_0) of the feeding phi (4),
203 and whose stride is identical to that of the statement. We can then
204 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
205 allowing the final replacement of (6) by the strength-reduced (6r).
206
207 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
208 A CAND_PHI is not a candidate for replacement, but is maintained in the
209 candidate table to ease discovery of hidden bases. Any phi statement
210 whose arguments share a common derived base name is entered into the
211 table with the derived base name, an (arbitrary) index of zero, and a
212 stride of 1. A statement with a hidden basis can then be detected by
213 simply looking up its feeding phi definition in the candidate table,
214 extracting the derived base name, and searching for a basis in the
215 usual manner after substituting the derived base name.
216
217 Note that the transformation is only valid when the original phi and
218 the statements that define the phi's arguments are all at the same
219 position in the loop hierarchy. */
220
221
222 /* Index into the candidate vector, offset by 1. VECs are zero-based,
223 while cand_idx's are one-based, with zero indicating null. */
224 typedef unsigned cand_idx;
225
226 /* The kind of candidate. */
227 enum cand_kind
228 {
229 CAND_MULT,
230 CAND_ADD,
231 CAND_REF,
232 CAND_PHI
233 };
234
235 struct slsr_cand_d
236 {
237 /* The candidate statement S1. */
238 gimple cand_stmt;
239
240 /* The base expression B: often an SSA name, but not always. */
241 tree base_expr;
242
243 /* The stride S. */
244 tree stride;
245
246 /* The index constant i. */
247 double_int index;
248
249 /* The type of the candidate. This is normally the type of base_expr,
250 but casts may have occurred when combining feeding instructions.
251 A candidate can only be a basis for candidates of the same final type.
252 (For CAND_REFs, this is the type to be used for operand 1 of the
253 replacement MEM_REF.) */
254 tree cand_type;
255
256 /* The kind of candidate (CAND_MULT, etc.). */
257 enum cand_kind kind;
258
259 /* Index of this candidate in the candidate vector. */
260 cand_idx cand_num;
261
262 /* Index of the next candidate record for the same statement.
263 A statement may be useful in more than one way (e.g., due to
264 commutativity). So we can have multiple "interpretations"
265 of a statement. */
266 cand_idx next_interp;
267
268 /* Index of the basis statement S0, if any, in the candidate vector. */
269 cand_idx basis;
270
271 /* First candidate for which this candidate is a basis, if one exists. */
272 cand_idx dependent;
273
274 /* Next candidate having the same basis as this one. */
275 cand_idx sibling;
276
277 /* If this is a conditional candidate, the CAND_PHI candidate
278 that defines the base SSA name B. */
279 cand_idx def_phi;
280
281 /* Savings that can be expected from eliminating dead code if this
282 candidate is replaced. */
283 int dead_savings;
284 };
285
286 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
287 typedef const struct slsr_cand_d *const_slsr_cand_t;
288
289 /* Pointers to candidates are chained together as part of a mapping
290 from base expressions to the candidates that use them. */
291
292 struct cand_chain_d
293 {
294 /* Base expression for the chain of candidates: often, but not
295 always, an SSA name. */
296 tree base_expr;
297
298 /* Pointer to a candidate. */
299 slsr_cand_t cand;
300
301 /* Chain pointer. */
302 struct cand_chain_d *next;
303
304 };
305
306 typedef struct cand_chain_d cand_chain, *cand_chain_t;
307 typedef const struct cand_chain_d *const_cand_chain_t;
308
309 /* Information about a unique "increment" associated with candidates
310 having an SSA name for a stride. An increment is the difference
311 between the index of the candidate and the index of its basis,
312 i.e., (i - i') as discussed in the module commentary.
313
314 When we are not going to generate address arithmetic we treat
315 increments that differ only in sign as the same, allowing sharing
316 of the cost of initializers. The absolute value of the increment
317 is stored in the incr_info. */
318
319 struct incr_info_d
320 {
321 /* The increment that relates a candidate to its basis. */
322 double_int incr;
323
324 /* How many times the increment occurs in the candidate tree. */
325 unsigned count;
326
327 /* Cost of replacing candidates using this increment. Negative and
328 zero costs indicate replacement should be performed. */
329 int cost;
330
331 /* If this increment is profitable but is not -1, 0, or 1, it requires
332 an initializer T_0 = stride * incr to be found or introduced in the
333 nearest common dominator of all candidates. This field holds T_0
334 for subsequent use. */
335 tree initializer;
336
337 /* If the initializer was found to already exist, this is the block
338 where it was found. */
339 basic_block init_bb;
340 };
341
342 typedef struct incr_info_d incr_info, *incr_info_t;
343
344 /* Candidates are maintained in a vector. If candidate X dominates
345 candidate Y, then X appears before Y in the vector; but the
346 converse does not necessarily hold. */
347 static vec<slsr_cand_t> cand_vec;
348
349 enum cost_consts
350 {
351 COST_NEUTRAL = 0,
352 COST_INFINITE = 1000
353 };
354
355 enum stride_status
356 {
357 UNKNOWN_STRIDE = 0,
358 KNOWN_STRIDE = 1
359 };
360
361 enum phi_adjust_status
362 {
363 NOT_PHI_ADJUST = 0,
364 PHI_ADJUST = 1
365 };
366
367 enum count_phis_status
368 {
369 DONT_COUNT_PHIS = 0,
370 COUNT_PHIS = 1
371 };
372
373 /* Pointer map embodying a mapping from statements to candidates. */
374 static struct pointer_map_t *stmt_cand_map;
375
376 /* Obstack for candidates. */
377 static struct obstack cand_obstack;
378
379 /* Obstack for candidate chains. */
380 static struct obstack chain_obstack;
381
382 /* An array INCR_VEC of incr_infos is used during analysis of related
383 candidates having an SSA name for a stride. INCR_VEC_LEN describes
384 its current length. MAX_INCR_VEC_LEN is used to avoid costly
385 pathological cases. */
386 static incr_info_t incr_vec;
387 static unsigned incr_vec_len;
388 const int MAX_INCR_VEC_LEN = 16;
389
390 /* For a chain of candidates with unknown stride, indicates whether or not
391 we must generate pointer arithmetic when replacing statements. */
392 static bool address_arithmetic_p;
393
394 /* Forward function declarations. */
395 static slsr_cand_t base_cand_from_table (tree);
396 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
397 static bool legal_cast_p_1 (tree, tree);
398
399 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
400
401 static slsr_cand_t
lookup_cand(cand_idx idx)402 lookup_cand (cand_idx idx)
403 {
404 return cand_vec[idx - 1];
405 }
406
407 /* Helper for hashing a candidate chain header. */
408
409 struct cand_chain_hasher : typed_noop_remove <cand_chain>
410 {
411 typedef cand_chain value_type;
412 typedef cand_chain compare_type;
413 static inline hashval_t hash (const value_type *);
414 static inline bool equal (const value_type *, const compare_type *);
415 };
416
417 inline hashval_t
hash(const value_type * p)418 cand_chain_hasher::hash (const value_type *p)
419 {
420 tree base_expr = p->base_expr;
421 return iterative_hash_expr (base_expr, 0);
422 }
423
424 inline bool
equal(const value_type * chain1,const compare_type * chain2)425 cand_chain_hasher::equal (const value_type *chain1, const compare_type *chain2)
426 {
427 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
428 }
429
430 /* Hash table embodying a mapping from base exprs to chains of candidates. */
431 static hash_table <cand_chain_hasher> base_cand_map;
432
433 /* Pointer map used by tree_to_aff_combination_expand. */
434 static struct pointer_map_t *name_expansions;
435 /* Pointer map embodying a mapping from bases to alternative bases. */
436 static struct pointer_map_t *alt_base_map;
437
438 /* Given BASE, use the tree affine combiniation facilities to
439 find the underlying tree expression for BASE, with any
440 immediate offset excluded.
441
442 N.B. we should eliminate this backtracking with better forward
443 analysis in a future release. */
444
445 static tree
get_alternative_base(tree base)446 get_alternative_base (tree base)
447 {
448 tree *result = (tree *) pointer_map_contains (alt_base_map, base);
449
450 if (result == NULL)
451 {
452 tree expr;
453 aff_tree aff;
454
455 tree_to_aff_combination_expand (base, TREE_TYPE (base),
456 &aff, &name_expansions);
457 aff.offset = tree_to_double_int (integer_zero_node);
458 expr = aff_combination_to_tree (&aff);
459
460 result = (tree *) pointer_map_insert (alt_base_map, base);
461 gcc_assert (!*result);
462
463 if (expr == base)
464 *result = NULL;
465 else
466 *result = expr;
467 }
468
469 return *result;
470 }
471
472 /* Look in the candidate table for a CAND_PHI that defines BASE and
473 return it if found; otherwise return NULL. */
474
475 static cand_idx
find_phi_def(tree base)476 find_phi_def (tree base)
477 {
478 slsr_cand_t c;
479
480 if (TREE_CODE (base) != SSA_NAME)
481 return 0;
482
483 c = base_cand_from_table (base);
484
485 if (!c || c->kind != CAND_PHI)
486 return 0;
487
488 return c->cand_num;
489 }
490
491 /* Helper routine for find_basis_for_candidate. May be called twice:
492 once for the candidate's base expr, and optionally again either for
493 the candidate's phi definition or for a CAND_REF's alternative base
494 expression. */
495
496 static slsr_cand_t
find_basis_for_base_expr(slsr_cand_t c,tree base_expr)497 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
498 {
499 cand_chain mapping_key;
500 cand_chain_t chain;
501 slsr_cand_t basis = NULL;
502
503 // Limit potential of N^2 behavior for long candidate chains.
504 int iters = 0;
505 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
506
507 mapping_key.base_expr = base_expr;
508 chain = base_cand_map.find (&mapping_key);
509
510 for (; chain && iters < max_iters; chain = chain->next, ++iters)
511 {
512 slsr_cand_t one_basis = chain->cand;
513
514 if (one_basis->kind != c->kind
515 || one_basis->cand_stmt == c->cand_stmt
516 || !operand_equal_p (one_basis->stride, c->stride, 0)
517 || !types_compatible_p (one_basis->cand_type, c->cand_type)
518 || !dominated_by_p (CDI_DOMINATORS,
519 gimple_bb (c->cand_stmt),
520 gimple_bb (one_basis->cand_stmt)))
521 continue;
522
523 if (!basis || basis->cand_num < one_basis->cand_num)
524 basis = one_basis;
525 }
526
527 return basis;
528 }
529
530 /* Use the base expr from candidate C to look for possible candidates
531 that can serve as a basis for C. Each potential basis must also
532 appear in a block that dominates the candidate statement and have
533 the same stride and type. If more than one possible basis exists,
534 the one with highest index in the vector is chosen; this will be
535 the most immediately dominating basis. */
536
537 static int
find_basis_for_candidate(slsr_cand_t c)538 find_basis_for_candidate (slsr_cand_t c)
539 {
540 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
541
542 /* If a candidate doesn't have a basis using its base expression,
543 it may have a basis hidden by one or more intervening phis. */
544 if (!basis && c->def_phi)
545 {
546 basic_block basis_bb, phi_bb;
547 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
548 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
549
550 if (basis)
551 {
552 /* A hidden basis must dominate the phi-definition of the
553 candidate's base name. */
554 phi_bb = gimple_bb (phi_cand->cand_stmt);
555 basis_bb = gimple_bb (basis->cand_stmt);
556
557 if (phi_bb == basis_bb
558 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
559 {
560 basis = NULL;
561 c->basis = 0;
562 }
563
564 /* If we found a hidden basis, estimate additional dead-code
565 savings if the phi and its feeding statements can be removed. */
566 if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
567 c->dead_savings += phi_cand->dead_savings;
568 }
569 }
570
571 if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
572 {
573 tree alt_base_expr = get_alternative_base (c->base_expr);
574 if (alt_base_expr)
575 basis = find_basis_for_base_expr (c, alt_base_expr);
576 }
577
578 if (basis)
579 {
580 c->sibling = basis->dependent;
581 basis->dependent = c->cand_num;
582 return basis->cand_num;
583 }
584
585 return 0;
586 }
587
588 /* Record a mapping from BASE to C, indicating that C may potentially serve
589 as a basis using that base expression. BASE may be the same as
590 C->BASE_EXPR; alternatively BASE can be a different tree that share the
591 underlining expression of C->BASE_EXPR. */
592
593 static void
record_potential_basis(slsr_cand_t c,tree base)594 record_potential_basis (slsr_cand_t c, tree base)
595 {
596 cand_chain_t node;
597 cand_chain **slot;
598
599 gcc_assert (base);
600
601 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
602 node->base_expr = base;
603 node->cand = c;
604 node->next = NULL;
605 slot = base_cand_map.find_slot (node, INSERT);
606
607 if (*slot)
608 {
609 cand_chain_t head = (cand_chain_t) (*slot);
610 node->next = head->next;
611 head->next = node;
612 }
613 else
614 *slot = node;
615 }
616
617 /* Allocate storage for a new candidate and initialize its fields.
618 Attempt to find a basis for the candidate.
619
620 For CAND_REF, an alternative base may also be recorded and used
621 to find a basis. This helps cases where the expression hidden
622 behind BASE (which is usually an SSA_NAME) has immediate offset,
623 e.g.
624
625 a2[i][j] = 1;
626 a2[i + 20][j] = 2; */
627
628 static slsr_cand_t
alloc_cand_and_find_basis(enum cand_kind kind,gimple gs,tree base,double_int index,tree stride,tree ctype,unsigned savings)629 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
630 double_int index, tree stride, tree ctype,
631 unsigned savings)
632 {
633 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
634 sizeof (slsr_cand));
635 c->cand_stmt = gs;
636 c->base_expr = base;
637 c->stride = stride;
638 c->index = index;
639 c->cand_type = ctype;
640 c->kind = kind;
641 c->cand_num = cand_vec.length () + 1;
642 c->next_interp = 0;
643 c->dependent = 0;
644 c->sibling = 0;
645 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
646 c->dead_savings = savings;
647
648 cand_vec.safe_push (c);
649
650 if (kind == CAND_PHI)
651 c->basis = 0;
652 else
653 c->basis = find_basis_for_candidate (c);
654
655 record_potential_basis (c, base);
656 if (flag_expensive_optimizations && kind == CAND_REF)
657 {
658 tree alt_base = get_alternative_base (base);
659 if (alt_base)
660 record_potential_basis (c, alt_base);
661 }
662
663 return c;
664 }
665
666 /* Determine the target cost of statement GS when compiling according
667 to SPEED. */
668
669 static int
stmt_cost(gimple gs,bool speed)670 stmt_cost (gimple gs, bool speed)
671 {
672 tree lhs, rhs1, rhs2;
673 enum machine_mode lhs_mode;
674
675 gcc_assert (is_gimple_assign (gs));
676 lhs = gimple_assign_lhs (gs);
677 rhs1 = gimple_assign_rhs1 (gs);
678 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
679
680 switch (gimple_assign_rhs_code (gs))
681 {
682 case MULT_EXPR:
683 rhs2 = gimple_assign_rhs2 (gs);
684
685 if (tree_fits_shwi_p (rhs2))
686 return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
687
688 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
689 return mul_cost (speed, lhs_mode);
690
691 case PLUS_EXPR:
692 case POINTER_PLUS_EXPR:
693 case MINUS_EXPR:
694 return add_cost (speed, lhs_mode);
695
696 case NEGATE_EXPR:
697 return neg_cost (speed, lhs_mode);
698
699 case NOP_EXPR:
700 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
701
702 /* Note that we don't assign costs to copies that in most cases
703 will go away. */
704 default:
705 ;
706 }
707
708 gcc_unreachable ();
709 return 0;
710 }
711
712 /* Look up the defining statement for BASE_IN and return a pointer
713 to its candidate in the candidate table, if any; otherwise NULL.
714 Only CAND_ADD and CAND_MULT candidates are returned. */
715
716 static slsr_cand_t
base_cand_from_table(tree base_in)717 base_cand_from_table (tree base_in)
718 {
719 slsr_cand_t *result;
720
721 gimple def = SSA_NAME_DEF_STMT (base_in);
722 if (!def)
723 return (slsr_cand_t) NULL;
724
725 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
726
727 if (result && (*result)->kind != CAND_REF)
728 return *result;
729
730 return (slsr_cand_t) NULL;
731 }
732
733 /* Add an entry to the statement-to-candidate mapping. */
734
735 static void
add_cand_for_stmt(gimple gs,slsr_cand_t c)736 add_cand_for_stmt (gimple gs, slsr_cand_t c)
737 {
738 void **slot = pointer_map_insert (stmt_cand_map, gs);
739 gcc_assert (!*slot);
740 *slot = c;
741 }
742
743 /* Given PHI which contains a phi statement, determine whether it
744 satisfies all the requirements of a phi candidate. If so, create
745 a candidate. Note that a CAND_PHI never has a basis itself, but
746 is used to help find a basis for subsequent candidates. */
747
748 static void
slsr_process_phi(gimple phi,bool speed)749 slsr_process_phi (gimple phi, bool speed)
750 {
751 unsigned i;
752 tree arg0_base = NULL_TREE, base_type;
753 slsr_cand_t c;
754 struct loop *cand_loop = gimple_bb (phi)->loop_father;
755 unsigned savings = 0;
756
757 /* A CAND_PHI requires each of its arguments to have the same
758 derived base name. (See the module header commentary for a
759 definition of derived base names.) Furthermore, all feeding
760 definitions must be in the same position in the loop hierarchy
761 as PHI. */
762
763 for (i = 0; i < gimple_phi_num_args (phi); i++)
764 {
765 slsr_cand_t arg_cand;
766 tree arg = gimple_phi_arg_def (phi, i);
767 tree derived_base_name = NULL_TREE;
768 gimple arg_stmt = NULL;
769 basic_block arg_bb = NULL;
770
771 if (TREE_CODE (arg) != SSA_NAME)
772 return;
773
774 arg_cand = base_cand_from_table (arg);
775
776 if (arg_cand)
777 {
778 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
779 {
780 if (!arg_cand->next_interp)
781 return;
782
783 arg_cand = lookup_cand (arg_cand->next_interp);
784 }
785
786 if (!integer_onep (arg_cand->stride))
787 return;
788
789 derived_base_name = arg_cand->base_expr;
790 arg_stmt = arg_cand->cand_stmt;
791 arg_bb = gimple_bb (arg_stmt);
792
793 /* Gather potential dead code savings if the phi statement
794 can be removed later on. */
795 if (has_single_use (arg))
796 {
797 if (gimple_code (arg_stmt) == GIMPLE_PHI)
798 savings += arg_cand->dead_savings;
799 else
800 savings += stmt_cost (arg_stmt, speed);
801 }
802 }
803 else
804 {
805 derived_base_name = arg;
806
807 if (SSA_NAME_IS_DEFAULT_DEF (arg))
808 arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
809 else
810 gimple_bb (SSA_NAME_DEF_STMT (arg));
811 }
812
813 if (!arg_bb || arg_bb->loop_father != cand_loop)
814 return;
815
816 if (i == 0)
817 arg0_base = derived_base_name;
818 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
819 return;
820 }
821
822 /* Create the candidate. "alloc_cand_and_find_basis" is named
823 misleadingly for this case, as no basis will be sought for a
824 CAND_PHI. */
825 base_type = TREE_TYPE (arg0_base);
826
827 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base, double_int_zero,
828 integer_one_node, base_type, savings);
829
830 /* Add the candidate to the statement-candidate mapping. */
831 add_cand_for_stmt (phi, c);
832 }
833
834 /* Given PBASE which is a pointer to tree, look up the defining
835 statement for it and check whether the candidate is in the
836 form of:
837
838 X = B + (1 * S), S is integer constant
839 X = B + (i * S), S is integer one
840
841 If so, set PBASE to the candidate's base_expr and return double
842 int (i * S).
843 Otherwise, just return double int zero. */
844
845 static double_int
backtrace_base_for_ref(tree * pbase)846 backtrace_base_for_ref (tree *pbase)
847 {
848 tree base_in = *pbase;
849 slsr_cand_t base_cand;
850
851 STRIP_NOPS (base_in);
852
853 /* Strip off widening conversion(s) to handle cases where
854 e.g. 'B' is widened from an 'int' in order to calculate
855 a 64-bit address. */
856 if (CONVERT_EXPR_P (base_in)
857 && legal_cast_p_1 (base_in, TREE_OPERAND (base_in, 0)))
858 base_in = get_unwidened (base_in, NULL_TREE);
859
860 if (TREE_CODE (base_in) != SSA_NAME)
861 return tree_to_double_int (integer_zero_node);
862
863 base_cand = base_cand_from_table (base_in);
864
865 while (base_cand && base_cand->kind != CAND_PHI)
866 {
867 if (base_cand->kind == CAND_ADD
868 && base_cand->index.is_one ()
869 && TREE_CODE (base_cand->stride) == INTEGER_CST)
870 {
871 /* X = B + (1 * S), S is integer constant. */
872 *pbase = base_cand->base_expr;
873 return tree_to_double_int (base_cand->stride);
874 }
875 else if (base_cand->kind == CAND_ADD
876 && TREE_CODE (base_cand->stride) == INTEGER_CST
877 && integer_onep (base_cand->stride))
878 {
879 /* X = B + (i * S), S is integer one. */
880 *pbase = base_cand->base_expr;
881 return base_cand->index;
882 }
883
884 if (base_cand->next_interp)
885 base_cand = lookup_cand (base_cand->next_interp);
886 else
887 base_cand = NULL;
888 }
889
890 return tree_to_double_int (integer_zero_node);
891 }
892
893 /* Look for the following pattern:
894
895 *PBASE: MEM_REF (T1, C1)
896
897 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
898 or
899 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
900 or
901 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
902
903 *PINDEX: C4 * BITS_PER_UNIT
904
905 If not present, leave the input values unchanged and return FALSE.
906 Otherwise, modify the input values as follows and return TRUE:
907
908 *PBASE: T1
909 *POFFSET: MULT_EXPR (T2, C3)
910 *PINDEX: C1 + (C2 * C3) + C4
911
912 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
913 will be further restructured to:
914
915 *PBASE: T1
916 *POFFSET: MULT_EXPR (T2', C3)
917 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
918
919 static bool
restructure_reference(tree * pbase,tree * poffset,double_int * pindex,tree * ptype)920 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
921 tree *ptype)
922 {
923 tree base = *pbase, offset = *poffset;
924 double_int index = *pindex;
925 double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
926 tree mult_op0, mult_op1, t1, t2, type;
927 double_int c1, c2, c3, c4, c5;
928
929 if (!base
930 || !offset
931 || TREE_CODE (base) != MEM_REF
932 || TREE_CODE (offset) != MULT_EXPR
933 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
934 || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
935 return false;
936
937 t1 = TREE_OPERAND (base, 0);
938 c1 = mem_ref_offset (base);
939 type = TREE_TYPE (TREE_OPERAND (base, 1));
940
941 mult_op0 = TREE_OPERAND (offset, 0);
942 mult_op1 = TREE_OPERAND (offset, 1);
943
944 c3 = tree_to_double_int (mult_op1);
945
946 if (TREE_CODE (mult_op0) == PLUS_EXPR)
947
948 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
949 {
950 t2 = TREE_OPERAND (mult_op0, 0);
951 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
952 }
953 else
954 return false;
955
956 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
957
958 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
959 {
960 t2 = TREE_OPERAND (mult_op0, 0);
961 c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
962 }
963 else
964 return false;
965
966 else
967 {
968 t2 = mult_op0;
969 c2 = double_int_zero;
970 }
971
972 c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
973 c5 = backtrace_base_for_ref (&t2);
974
975 *pbase = t1;
976 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
977 double_int_to_tree (sizetype, c3));
978 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
979 *ptype = type;
980
981 return true;
982 }
983
984 /* Given GS which contains a data reference, create a CAND_REF entry in
985 the candidate table and attempt to find a basis. */
986
987 static void
slsr_process_ref(gimple gs)988 slsr_process_ref (gimple gs)
989 {
990 tree ref_expr, base, offset, type;
991 HOST_WIDE_INT bitsize, bitpos;
992 enum machine_mode mode;
993 int unsignedp, volatilep;
994 double_int index;
995 slsr_cand_t c;
996
997 if (gimple_vdef (gs))
998 ref_expr = gimple_assign_lhs (gs);
999 else
1000 ref_expr = gimple_assign_rhs1 (gs);
1001
1002 if (!handled_component_p (ref_expr)
1003 || TREE_CODE (ref_expr) == BIT_FIELD_REF
1004 || (TREE_CODE (ref_expr) == COMPONENT_REF
1005 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1006 return;
1007
1008 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1009 &unsignedp, &volatilep, false);
1010 index = double_int::from_uhwi (bitpos);
1011
1012 if (!restructure_reference (&base, &offset, &index, &type))
1013 return;
1014
1015 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1016 type, 0);
1017
1018 /* Add the candidate to the statement-candidate mapping. */
1019 add_cand_for_stmt (gs, c);
1020 }
1021
1022 /* Create a candidate entry for a statement GS, where GS multiplies
1023 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1024 about the two SSA names into the new candidate. Return the new
1025 candidate. */
1026
1027 static slsr_cand_t
create_mul_ssa_cand(gimple gs,tree base_in,tree stride_in,bool speed)1028 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1029 {
1030 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1031 double_int index;
1032 unsigned savings = 0;
1033 slsr_cand_t c;
1034 slsr_cand_t base_cand = base_cand_from_table (base_in);
1035
1036 /* Look at all interpretations of the base candidate, if necessary,
1037 to find information to propagate into this candidate. */
1038 while (base_cand && !base && base_cand->kind != CAND_PHI)
1039 {
1040
1041 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1042 {
1043 /* Y = (B + i') * 1
1044 X = Y * Z
1045 ================
1046 X = (B + i') * Z */
1047 base = base_cand->base_expr;
1048 index = base_cand->index;
1049 stride = stride_in;
1050 ctype = base_cand->cand_type;
1051 if (has_single_use (base_in))
1052 savings = (base_cand->dead_savings
1053 + stmt_cost (base_cand->cand_stmt, speed));
1054 }
1055 else if (base_cand->kind == CAND_ADD
1056 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1057 {
1058 /* Y = B + (i' * S), S constant
1059 X = Y * Z
1060 ============================
1061 X = B + ((i' * S) * Z) */
1062 base = base_cand->base_expr;
1063 index = base_cand->index * tree_to_double_int (base_cand->stride);
1064 stride = stride_in;
1065 ctype = base_cand->cand_type;
1066 if (has_single_use (base_in))
1067 savings = (base_cand->dead_savings
1068 + stmt_cost (base_cand->cand_stmt, speed));
1069 }
1070
1071 if (base_cand->next_interp)
1072 base_cand = lookup_cand (base_cand->next_interp);
1073 else
1074 base_cand = NULL;
1075 }
1076
1077 if (!base)
1078 {
1079 /* No interpretations had anything useful to propagate, so
1080 produce X = (Y + 0) * Z. */
1081 base = base_in;
1082 index = double_int_zero;
1083 stride = stride_in;
1084 ctype = TREE_TYPE (base_in);
1085 }
1086
1087 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1088 ctype, savings);
1089 return c;
1090 }
1091
1092 /* Create a candidate entry for a statement GS, where GS multiplies
1093 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1094 information about BASE_IN into the new candidate. Return the new
1095 candidate. */
1096
1097 static slsr_cand_t
create_mul_imm_cand(gimple gs,tree base_in,tree stride_in,bool speed)1098 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1099 {
1100 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1101 double_int index, temp;
1102 unsigned savings = 0;
1103 slsr_cand_t c;
1104 slsr_cand_t base_cand = base_cand_from_table (base_in);
1105
1106 /* Look at all interpretations of the base candidate, if necessary,
1107 to find information to propagate into this candidate. */
1108 while (base_cand && !base && base_cand->kind != CAND_PHI)
1109 {
1110 if (base_cand->kind == CAND_MULT
1111 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1112 {
1113 /* Y = (B + i') * S, S constant
1114 X = Y * c
1115 ============================
1116 X = (B + i') * (S * c) */
1117 temp = tree_to_double_int (base_cand->stride)
1118 * tree_to_double_int (stride_in);
1119 if (double_int_fits_to_tree_p (TREE_TYPE (stride_in), temp))
1120 {
1121 base = base_cand->base_expr;
1122 index = base_cand->index;
1123 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
1124 ctype = base_cand->cand_type;
1125 if (has_single_use (base_in))
1126 savings = (base_cand->dead_savings
1127 + stmt_cost (base_cand->cand_stmt, speed));
1128 }
1129 }
1130 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1131 {
1132 /* Y = B + (i' * 1)
1133 X = Y * c
1134 ===========================
1135 X = (B + i') * c */
1136 base = base_cand->base_expr;
1137 index = base_cand->index;
1138 stride = stride_in;
1139 ctype = base_cand->cand_type;
1140 if (has_single_use (base_in))
1141 savings = (base_cand->dead_savings
1142 + stmt_cost (base_cand->cand_stmt, speed));
1143 }
1144 else if (base_cand->kind == CAND_ADD
1145 && base_cand->index.is_one ()
1146 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1147 {
1148 /* Y = B + (1 * S), S constant
1149 X = Y * c
1150 ===========================
1151 X = (B + S) * c */
1152 base = base_cand->base_expr;
1153 index = tree_to_double_int (base_cand->stride);
1154 stride = stride_in;
1155 ctype = base_cand->cand_type;
1156 if (has_single_use (base_in))
1157 savings = (base_cand->dead_savings
1158 + stmt_cost (base_cand->cand_stmt, speed));
1159 }
1160
1161 if (base_cand->next_interp)
1162 base_cand = lookup_cand (base_cand->next_interp);
1163 else
1164 base_cand = NULL;
1165 }
1166
1167 if (!base)
1168 {
1169 /* No interpretations had anything useful to propagate, so
1170 produce X = (Y + 0) * c. */
1171 base = base_in;
1172 index = double_int_zero;
1173 stride = stride_in;
1174 ctype = TREE_TYPE (base_in);
1175 }
1176
1177 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1178 ctype, savings);
1179 return c;
1180 }
1181
1182 /* Given GS which is a multiply of scalar integers, make an appropriate
1183 entry in the candidate table. If this is a multiply of two SSA names,
1184 create two CAND_MULT interpretations and attempt to find a basis for
1185 each of them. Otherwise, create a single CAND_MULT and attempt to
1186 find a basis. */
1187
1188 static void
slsr_process_mul(gimple gs,tree rhs1,tree rhs2,bool speed)1189 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
1190 {
1191 slsr_cand_t c, c2;
1192
1193 /* If this is a multiply of an SSA name with itself, it is highly
1194 unlikely that we will get a strength reduction opportunity, so
1195 don't record it as a candidate. This simplifies the logic for
1196 finding a basis, so if this is removed that must be considered. */
1197 if (rhs1 == rhs2)
1198 return;
1199
1200 if (TREE_CODE (rhs2) == SSA_NAME)
1201 {
1202 /* Record an interpretation of this statement in the candidate table
1203 assuming RHS1 is the base expression and RHS2 is the stride. */
1204 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1205
1206 /* Add the first interpretation to the statement-candidate mapping. */
1207 add_cand_for_stmt (gs, c);
1208
1209 /* Record another interpretation of this statement assuming RHS1
1210 is the stride and RHS2 is the base expression. */
1211 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1212 c->next_interp = c2->cand_num;
1213 }
1214 else
1215 {
1216 /* Record an interpretation for the multiply-immediate. */
1217 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1218
1219 /* Add the interpretation to the statement-candidate mapping. */
1220 add_cand_for_stmt (gs, c);
1221 }
1222 }
1223
1224 /* Create a candidate entry for a statement GS, where GS adds two
1225 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1226 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1227 information about the two SSA names into the new candidate.
1228 Return the new candidate. */
1229
1230 static slsr_cand_t
create_add_ssa_cand(gimple gs,tree base_in,tree addend_in,bool subtract_p,bool speed)1231 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
1232 bool subtract_p, bool speed)
1233 {
1234 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
1235 double_int index;
1236 unsigned savings = 0;
1237 slsr_cand_t c;
1238 slsr_cand_t base_cand = base_cand_from_table (base_in);
1239 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1240
1241 /* The most useful transformation is a multiply-immediate feeding
1242 an add or subtract. Look for that first. */
1243 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1244 {
1245 if (addend_cand->kind == CAND_MULT
1246 && addend_cand->index.is_zero ()
1247 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1248 {
1249 /* Z = (B + 0) * S, S constant
1250 X = Y +/- Z
1251 ===========================
1252 X = Y + ((+/-1 * S) * B) */
1253 base = base_in;
1254 index = tree_to_double_int (addend_cand->stride);
1255 if (subtract_p)
1256 index = -index;
1257 stride = addend_cand->base_expr;
1258 ctype = TREE_TYPE (base_in);
1259 if (has_single_use (addend_in))
1260 savings = (addend_cand->dead_savings
1261 + stmt_cost (addend_cand->cand_stmt, speed));
1262 }
1263
1264 if (addend_cand->next_interp)
1265 addend_cand = lookup_cand (addend_cand->next_interp);
1266 else
1267 addend_cand = NULL;
1268 }
1269
1270 while (base_cand && !base && base_cand->kind != CAND_PHI)
1271 {
1272 if (base_cand->kind == CAND_ADD
1273 && (base_cand->index.is_zero ()
1274 || operand_equal_p (base_cand->stride,
1275 integer_zero_node, 0)))
1276 {
1277 /* Y = B + (i' * S), i' * S = 0
1278 X = Y +/- Z
1279 ============================
1280 X = B + (+/-1 * Z) */
1281 base = base_cand->base_expr;
1282 index = subtract_p ? double_int_minus_one : double_int_one;
1283 stride = addend_in;
1284 ctype = base_cand->cand_type;
1285 if (has_single_use (base_in))
1286 savings = (base_cand->dead_savings
1287 + stmt_cost (base_cand->cand_stmt, speed));
1288 }
1289 else if (subtract_p)
1290 {
1291 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1292
1293 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1294 {
1295 if (subtrahend_cand->kind == CAND_MULT
1296 && subtrahend_cand->index.is_zero ()
1297 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1298 {
1299 /* Z = (B + 0) * S, S constant
1300 X = Y - Z
1301 ===========================
1302 Value: X = Y + ((-1 * S) * B) */
1303 base = base_in;
1304 index = tree_to_double_int (subtrahend_cand->stride);
1305 index = -index;
1306 stride = subtrahend_cand->base_expr;
1307 ctype = TREE_TYPE (base_in);
1308 if (has_single_use (addend_in))
1309 savings = (subtrahend_cand->dead_savings
1310 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1311 }
1312
1313 if (subtrahend_cand->next_interp)
1314 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1315 else
1316 subtrahend_cand = NULL;
1317 }
1318 }
1319
1320 if (base_cand->next_interp)
1321 base_cand = lookup_cand (base_cand->next_interp);
1322 else
1323 base_cand = NULL;
1324 }
1325
1326 if (!base)
1327 {
1328 /* No interpretations had anything useful to propagate, so
1329 produce X = Y + (1 * Z). */
1330 base = base_in;
1331 index = subtract_p ? double_int_minus_one : double_int_one;
1332 stride = addend_in;
1333 ctype = TREE_TYPE (base_in);
1334 }
1335
1336 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1337 ctype, savings);
1338 return c;
1339 }
1340
1341 /* Create a candidate entry for a statement GS, where GS adds SSA
1342 name BASE_IN to constant INDEX_IN. Propagate any known information
1343 about BASE_IN into the new candidate. Return the new candidate. */
1344
1345 static slsr_cand_t
create_add_imm_cand(gimple gs,tree base_in,double_int index_in,bool speed)1346 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
1347 {
1348 enum cand_kind kind = CAND_ADD;
1349 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1350 double_int index, multiple;
1351 unsigned savings = 0;
1352 slsr_cand_t c;
1353 slsr_cand_t base_cand = base_cand_from_table (base_in);
1354
1355 while (base_cand && !base && base_cand->kind != CAND_PHI)
1356 {
1357 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
1358
1359 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1360 && index_in.multiple_of (tree_to_double_int (base_cand->stride),
1361 unsigned_p, &multiple))
1362 {
1363 /* Y = (B + i') * S, S constant, c = kS for some integer k
1364 X = Y + c
1365 ============================
1366 X = (B + (i'+ k)) * S
1367 OR
1368 Y = B + (i' * S), S constant, c = kS for some integer k
1369 X = Y + c
1370 ============================
1371 X = (B + (i'+ k)) * S */
1372 kind = base_cand->kind;
1373 base = base_cand->base_expr;
1374 index = base_cand->index + multiple;
1375 stride = base_cand->stride;
1376 ctype = base_cand->cand_type;
1377 if (has_single_use (base_in))
1378 savings = (base_cand->dead_savings
1379 + stmt_cost (base_cand->cand_stmt, speed));
1380 }
1381
1382 if (base_cand->next_interp)
1383 base_cand = lookup_cand (base_cand->next_interp);
1384 else
1385 base_cand = NULL;
1386 }
1387
1388 if (!base)
1389 {
1390 /* No interpretations had anything useful to propagate, so
1391 produce X = Y + (c * 1). */
1392 kind = CAND_ADD;
1393 base = base_in;
1394 index = index_in;
1395 stride = integer_one_node;
1396 ctype = TREE_TYPE (base_in);
1397 }
1398
1399 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1400 ctype, savings);
1401 return c;
1402 }
1403
1404 /* Given GS which is an add or subtract of scalar integers or pointers,
1405 make at least one appropriate entry in the candidate table. */
1406
1407 static void
slsr_process_add(gimple gs,tree rhs1,tree rhs2,bool speed)1408 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1409 {
1410 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1411 slsr_cand_t c = NULL, c2;
1412
1413 if (TREE_CODE (rhs2) == SSA_NAME)
1414 {
1415 /* First record an interpretation assuming RHS1 is the base expression
1416 and RHS2 is the stride. But it doesn't make sense for the
1417 stride to be a pointer, so don't record a candidate in that case. */
1418 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1419 {
1420 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1421
1422 /* Add the first interpretation to the statement-candidate
1423 mapping. */
1424 add_cand_for_stmt (gs, c);
1425 }
1426
1427 /* If the two RHS operands are identical, or this is a subtract,
1428 we're done. */
1429 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1430 return;
1431
1432 /* Otherwise, record another interpretation assuming RHS2 is the
1433 base expression and RHS1 is the stride, again provided that the
1434 stride is not a pointer. */
1435 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1436 {
1437 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1438 if (c)
1439 c->next_interp = c2->cand_num;
1440 else
1441 add_cand_for_stmt (gs, c2);
1442 }
1443 }
1444 else
1445 {
1446 double_int index;
1447
1448 /* Record an interpretation for the add-immediate. */
1449 index = tree_to_double_int (rhs2);
1450 if (subtract_p)
1451 index = -index;
1452
1453 c = create_add_imm_cand (gs, rhs1, index, speed);
1454
1455 /* Add the interpretation to the statement-candidate mapping. */
1456 add_cand_for_stmt (gs, c);
1457 }
1458 }
1459
1460 /* Given GS which is a negate of a scalar integer, make an appropriate
1461 entry in the candidate table. A negate is equivalent to a multiply
1462 by -1. */
1463
1464 static void
slsr_process_neg(gimple gs,tree rhs1,bool speed)1465 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1466 {
1467 /* Record a CAND_MULT interpretation for the multiply by -1. */
1468 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1469
1470 /* Add the interpretation to the statement-candidate mapping. */
1471 add_cand_for_stmt (gs, c);
1472 }
1473
1474 /* Help function for legal_cast_p, operating on two trees. Checks
1475 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1476 for more details. */
1477
1478 static bool
legal_cast_p_1(tree lhs,tree rhs)1479 legal_cast_p_1 (tree lhs, tree rhs)
1480 {
1481 tree lhs_type, rhs_type;
1482 unsigned lhs_size, rhs_size;
1483 bool lhs_wraps, rhs_wraps;
1484
1485 lhs_type = TREE_TYPE (lhs);
1486 rhs_type = TREE_TYPE (rhs);
1487 lhs_size = TYPE_PRECISION (lhs_type);
1488 rhs_size = TYPE_PRECISION (rhs_type);
1489 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1490 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1491
1492 if (lhs_size < rhs_size
1493 || (rhs_wraps && !lhs_wraps)
1494 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1495 return false;
1496
1497 return true;
1498 }
1499
1500 /* Return TRUE if GS is a statement that defines an SSA name from
1501 a conversion and is legal for us to combine with an add and multiply
1502 in the candidate table. For example, suppose we have:
1503
1504 A = B + i;
1505 C = (type) A;
1506 D = C * S;
1507
1508 Without the type-cast, we would create a CAND_MULT for D with base B,
1509 index i, and stride S. We want to record this candidate only if it
1510 is equivalent to apply the type cast following the multiply:
1511
1512 A = B + i;
1513 E = A * S;
1514 D = (type) E;
1515
1516 We will record the type with the candidate for D. This allows us
1517 to use a similar previous candidate as a basis. If we have earlier seen
1518
1519 A' = B + i';
1520 C' = (type) A';
1521 D' = C' * S;
1522
1523 we can replace D with
1524
1525 D = D' + (i - i') * S;
1526
1527 But if moving the type-cast would change semantics, we mustn't do this.
1528
1529 This is legitimate for casts from a non-wrapping integral type to
1530 any integral type of the same or larger size. It is not legitimate
1531 to convert a wrapping type to a non-wrapping type, or to a wrapping
1532 type of a different size. I.e., with a wrapping type, we must
1533 assume that the addition B + i could wrap, in which case performing
1534 the multiply before or after one of the "illegal" type casts will
1535 have different semantics. */
1536
1537 static bool
legal_cast_p(gimple gs,tree rhs)1538 legal_cast_p (gimple gs, tree rhs)
1539 {
1540 if (!is_gimple_assign (gs)
1541 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1542 return false;
1543
1544 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1545 }
1546
1547 /* Given GS which is a cast to a scalar integer type, determine whether
1548 the cast is legal for strength reduction. If so, make at least one
1549 appropriate entry in the candidate table. */
1550
1551 static void
slsr_process_cast(gimple gs,tree rhs1,bool speed)1552 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1553 {
1554 tree lhs, ctype;
1555 slsr_cand_t base_cand, c, c2;
1556 unsigned savings = 0;
1557
1558 if (!legal_cast_p (gs, rhs1))
1559 return;
1560
1561 lhs = gimple_assign_lhs (gs);
1562 base_cand = base_cand_from_table (rhs1);
1563 ctype = TREE_TYPE (lhs);
1564
1565 if (base_cand && base_cand->kind != CAND_PHI)
1566 {
1567 while (base_cand)
1568 {
1569 /* Propagate all data from the base candidate except the type,
1570 which comes from the cast, and the base candidate's cast,
1571 which is no longer applicable. */
1572 if (has_single_use (rhs1))
1573 savings = (base_cand->dead_savings
1574 + stmt_cost (base_cand->cand_stmt, speed));
1575
1576 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1577 base_cand->base_expr,
1578 base_cand->index, base_cand->stride,
1579 ctype, savings);
1580 if (base_cand->next_interp)
1581 base_cand = lookup_cand (base_cand->next_interp);
1582 else
1583 base_cand = NULL;
1584 }
1585 }
1586 else
1587 {
1588 /* If nothing is known about the RHS, create fresh CAND_ADD and
1589 CAND_MULT interpretations:
1590
1591 X = Y + (0 * 1)
1592 X = (Y + 0) * 1
1593
1594 The first of these is somewhat arbitrary, but the choice of
1595 1 for the stride simplifies the logic for propagating casts
1596 into their uses. */
1597 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1598 integer_one_node, ctype, 0);
1599 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1600 integer_one_node, ctype, 0);
1601 c->next_interp = c2->cand_num;
1602 }
1603
1604 /* Add the first (or only) interpretation to the statement-candidate
1605 mapping. */
1606 add_cand_for_stmt (gs, c);
1607 }
1608
1609 /* Given GS which is a copy of a scalar integer type, make at least one
1610 appropriate entry in the candidate table.
1611
1612 This interface is included for completeness, but is unnecessary
1613 if this pass immediately follows a pass that performs copy
1614 propagation, such as DOM. */
1615
1616 static void
slsr_process_copy(gimple gs,tree rhs1,bool speed)1617 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1618 {
1619 slsr_cand_t base_cand, c, c2;
1620 unsigned savings = 0;
1621
1622 base_cand = base_cand_from_table (rhs1);
1623
1624 if (base_cand && base_cand->kind != CAND_PHI)
1625 {
1626 while (base_cand)
1627 {
1628 /* Propagate all data from the base candidate. */
1629 if (has_single_use (rhs1))
1630 savings = (base_cand->dead_savings
1631 + stmt_cost (base_cand->cand_stmt, speed));
1632
1633 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1634 base_cand->base_expr,
1635 base_cand->index, base_cand->stride,
1636 base_cand->cand_type, savings);
1637 if (base_cand->next_interp)
1638 base_cand = lookup_cand (base_cand->next_interp);
1639 else
1640 base_cand = NULL;
1641 }
1642 }
1643 else
1644 {
1645 /* If nothing is known about the RHS, create fresh CAND_ADD and
1646 CAND_MULT interpretations:
1647
1648 X = Y + (0 * 1)
1649 X = (Y + 0) * 1
1650
1651 The first of these is somewhat arbitrary, but the choice of
1652 1 for the stride simplifies the logic for propagating casts
1653 into their uses. */
1654 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1655 integer_one_node, TREE_TYPE (rhs1), 0);
1656 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1657 integer_one_node, TREE_TYPE (rhs1), 0);
1658 c->next_interp = c2->cand_num;
1659 }
1660
1661 /* Add the first (or only) interpretation to the statement-candidate
1662 mapping. */
1663 add_cand_for_stmt (gs, c);
1664 }
1665
1666 class find_candidates_dom_walker : public dom_walker
1667 {
1668 public:
find_candidates_dom_walker(cdi_direction direction)1669 find_candidates_dom_walker (cdi_direction direction)
1670 : dom_walker (direction) {}
1671 virtual void before_dom_children (basic_block);
1672 };
1673
1674 /* Find strength-reduction candidates in block BB. */
1675
1676 void
before_dom_children(basic_block bb)1677 find_candidates_dom_walker::before_dom_children (basic_block bb)
1678 {
1679 bool speed = optimize_bb_for_speed_p (bb);
1680 gimple_stmt_iterator gsi;
1681
1682 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1683 slsr_process_phi (gsi_stmt (gsi), speed);
1684
1685 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1686 {
1687 gimple gs = gsi_stmt (gsi);
1688
1689 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1690 slsr_process_ref (gs);
1691
1692 else if (is_gimple_assign (gs)
1693 && SCALAR_INT_MODE_P
1694 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1695 {
1696 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1697
1698 switch (gimple_assign_rhs_code (gs))
1699 {
1700 case MULT_EXPR:
1701 case PLUS_EXPR:
1702 rhs1 = gimple_assign_rhs1 (gs);
1703 rhs2 = gimple_assign_rhs2 (gs);
1704 /* Should never happen, but currently some buggy situations
1705 in earlier phases put constants in rhs1. */
1706 if (TREE_CODE (rhs1) != SSA_NAME)
1707 continue;
1708 break;
1709
1710 /* Possible future opportunity: rhs1 of a ptr+ can be
1711 an ADDR_EXPR. */
1712 case POINTER_PLUS_EXPR:
1713 case MINUS_EXPR:
1714 rhs2 = gimple_assign_rhs2 (gs);
1715 /* Fall-through. */
1716
1717 case NOP_EXPR:
1718 case MODIFY_EXPR:
1719 case NEGATE_EXPR:
1720 rhs1 = gimple_assign_rhs1 (gs);
1721 if (TREE_CODE (rhs1) != SSA_NAME)
1722 continue;
1723 break;
1724
1725 default:
1726 ;
1727 }
1728
1729 switch (gimple_assign_rhs_code (gs))
1730 {
1731 case MULT_EXPR:
1732 slsr_process_mul (gs, rhs1, rhs2, speed);
1733 break;
1734
1735 case PLUS_EXPR:
1736 case POINTER_PLUS_EXPR:
1737 case MINUS_EXPR:
1738 slsr_process_add (gs, rhs1, rhs2, speed);
1739 break;
1740
1741 case NEGATE_EXPR:
1742 slsr_process_neg (gs, rhs1, speed);
1743 break;
1744
1745 case NOP_EXPR:
1746 slsr_process_cast (gs, rhs1, speed);
1747 break;
1748
1749 case MODIFY_EXPR:
1750 slsr_process_copy (gs, rhs1, speed);
1751 break;
1752
1753 default:
1754 ;
1755 }
1756 }
1757 }
1758 }
1759
1760 /* Dump a candidate for debug. */
1761
1762 static void
dump_candidate(slsr_cand_t c)1763 dump_candidate (slsr_cand_t c)
1764 {
1765 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1766 gimple_bb (c->cand_stmt)->index);
1767 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1768 switch (c->kind)
1769 {
1770 case CAND_MULT:
1771 fputs (" MULT : (", dump_file);
1772 print_generic_expr (dump_file, c->base_expr, 0);
1773 fputs (" + ", dump_file);
1774 dump_double_int (dump_file, c->index, false);
1775 fputs (") * ", dump_file);
1776 print_generic_expr (dump_file, c->stride, 0);
1777 fputs (" : ", dump_file);
1778 break;
1779 case CAND_ADD:
1780 fputs (" ADD : ", dump_file);
1781 print_generic_expr (dump_file, c->base_expr, 0);
1782 fputs (" + (", dump_file);
1783 dump_double_int (dump_file, c->index, false);
1784 fputs (" * ", dump_file);
1785 print_generic_expr (dump_file, c->stride, 0);
1786 fputs (") : ", dump_file);
1787 break;
1788 case CAND_REF:
1789 fputs (" REF : ", dump_file);
1790 print_generic_expr (dump_file, c->base_expr, 0);
1791 fputs (" + (", dump_file);
1792 print_generic_expr (dump_file, c->stride, 0);
1793 fputs (") + ", dump_file);
1794 dump_double_int (dump_file, c->index, false);
1795 fputs (" : ", dump_file);
1796 break;
1797 case CAND_PHI:
1798 fputs (" PHI : ", dump_file);
1799 print_generic_expr (dump_file, c->base_expr, 0);
1800 fputs (" + (unknown * ", dump_file);
1801 print_generic_expr (dump_file, c->stride, 0);
1802 fputs (") : ", dump_file);
1803 break;
1804 default:
1805 gcc_unreachable ();
1806 }
1807 print_generic_expr (dump_file, c->cand_type, 0);
1808 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1809 c->basis, c->dependent, c->sibling);
1810 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1811 c->next_interp, c->dead_savings);
1812 if (c->def_phi)
1813 fprintf (dump_file, " phi: %d\n", c->def_phi);
1814 fputs ("\n", dump_file);
1815 }
1816
1817 /* Dump the candidate vector for debug. */
1818
1819 static void
dump_cand_vec(void)1820 dump_cand_vec (void)
1821 {
1822 unsigned i;
1823 slsr_cand_t c;
1824
1825 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1826
1827 FOR_EACH_VEC_ELT (cand_vec, i, c)
1828 dump_candidate (c);
1829 }
1830
1831 /* Callback used to dump the candidate chains hash table. */
1832
1833 int
ssa_base_cand_dump_callback(cand_chain ** slot,void * ignored ATTRIBUTE_UNUSED)1834 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1835 {
1836 const_cand_chain_t chain = *slot;
1837 cand_chain_t p;
1838
1839 print_generic_expr (dump_file, chain->base_expr, 0);
1840 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1841
1842 for (p = chain->next; p; p = p->next)
1843 fprintf (dump_file, " -> %d", p->cand->cand_num);
1844
1845 fputs ("\n", dump_file);
1846 return 1;
1847 }
1848
1849 /* Dump the candidate chains. */
1850
1851 static void
dump_cand_chains(void)1852 dump_cand_chains (void)
1853 {
1854 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1855 base_cand_map.traverse_noresize <void *, ssa_base_cand_dump_callback> (NULL);
1856 fputs ("\n", dump_file);
1857 }
1858
1859 /* Dump the increment vector for debug. */
1860
1861 static void
dump_incr_vec(void)1862 dump_incr_vec (void)
1863 {
1864 if (dump_file && (dump_flags & TDF_DETAILS))
1865 {
1866 unsigned i;
1867
1868 fprintf (dump_file, "\nIncrement vector:\n\n");
1869
1870 for (i = 0; i < incr_vec_len; i++)
1871 {
1872 fprintf (dump_file, "%3d increment: ", i);
1873 dump_double_int (dump_file, incr_vec[i].incr, false);
1874 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1875 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1876 fputs ("\n initializer: ", dump_file);
1877 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1878 fputs ("\n\n", dump_file);
1879 }
1880 }
1881 }
1882
1883 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1884 data reference. */
1885
1886 static void
replace_ref(tree * expr,slsr_cand_t c)1887 replace_ref (tree *expr, slsr_cand_t c)
1888 {
1889 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1890 unsigned HOST_WIDE_INT misalign;
1891 unsigned align;
1892
1893 /* Ensure the memory reference carries the minimum alignment
1894 requirement for the data type. See PR58041. */
1895 get_object_alignment_1 (*expr, &align, &misalign);
1896 if (misalign != 0)
1897 align = (misalign & -misalign);
1898 if (align < TYPE_ALIGN (acc_type))
1899 acc_type = build_aligned_type (acc_type, align);
1900
1901 add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1902 c->base_expr, c->stride);
1903 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1904 double_int_to_tree (c->cand_type, c->index));
1905
1906 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1907 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1908 TREE_OPERAND (mem_ref, 0)
1909 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1910 /*simple_p=*/true, NULL,
1911 /*before=*/true, GSI_SAME_STMT);
1912 copy_ref_info (mem_ref, *expr);
1913 *expr = mem_ref;
1914 update_stmt (c->cand_stmt);
1915 }
1916
1917 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1918 dependent of candidate C with an equivalent strength-reduced data
1919 reference. */
1920
1921 static void
replace_refs(slsr_cand_t c)1922 replace_refs (slsr_cand_t c)
1923 {
1924 if (dump_file && (dump_flags & TDF_DETAILS))
1925 {
1926 fputs ("Replacing reference: ", dump_file);
1927 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1928 }
1929
1930 if (gimple_vdef (c->cand_stmt))
1931 {
1932 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1933 replace_ref (lhs, c);
1934 }
1935 else
1936 {
1937 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1938 replace_ref (rhs, c);
1939 }
1940
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1942 {
1943 fputs ("With: ", dump_file);
1944 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1945 fputs ("\n", dump_file);
1946 }
1947
1948 if (c->sibling)
1949 replace_refs (lookup_cand (c->sibling));
1950
1951 if (c->dependent)
1952 replace_refs (lookup_cand (c->dependent));
1953 }
1954
1955 /* Return TRUE if candidate C is dependent upon a PHI. */
1956
1957 static bool
phi_dependent_cand_p(slsr_cand_t c)1958 phi_dependent_cand_p (slsr_cand_t c)
1959 {
1960 /* A candidate is not necessarily dependent upon a PHI just because
1961 it has a phi definition for its base name. It may have a basis
1962 that relies upon the same phi definition, in which case the PHI
1963 is irrelevant to this candidate. */
1964 return (c->def_phi
1965 && c->basis
1966 && lookup_cand (c->basis)->def_phi != c->def_phi);
1967 }
1968
1969 /* Calculate the increment required for candidate C relative to
1970 its basis. */
1971
1972 static double_int
cand_increment(slsr_cand_t c)1973 cand_increment (slsr_cand_t c)
1974 {
1975 slsr_cand_t basis;
1976
1977 /* If the candidate doesn't have a basis, just return its own
1978 index. This is useful in record_increments to help us find
1979 an existing initializer. Also, if the candidate's basis is
1980 hidden by a phi, then its own index will be the increment
1981 from the newly introduced phi basis. */
1982 if (!c->basis || phi_dependent_cand_p (c))
1983 return c->index;
1984
1985 basis = lookup_cand (c->basis);
1986 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1987 return c->index - basis->index;
1988 }
1989
1990 /* Calculate the increment required for candidate C relative to
1991 its basis. If we aren't going to generate pointer arithmetic
1992 for this candidate, return the absolute value of that increment
1993 instead. */
1994
1995 static inline double_int
cand_abs_increment(slsr_cand_t c)1996 cand_abs_increment (slsr_cand_t c)
1997 {
1998 double_int increment = cand_increment (c);
1999
2000 if (!address_arithmetic_p && increment.is_negative ())
2001 increment = -increment;
2002
2003 return increment;
2004 }
2005
2006 /* Return TRUE iff candidate C has already been replaced under
2007 another interpretation. */
2008
2009 static inline bool
cand_already_replaced(slsr_cand_t c)2010 cand_already_replaced (slsr_cand_t c)
2011 {
2012 return (gimple_bb (c->cand_stmt) == 0);
2013 }
2014
2015 /* Common logic used by replace_unconditional_candidate and
2016 replace_conditional_candidate. */
2017
2018 static void
replace_mult_candidate(slsr_cand_t c,tree basis_name,double_int bump)2019 replace_mult_candidate (slsr_cand_t c, tree basis_name, double_int bump)
2020 {
2021 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2022 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2023
2024 /* It is highly unlikely, but possible, that the resulting
2025 bump doesn't fit in a HWI. Abandon the replacement
2026 in this case. This does not affect siblings or dependents
2027 of C. Restriction to signed HWI is conservative for unsigned
2028 types but allows for safe negation without twisted logic. */
2029 if (bump.fits_shwi ()
2030 && bump.to_shwi () != HOST_WIDE_INT_MIN
2031 /* It is not useful to replace casts, copies, or adds of
2032 an SSA name and a constant. */
2033 && cand_code != MODIFY_EXPR
2034 && cand_code != NOP_EXPR
2035 && cand_code != PLUS_EXPR
2036 && cand_code != POINTER_PLUS_EXPR
2037 && cand_code != MINUS_EXPR)
2038 {
2039 enum tree_code code = PLUS_EXPR;
2040 tree bump_tree;
2041 gimple stmt_to_print = NULL;
2042
2043 /* If the basis name and the candidate's LHS have incompatible
2044 types, introduce a cast. */
2045 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2046 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2047 if (bump.is_negative ())
2048 {
2049 code = MINUS_EXPR;
2050 bump = -bump;
2051 }
2052
2053 bump_tree = double_int_to_tree (target_type, bump);
2054
2055 if (dump_file && (dump_flags & TDF_DETAILS))
2056 {
2057 fputs ("Replacing: ", dump_file);
2058 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2059 }
2060
2061 if (bump.is_zero ())
2062 {
2063 tree lhs = gimple_assign_lhs (c->cand_stmt);
2064 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
2065 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2066 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2067 gsi_replace (&gsi, copy_stmt, false);
2068 c->cand_stmt = copy_stmt;
2069 if (dump_file && (dump_flags & TDF_DETAILS))
2070 stmt_to_print = copy_stmt;
2071 }
2072 else
2073 {
2074 tree rhs1, rhs2;
2075 if (cand_code != NEGATE_EXPR) {
2076 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2077 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2078 }
2079 if (cand_code != NEGATE_EXPR
2080 && ((operand_equal_p (rhs1, basis_name, 0)
2081 && operand_equal_p (rhs2, bump_tree, 0))
2082 || (operand_equal_p (rhs1, bump_tree, 0)
2083 && operand_equal_p (rhs2, basis_name, 0))))
2084 {
2085 if (dump_file && (dump_flags & TDF_DETAILS))
2086 {
2087 fputs ("(duplicate, not actually replacing)", dump_file);
2088 stmt_to_print = c->cand_stmt;
2089 }
2090 }
2091 else
2092 {
2093 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2094 gimple_assign_set_rhs_with_ops (&gsi, code,
2095 basis_name, bump_tree);
2096 update_stmt (gsi_stmt (gsi));
2097 c->cand_stmt = gsi_stmt (gsi);
2098 if (dump_file && (dump_flags & TDF_DETAILS))
2099 stmt_to_print = gsi_stmt (gsi);
2100 }
2101 }
2102
2103 if (dump_file && (dump_flags & TDF_DETAILS))
2104 {
2105 fputs ("With: ", dump_file);
2106 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2107 fputs ("\n", dump_file);
2108 }
2109 }
2110 }
2111
2112 /* Replace candidate C with an add or subtract. Note that we only
2113 operate on CAND_MULTs with known strides, so we will never generate
2114 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2115 X = Y + ((i - i') * S), as described in the module commentary. The
2116 folded value ((i - i') * S) is referred to here as the "bump." */
2117
2118 static void
replace_unconditional_candidate(slsr_cand_t c)2119 replace_unconditional_candidate (slsr_cand_t c)
2120 {
2121 slsr_cand_t basis;
2122 double_int stride, bump;
2123
2124 if (cand_already_replaced (c))
2125 return;
2126
2127 basis = lookup_cand (c->basis);
2128 stride = tree_to_double_int (c->stride);
2129 bump = cand_increment (c) * stride;
2130
2131 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2132 }
2133
2134 /* Return the index in the increment vector of the given INCREMENT,
2135 or -1 if not found. The latter can occur if more than
2136 MAX_INCR_VEC_LEN increments have been found. */
2137
2138 static inline int
incr_vec_index(double_int increment)2139 incr_vec_index (double_int increment)
2140 {
2141 unsigned i;
2142
2143 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2144 ;
2145
2146 if (i < incr_vec_len)
2147 return i;
2148 else
2149 return -1;
2150 }
2151
2152 /* Create a new statement along edge E to add BASIS_NAME to the product
2153 of INCREMENT and the stride of candidate C. Create and return a new
2154 SSA name from *VAR to be used as the LHS of the new statement.
2155 KNOWN_STRIDE is true iff C's stride is a constant. */
2156
2157 static tree
create_add_on_incoming_edge(slsr_cand_t c,tree basis_name,double_int increment,edge e,location_t loc,bool known_stride)2158 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2159 double_int increment, edge e, location_t loc,
2160 bool known_stride)
2161 {
2162 basic_block insert_bb;
2163 gimple_stmt_iterator gsi;
2164 tree lhs, basis_type;
2165 gimple new_stmt;
2166
2167 /* If the add candidate along this incoming edge has the same
2168 index as C's hidden basis, the hidden basis represents this
2169 edge correctly. */
2170 if (increment.is_zero ())
2171 return basis_name;
2172
2173 basis_type = TREE_TYPE (basis_name);
2174 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2175
2176 if (known_stride)
2177 {
2178 tree bump_tree;
2179 enum tree_code code = PLUS_EXPR;
2180 double_int bump = increment * tree_to_double_int (c->stride);
2181 if (bump.is_negative ())
2182 {
2183 code = MINUS_EXPR;
2184 bump = -bump;
2185 }
2186
2187 bump_tree = double_int_to_tree (basis_type, bump);
2188 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2189 bump_tree);
2190 }
2191 else
2192 {
2193 int i;
2194 bool negate_incr = (!address_arithmetic_p && increment.is_negative ());
2195 i = incr_vec_index (negate_incr ? -increment : increment);
2196 gcc_assert (i >= 0);
2197
2198 if (incr_vec[i].initializer)
2199 {
2200 enum tree_code code = negate_incr ? MINUS_EXPR : PLUS_EXPR;
2201 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2202 incr_vec[i].initializer);
2203 }
2204 else if (increment.is_one ())
2205 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, lhs, basis_name,
2206 c->stride);
2207 else if (increment.is_minus_one ())
2208 new_stmt = gimple_build_assign_with_ops (MINUS_EXPR, lhs, basis_name,
2209 c->stride);
2210 else
2211 gcc_unreachable ();
2212 }
2213
2214 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2215 gsi = gsi_last_bb (insert_bb);
2216
2217 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2218 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2219 else
2220 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2221
2222 gimple_set_location (new_stmt, loc);
2223
2224 if (dump_file && (dump_flags & TDF_DETAILS))
2225 {
2226 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2227 print_gimple_stmt (dump_file, new_stmt, 0, 0);
2228 }
2229
2230 return lhs;
2231 }
2232
2233 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2234 is hidden by the phi node FROM_PHI, create a new phi node in the same
2235 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2236 with its phi arguments representing conditional adjustments to the
2237 hidden basis along conditional incoming paths. Those adjustments are
2238 made by creating add statements (and sometimes recursively creating
2239 phis) along those incoming paths. LOC is the location to attach to
2240 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2241 constant. */
2242
2243 static tree
create_phi_basis(slsr_cand_t c,gimple from_phi,tree basis_name,location_t loc,bool known_stride)2244 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name,
2245 location_t loc, bool known_stride)
2246 {
2247 int i;
2248 tree name, phi_arg;
2249 gimple phi;
2250 vec<tree> phi_args;
2251 slsr_cand_t basis = lookup_cand (c->basis);
2252 int nargs = gimple_phi_num_args (from_phi);
2253 basic_block phi_bb = gimple_bb (from_phi);
2254 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (from_phi));
2255 phi_args.create (nargs);
2256
2257 /* Process each argument of the existing phi that represents
2258 conditionally-executed add candidates. */
2259 for (i = 0; i < nargs; i++)
2260 {
2261 edge e = (*phi_bb->preds)[i];
2262 tree arg = gimple_phi_arg_def (from_phi, i);
2263 tree feeding_def;
2264
2265 /* If the phi argument is the base name of the CAND_PHI, then
2266 this incoming arc should use the hidden basis. */
2267 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2268 if (basis->index.is_zero ())
2269 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2270 else
2271 {
2272 double_int incr = -basis->index;
2273 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2274 e, loc, known_stride);
2275 }
2276 else
2277 {
2278 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2279
2280 /* If there is another phi along this incoming edge, we must
2281 process it in the same fashion to ensure that all basis
2282 adjustments are made along its incoming edges. */
2283 if (gimple_code (arg_def) == GIMPLE_PHI)
2284 feeding_def = create_phi_basis (c, arg_def, basis_name,
2285 loc, known_stride);
2286 else
2287 {
2288 slsr_cand_t arg_cand = base_cand_from_table (arg);
2289 double_int diff = arg_cand->index - basis->index;
2290 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2291 e, loc, known_stride);
2292 }
2293 }
2294
2295 /* Because of recursion, we need to save the arguments in a vector
2296 so we can create the PHI statement all at once. Otherwise the
2297 storage for the half-created PHI can be reclaimed. */
2298 phi_args.safe_push (feeding_def);
2299 }
2300
2301 /* Create the new phi basis. */
2302 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2303 phi = create_phi_node (name, phi_bb);
2304 SSA_NAME_DEF_STMT (name) = phi;
2305
2306 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2307 {
2308 edge e = (*phi_bb->preds)[i];
2309 add_phi_arg (phi, phi_arg, e, loc);
2310 }
2311
2312 update_stmt (phi);
2313
2314 if (dump_file && (dump_flags & TDF_DETAILS))
2315 {
2316 fputs ("Introducing new phi basis: ", dump_file);
2317 print_gimple_stmt (dump_file, phi, 0, 0);
2318 }
2319
2320 return name;
2321 }
2322
2323 /* Given a candidate C whose basis is hidden by at least one intervening
2324 phi, introduce a matching number of new phis to represent its basis
2325 adjusted by conditional increments along possible incoming paths. Then
2326 replace C as though it were an unconditional candidate, using the new
2327 basis. */
2328
2329 static void
replace_conditional_candidate(slsr_cand_t c)2330 replace_conditional_candidate (slsr_cand_t c)
2331 {
2332 tree basis_name, name;
2333 slsr_cand_t basis;
2334 location_t loc;
2335 double_int stride, bump;
2336
2337 /* Look up the LHS SSA name from C's basis. This will be the
2338 RHS1 of the adds we will introduce to create new phi arguments. */
2339 basis = lookup_cand (c->basis);
2340 basis_name = gimple_assign_lhs (basis->cand_stmt);
2341
2342 /* Create a new phi statement which will represent C's true basis
2343 after the transformation is complete. */
2344 loc = gimple_location (c->cand_stmt);
2345 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2346 basis_name, loc, KNOWN_STRIDE);
2347 /* Replace C with an add of the new basis phi and a constant. */
2348 stride = tree_to_double_int (c->stride);
2349 bump = c->index * stride;
2350
2351 replace_mult_candidate (c, name, bump);
2352 }
2353
2354 /* Compute the expected costs of inserting basis adjustments for
2355 candidate C with phi-definition PHI. The cost of inserting
2356 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2357 which are themselves phi results, recursively calculate costs
2358 for those phis as well. */
2359
2360 static int
phi_add_costs(gimple phi,slsr_cand_t c,int one_add_cost)2361 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost)
2362 {
2363 unsigned i;
2364 int cost = 0;
2365 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2366
2367 /* If we work our way back to a phi that isn't dominated by the hidden
2368 basis, this isn't a candidate for replacement. Indicate this by
2369 returning an unreasonably high cost. It's not easy to detect
2370 these situations when determining the basis, so we defer the
2371 decision until now. */
2372 basic_block phi_bb = gimple_bb (phi);
2373 slsr_cand_t basis = lookup_cand (c->basis);
2374 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2375
2376 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2377 return COST_INFINITE;
2378
2379 for (i = 0; i < gimple_phi_num_args (phi); i++)
2380 {
2381 tree arg = gimple_phi_arg_def (phi, i);
2382
2383 if (arg != phi_cand->base_expr)
2384 {
2385 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2386
2387 if (gimple_code (arg_def) == GIMPLE_PHI)
2388 cost += phi_add_costs (arg_def, c, one_add_cost);
2389 else
2390 {
2391 slsr_cand_t arg_cand = base_cand_from_table (arg);
2392
2393 if (arg_cand->index != c->index)
2394 cost += one_add_cost;
2395 }
2396 }
2397 }
2398
2399 return cost;
2400 }
2401
2402 /* For candidate C, each sibling of candidate C, and each dependent of
2403 candidate C, determine whether the candidate is dependent upon a
2404 phi that hides its basis. If not, replace the candidate unconditionally.
2405 Otherwise, determine whether the cost of introducing compensation code
2406 for the candidate is offset by the gains from strength reduction. If
2407 so, replace the candidate and introduce the compensation code. */
2408
2409 static void
replace_uncond_cands_and_profitable_phis(slsr_cand_t c)2410 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2411 {
2412 if (phi_dependent_cand_p (c))
2413 {
2414 if (c->kind == CAND_MULT)
2415 {
2416 /* A candidate dependent upon a phi will replace a multiply by
2417 a constant with an add, and will insert at most one add for
2418 each phi argument. Add these costs with the potential dead-code
2419 savings to determine profitability. */
2420 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2421 int mult_savings = stmt_cost (c->cand_stmt, speed);
2422 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2423 tree phi_result = gimple_phi_result (phi);
2424 int one_add_cost = add_cost (speed,
2425 TYPE_MODE (TREE_TYPE (phi_result)));
2426 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2427 int cost = add_costs - mult_savings - c->dead_savings;
2428
2429 if (dump_file && (dump_flags & TDF_DETAILS))
2430 {
2431 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2432 fprintf (dump_file, " add_costs = %d\n", add_costs);
2433 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2434 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2435 fprintf (dump_file, " cost = %d\n", cost);
2436 if (cost <= COST_NEUTRAL)
2437 fputs (" Replacing...\n", dump_file);
2438 else
2439 fputs (" Not replaced.\n", dump_file);
2440 }
2441
2442 if (cost <= COST_NEUTRAL)
2443 replace_conditional_candidate (c);
2444 }
2445 }
2446 else
2447 replace_unconditional_candidate (c);
2448
2449 if (c->sibling)
2450 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2451
2452 if (c->dependent)
2453 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2454 }
2455
2456 /* Count the number of candidates in the tree rooted at C that have
2457 not already been replaced under other interpretations. */
2458
2459 static int
count_candidates(slsr_cand_t c)2460 count_candidates (slsr_cand_t c)
2461 {
2462 unsigned count = cand_already_replaced (c) ? 0 : 1;
2463
2464 if (c->sibling)
2465 count += count_candidates (lookup_cand (c->sibling));
2466
2467 if (c->dependent)
2468 count += count_candidates (lookup_cand (c->dependent));
2469
2470 return count;
2471 }
2472
2473 /* Increase the count of INCREMENT by one in the increment vector.
2474 INCREMENT is associated with candidate C. If INCREMENT is to be
2475 conditionally executed as part of a conditional candidate replacement,
2476 IS_PHI_ADJUST is true, otherwise false. If an initializer
2477 T_0 = stride * I is provided by a candidate that dominates all
2478 candidates with the same increment, also record T_0 for subsequent use. */
2479
2480 static void
record_increment(slsr_cand_t c,double_int increment,bool is_phi_adjust)2481 record_increment (slsr_cand_t c, double_int increment, bool is_phi_adjust)
2482 {
2483 bool found = false;
2484 unsigned i;
2485
2486 /* Treat increments that differ only in sign as identical so as to
2487 share initializers, unless we are generating pointer arithmetic. */
2488 if (!address_arithmetic_p && increment.is_negative ())
2489 increment = -increment;
2490
2491 for (i = 0; i < incr_vec_len; i++)
2492 {
2493 if (incr_vec[i].incr == increment)
2494 {
2495 incr_vec[i].count++;
2496 found = true;
2497
2498 /* If we previously recorded an initializer that doesn't
2499 dominate this candidate, it's not going to be useful to
2500 us after all. */
2501 if (incr_vec[i].initializer
2502 && !dominated_by_p (CDI_DOMINATORS,
2503 gimple_bb (c->cand_stmt),
2504 incr_vec[i].init_bb))
2505 {
2506 incr_vec[i].initializer = NULL_TREE;
2507 incr_vec[i].init_bb = NULL;
2508 }
2509
2510 break;
2511 }
2512 }
2513
2514 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2515 {
2516 /* The first time we see an increment, create the entry for it.
2517 If this is the root candidate which doesn't have a basis, set
2518 the count to zero. We're only processing it so it can possibly
2519 provide an initializer for other candidates. */
2520 incr_vec[incr_vec_len].incr = increment;
2521 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2522 incr_vec[incr_vec_len].cost = COST_INFINITE;
2523
2524 /* Optimistically record the first occurrence of this increment
2525 as providing an initializer (if it does); we will revise this
2526 opinion later if it doesn't dominate all other occurrences.
2527 Exception: increments of -1, 0, 1 never need initializers;
2528 and phi adjustments don't ever provide initializers. */
2529 if (c->kind == CAND_ADD
2530 && !is_phi_adjust
2531 && c->index == increment
2532 && (increment.sgt (double_int_one)
2533 || increment.slt (double_int_minus_one))
2534 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2535 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2536 {
2537 tree t0 = NULL_TREE;
2538 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2539 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2540 if (operand_equal_p (rhs1, c->base_expr, 0))
2541 t0 = rhs2;
2542 else if (operand_equal_p (rhs2, c->base_expr, 0))
2543 t0 = rhs1;
2544 if (t0
2545 && SSA_NAME_DEF_STMT (t0)
2546 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2547 {
2548 incr_vec[incr_vec_len].initializer = t0;
2549 incr_vec[incr_vec_len++].init_bb
2550 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2551 }
2552 else
2553 {
2554 incr_vec[incr_vec_len].initializer = NULL_TREE;
2555 incr_vec[incr_vec_len++].init_bb = NULL;
2556 }
2557 }
2558 else
2559 {
2560 incr_vec[incr_vec_len].initializer = NULL_TREE;
2561 incr_vec[incr_vec_len++].init_bb = NULL;
2562 }
2563 }
2564 }
2565
2566 /* Given phi statement PHI that hides a candidate from its BASIS, find
2567 the increments along each incoming arc (recursively handling additional
2568 phis that may be present) and record them. These increments are the
2569 difference in index between the index-adjusting statements and the
2570 index of the basis. */
2571
2572 static void
record_phi_increments(slsr_cand_t basis,gimple phi)2573 record_phi_increments (slsr_cand_t basis, gimple phi)
2574 {
2575 unsigned i;
2576 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2577
2578 for (i = 0; i < gimple_phi_num_args (phi); i++)
2579 {
2580 tree arg = gimple_phi_arg_def (phi, i);
2581
2582 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2583 {
2584 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2585
2586 if (gimple_code (arg_def) == GIMPLE_PHI)
2587 record_phi_increments (basis, arg_def);
2588 else
2589 {
2590 slsr_cand_t arg_cand = base_cand_from_table (arg);
2591 double_int diff = arg_cand->index - basis->index;
2592 record_increment (arg_cand, diff, PHI_ADJUST);
2593 }
2594 }
2595 }
2596 }
2597
2598 /* Determine how many times each unique increment occurs in the set
2599 of candidates rooted at C's parent, recording the data in the
2600 increment vector. For each unique increment I, if an initializer
2601 T_0 = stride * I is provided by a candidate that dominates all
2602 candidates with the same increment, also record T_0 for subsequent
2603 use. */
2604
2605 static void
record_increments(slsr_cand_t c)2606 record_increments (slsr_cand_t c)
2607 {
2608 if (!cand_already_replaced (c))
2609 {
2610 if (!phi_dependent_cand_p (c))
2611 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2612 else
2613 {
2614 /* A candidate with a basis hidden by a phi will have one
2615 increment for its relationship to the index represented by
2616 the phi, and potentially additional increments along each
2617 incoming edge. For the root of the dependency tree (which
2618 has no basis), process just the initial index in case it has
2619 an initializer that can be used by subsequent candidates. */
2620 record_increment (c, c->index, NOT_PHI_ADJUST);
2621
2622 if (c->basis)
2623 record_phi_increments (lookup_cand (c->basis),
2624 lookup_cand (c->def_phi)->cand_stmt);
2625 }
2626 }
2627
2628 if (c->sibling)
2629 record_increments (lookup_cand (c->sibling));
2630
2631 if (c->dependent)
2632 record_increments (lookup_cand (c->dependent));
2633 }
2634
2635 /* Add up and return the costs of introducing add statements that
2636 require the increment INCR on behalf of candidate C and phi
2637 statement PHI. Accumulate into *SAVINGS the potential savings
2638 from removing existing statements that feed PHI and have no other
2639 uses. */
2640
2641 static int
phi_incr_cost(slsr_cand_t c,double_int incr,gimple phi,int * savings)2642 phi_incr_cost (slsr_cand_t c, double_int incr, gimple phi, int *savings)
2643 {
2644 unsigned i;
2645 int cost = 0;
2646 slsr_cand_t basis = lookup_cand (c->basis);
2647 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2648
2649 for (i = 0; i < gimple_phi_num_args (phi); i++)
2650 {
2651 tree arg = gimple_phi_arg_def (phi, i);
2652
2653 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2654 {
2655 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2656
2657 if (gimple_code (arg_def) == GIMPLE_PHI)
2658 {
2659 int feeding_savings = 0;
2660 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2661 if (has_single_use (gimple_phi_result (arg_def)))
2662 *savings += feeding_savings;
2663 }
2664 else
2665 {
2666 slsr_cand_t arg_cand = base_cand_from_table (arg);
2667 double_int diff = arg_cand->index - basis->index;
2668
2669 if (incr == diff)
2670 {
2671 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2672 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2673 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2674 if (has_single_use (lhs))
2675 *savings += stmt_cost (arg_cand->cand_stmt, true);
2676 }
2677 }
2678 }
2679 }
2680
2681 return cost;
2682 }
2683
2684 /* Return the first candidate in the tree rooted at C that has not
2685 already been replaced, favoring siblings over dependents. */
2686
2687 static slsr_cand_t
unreplaced_cand_in_tree(slsr_cand_t c)2688 unreplaced_cand_in_tree (slsr_cand_t c)
2689 {
2690 if (!cand_already_replaced (c))
2691 return c;
2692
2693 if (c->sibling)
2694 {
2695 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2696 if (sib)
2697 return sib;
2698 }
2699
2700 if (c->dependent)
2701 {
2702 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2703 if (dep)
2704 return dep;
2705 }
2706
2707 return NULL;
2708 }
2709
2710 /* Return TRUE if the candidates in the tree rooted at C should be
2711 optimized for speed, else FALSE. We estimate this based on the block
2712 containing the most dominant candidate in the tree that has not yet
2713 been replaced. */
2714
2715 static bool
optimize_cands_for_speed_p(slsr_cand_t c)2716 optimize_cands_for_speed_p (slsr_cand_t c)
2717 {
2718 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2719 gcc_assert (c2);
2720 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2721 }
2722
2723 /* Add COST_IN to the lowest cost of any dependent path starting at
2724 candidate C or any of its siblings, counting only candidates along
2725 such paths with increment INCR. Assume that replacing a candidate
2726 reduces cost by REPL_SAVINGS. Also account for savings from any
2727 statements that would go dead. If COUNT_PHIS is true, include
2728 costs of introducing feeding statements for conditional candidates. */
2729
2730 static int
lowest_cost_path(int cost_in,int repl_savings,slsr_cand_t c,double_int incr,bool count_phis)2731 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2732 double_int incr, bool count_phis)
2733 {
2734 int local_cost, sib_cost, savings = 0;
2735 double_int cand_incr = cand_abs_increment (c);
2736
2737 if (cand_already_replaced (c))
2738 local_cost = cost_in;
2739 else if (incr == cand_incr)
2740 local_cost = cost_in - repl_savings - c->dead_savings;
2741 else
2742 local_cost = cost_in - c->dead_savings;
2743
2744 if (count_phis
2745 && phi_dependent_cand_p (c)
2746 && !cand_already_replaced (c))
2747 {
2748 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2749 local_cost += phi_incr_cost (c, incr, phi, &savings);
2750
2751 if (has_single_use (gimple_phi_result (phi)))
2752 local_cost -= savings;
2753 }
2754
2755 if (c->dependent)
2756 local_cost = lowest_cost_path (local_cost, repl_savings,
2757 lookup_cand (c->dependent), incr,
2758 count_phis);
2759
2760 if (c->sibling)
2761 {
2762 sib_cost = lowest_cost_path (cost_in, repl_savings,
2763 lookup_cand (c->sibling), incr,
2764 count_phis);
2765 local_cost = MIN (local_cost, sib_cost);
2766 }
2767
2768 return local_cost;
2769 }
2770
2771 /* Compute the total savings that would accrue from all replacements
2772 in the candidate tree rooted at C, counting only candidates with
2773 increment INCR. Assume that replacing a candidate reduces cost
2774 by REPL_SAVINGS. Also account for savings from statements that
2775 would go dead. */
2776
2777 static int
total_savings(int repl_savings,slsr_cand_t c,double_int incr,bool count_phis)2778 total_savings (int repl_savings, slsr_cand_t c, double_int incr,
2779 bool count_phis)
2780 {
2781 int savings = 0;
2782 double_int cand_incr = cand_abs_increment (c);
2783
2784 if (incr == cand_incr && !cand_already_replaced (c))
2785 savings += repl_savings + c->dead_savings;
2786
2787 if (count_phis
2788 && phi_dependent_cand_p (c)
2789 && !cand_already_replaced (c))
2790 {
2791 int phi_savings = 0;
2792 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2793 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2794
2795 if (has_single_use (gimple_phi_result (phi)))
2796 savings += phi_savings;
2797 }
2798
2799 if (c->dependent)
2800 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2801 count_phis);
2802
2803 if (c->sibling)
2804 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2805 count_phis);
2806
2807 return savings;
2808 }
2809
2810 /* Use target-specific costs to determine and record which increments
2811 in the current candidate tree are profitable to replace, assuming
2812 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2813 the candidate tree.
2814
2815 One slight limitation here is that we don't account for the possible
2816 introduction of casts in some cases. See replace_one_candidate for
2817 the cases where these are introduced. This should probably be cleaned
2818 up sometime. */
2819
2820 static void
analyze_increments(slsr_cand_t first_dep,enum machine_mode mode,bool speed)2821 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
2822 {
2823 unsigned i;
2824
2825 for (i = 0; i < incr_vec_len; i++)
2826 {
2827 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2828
2829 /* If somehow this increment is bigger than a HWI, we won't
2830 be optimizing candidates that use it. And if the increment
2831 has a count of zero, nothing will be done with it. */
2832 if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
2833 incr_vec[i].cost = COST_INFINITE;
2834
2835 /* Increments of 0, 1, and -1 are always profitable to replace,
2836 because they always replace a multiply or add with an add or
2837 copy, and may cause one or more existing instructions to go
2838 dead. Exception: -1 can't be assumed to be profitable for
2839 pointer addition. */
2840 else if (incr == 0
2841 || incr == 1
2842 || (incr == -1
2843 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2844 != POINTER_PLUS_EXPR)))
2845 incr_vec[i].cost = COST_NEUTRAL;
2846
2847 /* FORNOW: If we need to add an initializer, give up if a cast from
2848 the candidate's type to its stride's type can lose precision.
2849 This could eventually be handled better by expressly retaining the
2850 result of a cast to a wider type in the stride. Example:
2851
2852 short int _1;
2853 _2 = (int) _1;
2854 _3 = _2 * 10;
2855 _4 = x + _3; ADD: x + (10 * _1) : int
2856 _5 = _2 * 15;
2857 _6 = x + _3; ADD: x + (15 * _1) : int
2858
2859 Right now replacing _6 would cause insertion of an initializer
2860 of the form "short int T = _1 * 5;" followed by a cast to
2861 int, which could overflow incorrectly. Had we recorded _2 or
2862 (int)_1 as the stride, this wouldn't happen. However, doing
2863 this breaks other opportunities, so this will require some
2864 care. */
2865 else if (!incr_vec[i].initializer
2866 && TREE_CODE (first_dep->stride) != INTEGER_CST
2867 && !legal_cast_p_1 (first_dep->stride,
2868 gimple_assign_lhs (first_dep->cand_stmt)))
2869
2870 incr_vec[i].cost = COST_INFINITE;
2871
2872 /* If we need to add an initializer, make sure we don't introduce
2873 a multiply by a pointer type, which can happen in certain cast
2874 scenarios. FIXME: When cleaning up these cast issues, we can
2875 afford to introduce the multiply provided we cast out to an
2876 unsigned int of appropriate size. */
2877 else if (!incr_vec[i].initializer
2878 && TREE_CODE (first_dep->stride) != INTEGER_CST
2879 && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
2880
2881 incr_vec[i].cost = COST_INFINITE;
2882
2883 /* For any other increment, if this is a multiply candidate, we
2884 must introduce a temporary T and initialize it with
2885 T_0 = stride * increment. When optimizing for speed, walk the
2886 candidate tree to calculate the best cost reduction along any
2887 path; if it offsets the fixed cost of inserting the initializer,
2888 replacing the increment is profitable. When optimizing for
2889 size, instead calculate the total cost reduction from replacing
2890 all candidates with this increment. */
2891 else if (first_dep->kind == CAND_MULT)
2892 {
2893 int cost = mult_by_coeff_cost (incr, mode, speed);
2894 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2895 if (speed)
2896 cost = lowest_cost_path (cost, repl_savings, first_dep,
2897 incr_vec[i].incr, COUNT_PHIS);
2898 else
2899 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2900 COUNT_PHIS);
2901
2902 incr_vec[i].cost = cost;
2903 }
2904
2905 /* If this is an add candidate, the initializer may already
2906 exist, so only calculate the cost of the initializer if it
2907 doesn't. We are replacing one add with another here, so the
2908 known replacement savings is zero. We will account for removal
2909 of dead instructions in lowest_cost_path or total_savings. */
2910 else
2911 {
2912 int cost = 0;
2913 if (!incr_vec[i].initializer)
2914 cost = mult_by_coeff_cost (incr, mode, speed);
2915
2916 if (speed)
2917 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
2918 DONT_COUNT_PHIS);
2919 else
2920 cost -= total_savings (0, first_dep, incr_vec[i].incr,
2921 DONT_COUNT_PHIS);
2922
2923 incr_vec[i].cost = cost;
2924 }
2925 }
2926 }
2927
2928 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2929 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2930 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2931 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2932 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2933
2934 static basic_block
ncd_for_two_cands(basic_block bb1,basic_block bb2,slsr_cand_t c1,slsr_cand_t c2,slsr_cand_t * where)2935 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2936 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2937 {
2938 basic_block ncd;
2939
2940 if (!bb1)
2941 {
2942 *where = c2;
2943 return bb2;
2944 }
2945
2946 if (!bb2)
2947 {
2948 *where = c1;
2949 return bb1;
2950 }
2951
2952 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2953
2954 /* If both candidates are in the same block, the earlier
2955 candidate wins. */
2956 if (bb1 == ncd && bb2 == ncd)
2957 {
2958 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2959 *where = c2;
2960 else
2961 *where = c1;
2962 }
2963
2964 /* Otherwise, if one of them produced a candidate in the
2965 dominator, that one wins. */
2966 else if (bb1 == ncd)
2967 *where = c1;
2968
2969 else if (bb2 == ncd)
2970 *where = c2;
2971
2972 /* If neither matches the dominator, neither wins. */
2973 else
2974 *where = NULL;
2975
2976 return ncd;
2977 }
2978
2979 /* Consider all candidates that feed PHI. Find the nearest common
2980 dominator of those candidates requiring the given increment INCR.
2981 Further find and return the nearest common dominator of this result
2982 with block NCD. If the returned block contains one or more of the
2983 candidates, return the earliest candidate in the block in *WHERE. */
2984
2985 static basic_block
ncd_with_phi(slsr_cand_t c,double_int incr,gimple phi,basic_block ncd,slsr_cand_t * where)2986 ncd_with_phi (slsr_cand_t c, double_int incr, gimple phi,
2987 basic_block ncd, slsr_cand_t *where)
2988 {
2989 unsigned i;
2990 slsr_cand_t basis = lookup_cand (c->basis);
2991 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2992
2993 for (i = 0; i < gimple_phi_num_args (phi); i++)
2994 {
2995 tree arg = gimple_phi_arg_def (phi, i);
2996
2997 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2998 {
2999 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3000
3001 if (gimple_code (arg_def) == GIMPLE_PHI)
3002 ncd = ncd_with_phi (c, incr, arg_def, ncd, where);
3003 else
3004 {
3005 slsr_cand_t arg_cand = base_cand_from_table (arg);
3006 double_int diff = arg_cand->index - basis->index;
3007 basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3008
3009 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3010 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3011 }
3012 }
3013 }
3014
3015 return ncd;
3016 }
3017
3018 /* Consider the candidate C together with any candidates that feed
3019 C's phi dependence (if any). Find and return the nearest common
3020 dominator of those candidates requiring the given increment INCR.
3021 If the returned block contains one or more of the candidates,
3022 return the earliest candidate in the block in *WHERE. */
3023
3024 static basic_block
ncd_of_cand_and_phis(slsr_cand_t c,double_int incr,slsr_cand_t * where)3025 ncd_of_cand_and_phis (slsr_cand_t c, double_int incr, slsr_cand_t *where)
3026 {
3027 basic_block ncd = NULL;
3028
3029 if (cand_abs_increment (c) == incr)
3030 {
3031 ncd = gimple_bb (c->cand_stmt);
3032 *where = c;
3033 }
3034
3035 if (phi_dependent_cand_p (c))
3036 ncd = ncd_with_phi (c, incr, lookup_cand (c->def_phi)->cand_stmt,
3037 ncd, where);
3038
3039 return ncd;
3040 }
3041
3042 /* Consider all candidates in the tree rooted at C for which INCR
3043 represents the required increment of C relative to its basis.
3044 Find and return the basic block that most nearly dominates all
3045 such candidates. If the returned block contains one or more of
3046 the candidates, return the earliest candidate in the block in
3047 *WHERE. */
3048
3049 static basic_block
nearest_common_dominator_for_cands(slsr_cand_t c,double_int incr,slsr_cand_t * where)3050 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
3051 slsr_cand_t *where)
3052 {
3053 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3054 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3055
3056 /* First find the NCD of all siblings and dependents. */
3057 if (c->sibling)
3058 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3059 incr, &sib_where);
3060 if (c->dependent)
3061 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3062 incr, &dep_where);
3063 if (!sib_ncd && !dep_ncd)
3064 {
3065 new_where = NULL;
3066 ncd = NULL;
3067 }
3068 else if (sib_ncd && !dep_ncd)
3069 {
3070 new_where = sib_where;
3071 ncd = sib_ncd;
3072 }
3073 else if (dep_ncd && !sib_ncd)
3074 {
3075 new_where = dep_where;
3076 ncd = dep_ncd;
3077 }
3078 else
3079 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3080 dep_where, &new_where);
3081
3082 /* If the candidate's increment doesn't match the one we're interested
3083 in (and nor do any increments for feeding defs of a phi-dependence),
3084 then the result depends only on siblings and dependents. */
3085 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3086
3087 if (!this_ncd || cand_already_replaced (c))
3088 {
3089 *where = new_where;
3090 return ncd;
3091 }
3092
3093 /* Otherwise, compare this candidate with the result from all siblings
3094 and dependents. */
3095 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3096
3097 return ncd;
3098 }
3099
3100 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3101
3102 static inline bool
profitable_increment_p(unsigned index)3103 profitable_increment_p (unsigned index)
3104 {
3105 return (incr_vec[index].cost <= COST_NEUTRAL);
3106 }
3107
3108 /* For each profitable increment in the increment vector not equal to
3109 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3110 dominator of all statements in the candidate chain rooted at C
3111 that require that increment, and insert an initializer
3112 T_0 = stride * increment at that location. Record T_0 with the
3113 increment record. */
3114
3115 static void
insert_initializers(slsr_cand_t c)3116 insert_initializers (slsr_cand_t c)
3117 {
3118 unsigned i;
3119
3120 for (i = 0; i < incr_vec_len; i++)
3121 {
3122 basic_block bb;
3123 slsr_cand_t where = NULL;
3124 gimple init_stmt;
3125 tree stride_type, new_name, incr_tree;
3126 double_int incr = incr_vec[i].incr;
3127
3128 if (!profitable_increment_p (i)
3129 || incr.is_one ()
3130 || (incr.is_minus_one ()
3131 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
3132 || incr.is_zero ())
3133 continue;
3134
3135 /* We may have already identified an existing initializer that
3136 will suffice. */
3137 if (incr_vec[i].initializer)
3138 {
3139 if (dump_file && (dump_flags & TDF_DETAILS))
3140 {
3141 fputs ("Using existing initializer: ", dump_file);
3142 print_gimple_stmt (dump_file,
3143 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3144 0, 0);
3145 }
3146 continue;
3147 }
3148
3149 /* Find the block that most closely dominates all candidates
3150 with this increment. If there is at least one candidate in
3151 that block, the earliest one will be returned in WHERE. */
3152 bb = nearest_common_dominator_for_cands (c, incr, &where);
3153
3154 /* Create a new SSA name to hold the initializer's value. */
3155 stride_type = TREE_TYPE (c->stride);
3156 new_name = make_temp_ssa_name (stride_type, NULL, "slsr");
3157 incr_vec[i].initializer = new_name;
3158
3159 /* Create the initializer and insert it in the latest possible
3160 dominating position. */
3161 incr_tree = double_int_to_tree (stride_type, incr);
3162 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
3163 c->stride, incr_tree);
3164 if (where)
3165 {
3166 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3167 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3168 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
3169 }
3170 else
3171 {
3172 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3173 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
3174
3175 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
3176 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3177 else
3178 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
3179
3180 gimple_set_location (init_stmt, gimple_location (basis_stmt));
3181 }
3182
3183 if (dump_file && (dump_flags & TDF_DETAILS))
3184 {
3185 fputs ("Inserting initializer: ", dump_file);
3186 print_gimple_stmt (dump_file, init_stmt, 0, 0);
3187 }
3188 }
3189 }
3190
3191 /* Return TRUE iff all required increments for candidates feeding PHI
3192 are profitable to replace on behalf of candidate C. */
3193
3194 static bool
all_phi_incrs_profitable(slsr_cand_t c,gimple phi)3195 all_phi_incrs_profitable (slsr_cand_t c, gimple phi)
3196 {
3197 unsigned i;
3198 slsr_cand_t basis = lookup_cand (c->basis);
3199 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
3200
3201 for (i = 0; i < gimple_phi_num_args (phi); i++)
3202 {
3203 tree arg = gimple_phi_arg_def (phi, i);
3204
3205 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3206 {
3207 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3208
3209 if (gimple_code (arg_def) == GIMPLE_PHI)
3210 {
3211 if (!all_phi_incrs_profitable (c, arg_def))
3212 return false;
3213 }
3214 else
3215 {
3216 int j;
3217 slsr_cand_t arg_cand = base_cand_from_table (arg);
3218 double_int increment = arg_cand->index - basis->index;
3219
3220 if (!address_arithmetic_p && increment.is_negative ())
3221 increment = -increment;
3222
3223 j = incr_vec_index (increment);
3224
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3226 {
3227 fprintf (dump_file, " Conditional candidate %d, phi: ",
3228 c->cand_num);
3229 print_gimple_stmt (dump_file, phi, 0, 0);
3230 fputs (" increment: ", dump_file);
3231 dump_double_int (dump_file, increment, false);
3232 if (j < 0)
3233 fprintf (dump_file,
3234 "\n Not replaced; incr_vec overflow.\n");
3235 else {
3236 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3237 if (profitable_increment_p (j))
3238 fputs (" Replacing...\n", dump_file);
3239 else
3240 fputs (" Not replaced.\n", dump_file);
3241 }
3242 }
3243
3244 if (j < 0 || !profitable_increment_p (j))
3245 return false;
3246 }
3247 }
3248 }
3249
3250 return true;
3251 }
3252
3253 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3254 type TO_TYPE, and insert it in front of the statement represented
3255 by candidate C. Use *NEW_VAR to create the new SSA name. Return
3256 the new SSA name. */
3257
3258 static tree
introduce_cast_before_cand(slsr_cand_t c,tree to_type,tree from_expr)3259 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3260 {
3261 tree cast_lhs;
3262 gimple cast_stmt;
3263 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3264
3265 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3266 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
3267 from_expr, NULL_TREE);
3268 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3269 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3270
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3272 {
3273 fputs (" Inserting: ", dump_file);
3274 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
3275 }
3276
3277 return cast_lhs;
3278 }
3279
3280 /* Replace the RHS of the statement represented by candidate C with
3281 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3282 leave C unchanged or just interchange its operands. The original
3283 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3284 If the replacement was made and we are doing a details dump,
3285 return the revised statement, else NULL. */
3286
3287 static gimple
replace_rhs_if_not_dup(enum tree_code new_code,tree new_rhs1,tree new_rhs2,enum tree_code old_code,tree old_rhs1,tree old_rhs2,slsr_cand_t c)3288 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3289 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3290 slsr_cand_t c)
3291 {
3292 if (new_code != old_code
3293 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3294 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3295 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3296 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3297 {
3298 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3299 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3300 update_stmt (gsi_stmt (gsi));
3301 c->cand_stmt = gsi_stmt (gsi);
3302
3303 if (dump_file && (dump_flags & TDF_DETAILS))
3304 return gsi_stmt (gsi);
3305 }
3306
3307 else if (dump_file && (dump_flags & TDF_DETAILS))
3308 fputs (" (duplicate, not actually replacing)\n", dump_file);
3309
3310 return NULL;
3311 }
3312
3313 /* Strength-reduce the statement represented by candidate C by replacing
3314 it with an equivalent addition or subtraction. I is the index into
3315 the increment vector identifying C's increment. NEW_VAR is used to
3316 create a new SSA name if a cast needs to be introduced. BASIS_NAME
3317 is the rhs1 to use in creating the add/subtract. */
3318
3319 static void
replace_one_candidate(slsr_cand_t c,unsigned i,tree basis_name)3320 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3321 {
3322 gimple stmt_to_print = NULL;
3323 tree orig_rhs1, orig_rhs2;
3324 tree rhs2;
3325 enum tree_code orig_code, repl_code;
3326 double_int cand_incr;
3327
3328 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3329 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3330 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3331 cand_incr = cand_increment (c);
3332
3333 if (dump_file && (dump_flags & TDF_DETAILS))
3334 {
3335 fputs ("Replacing: ", dump_file);
3336 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
3337 stmt_to_print = c->cand_stmt;
3338 }
3339
3340 if (address_arithmetic_p)
3341 repl_code = POINTER_PLUS_EXPR;
3342 else
3343 repl_code = PLUS_EXPR;
3344
3345 /* If the increment has an initializer T_0, replace the candidate
3346 statement with an add of the basis name and the initializer. */
3347 if (incr_vec[i].initializer)
3348 {
3349 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3350 tree orig_type = TREE_TYPE (orig_rhs2);
3351
3352 if (types_compatible_p (orig_type, init_type))
3353 rhs2 = incr_vec[i].initializer;
3354 else
3355 rhs2 = introduce_cast_before_cand (c, orig_type,
3356 incr_vec[i].initializer);
3357
3358 if (incr_vec[i].incr != cand_incr)
3359 {
3360 gcc_assert (repl_code == PLUS_EXPR);
3361 repl_code = MINUS_EXPR;
3362 }
3363
3364 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3365 orig_code, orig_rhs1, orig_rhs2,
3366 c);
3367 }
3368
3369 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3370 with a subtract of the stride from the basis name, a copy
3371 from the basis name, or an add of the stride to the basis
3372 name, respectively. It may be necessary to introduce a
3373 cast (or reuse an existing cast). */
3374 else if (cand_incr.is_one ())
3375 {
3376 tree stride_type = TREE_TYPE (c->stride);
3377 tree orig_type = TREE_TYPE (orig_rhs2);
3378
3379 if (types_compatible_p (orig_type, stride_type))
3380 rhs2 = c->stride;
3381 else
3382 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3383
3384 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3385 orig_code, orig_rhs1, orig_rhs2,
3386 c);
3387 }
3388
3389 else if (cand_incr.is_minus_one ())
3390 {
3391 tree stride_type = TREE_TYPE (c->stride);
3392 tree orig_type = TREE_TYPE (orig_rhs2);
3393 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3394
3395 if (types_compatible_p (orig_type, stride_type))
3396 rhs2 = c->stride;
3397 else
3398 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3399
3400 if (orig_code != MINUS_EXPR
3401 || !operand_equal_p (basis_name, orig_rhs1, 0)
3402 || !operand_equal_p (rhs2, orig_rhs2, 0))
3403 {
3404 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3405 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3406 update_stmt (gsi_stmt (gsi));
3407 c->cand_stmt = gsi_stmt (gsi);
3408
3409 if (dump_file && (dump_flags & TDF_DETAILS))
3410 stmt_to_print = gsi_stmt (gsi);
3411 }
3412 else if (dump_file && (dump_flags & TDF_DETAILS))
3413 fputs (" (duplicate, not actually replacing)\n", dump_file);
3414 }
3415
3416 else if (cand_incr.is_zero ())
3417 {
3418 tree lhs = gimple_assign_lhs (c->cand_stmt);
3419 tree lhs_type = TREE_TYPE (lhs);
3420 tree basis_type = TREE_TYPE (basis_name);
3421
3422 if (types_compatible_p (lhs_type, basis_type))
3423 {
3424 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
3425 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3426 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3427 gsi_replace (&gsi, copy_stmt, false);
3428 c->cand_stmt = copy_stmt;
3429
3430 if (dump_file && (dump_flags & TDF_DETAILS))
3431 stmt_to_print = copy_stmt;
3432 }
3433 else
3434 {
3435 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3436 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
3437 basis_name,
3438 NULL_TREE);
3439 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3440 gsi_replace (&gsi, cast_stmt, false);
3441 c->cand_stmt = cast_stmt;
3442
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3444 stmt_to_print = cast_stmt;
3445 }
3446 }
3447 else
3448 gcc_unreachable ();
3449
3450 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3451 {
3452 fputs ("With: ", dump_file);
3453 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
3454 fputs ("\n", dump_file);
3455 }
3456 }
3457
3458 /* For each candidate in the tree rooted at C, replace it with
3459 an increment if such has been shown to be profitable. */
3460
3461 static void
replace_profitable_candidates(slsr_cand_t c)3462 replace_profitable_candidates (slsr_cand_t c)
3463 {
3464 if (!cand_already_replaced (c))
3465 {
3466 double_int increment = cand_abs_increment (c);
3467 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3468 int i;
3469
3470 i = incr_vec_index (increment);
3471
3472 /* Only process profitable increments. Nothing useful can be done
3473 to a cast or copy. */
3474 if (i >= 0
3475 && profitable_increment_p (i)
3476 && orig_code != MODIFY_EXPR
3477 && orig_code != NOP_EXPR)
3478 {
3479 if (phi_dependent_cand_p (c))
3480 {
3481 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
3482
3483 if (all_phi_incrs_profitable (c, phi))
3484 {
3485 /* Look up the LHS SSA name from C's basis. This will be
3486 the RHS1 of the adds we will introduce to create new
3487 phi arguments. */
3488 slsr_cand_t basis = lookup_cand (c->basis);
3489 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3490
3491 /* Create a new phi statement that will represent C's true
3492 basis after the transformation is complete. */
3493 location_t loc = gimple_location (c->cand_stmt);
3494 tree name = create_phi_basis (c, phi, basis_name,
3495 loc, UNKNOWN_STRIDE);
3496
3497 /* Replace C with an add of the new basis phi and the
3498 increment. */
3499 replace_one_candidate (c, i, name);
3500 }
3501 }
3502 else
3503 {
3504 slsr_cand_t basis = lookup_cand (c->basis);
3505 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3506 replace_one_candidate (c, i, basis_name);
3507 }
3508 }
3509 }
3510
3511 if (c->sibling)
3512 replace_profitable_candidates (lookup_cand (c->sibling));
3513
3514 if (c->dependent)
3515 replace_profitable_candidates (lookup_cand (c->dependent));
3516 }
3517
3518 /* Analyze costs of related candidates in the candidate vector,
3519 and make beneficial replacements. */
3520
3521 static void
analyze_candidates_and_replace(void)3522 analyze_candidates_and_replace (void)
3523 {
3524 unsigned i;
3525 slsr_cand_t c;
3526
3527 /* Each candidate that has a null basis and a non-null
3528 dependent is the root of a tree of related statements.
3529 Analyze each tree to determine a subset of those
3530 statements that can be replaced with maximum benefit. */
3531 FOR_EACH_VEC_ELT (cand_vec, i, c)
3532 {
3533 slsr_cand_t first_dep;
3534
3535 if (c->basis != 0 || c->dependent == 0)
3536 continue;
3537
3538 if (dump_file && (dump_flags & TDF_DETAILS))
3539 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3540 c->cand_num);
3541
3542 first_dep = lookup_cand (c->dependent);
3543
3544 /* If this is a chain of CAND_REFs, unconditionally replace
3545 each of them with a strength-reduced data reference. */
3546 if (c->kind == CAND_REF)
3547 replace_refs (c);
3548
3549 /* If the common stride of all related candidates is a known
3550 constant, each candidate without a phi-dependence can be
3551 profitably replaced. Each replaces a multiply by a single
3552 add, with the possibility that a feeding add also goes dead.
3553 A candidate with a phi-dependence is replaced only if the
3554 compensation code it requires is offset by the strength
3555 reduction savings. */
3556 else if (TREE_CODE (c->stride) == INTEGER_CST)
3557 replace_uncond_cands_and_profitable_phis (first_dep);
3558
3559 /* When the stride is an SSA name, it may still be profitable
3560 to replace some or all of the dependent candidates, depending
3561 on whether the introduced increments can be reused, or are
3562 less expensive to calculate than the replaced statements. */
3563 else
3564 {
3565 enum machine_mode mode;
3566 bool speed;
3567
3568 /* Determine whether we'll be generating pointer arithmetic
3569 when replacing candidates. */
3570 address_arithmetic_p = (c->kind == CAND_ADD
3571 && POINTER_TYPE_P (c->cand_type));
3572
3573 /* If all candidates have already been replaced under other
3574 interpretations, nothing remains to be done. */
3575 if (!count_candidates (c))
3576 continue;
3577
3578 /* Construct an array of increments for this candidate chain. */
3579 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3580 incr_vec_len = 0;
3581 record_increments (c);
3582
3583 /* Determine which increments are profitable to replace. */
3584 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3585 speed = optimize_cands_for_speed_p (c);
3586 analyze_increments (first_dep, mode, speed);
3587
3588 /* Insert initializers of the form T_0 = stride * increment
3589 for use in profitable replacements. */
3590 insert_initializers (first_dep);
3591 dump_incr_vec ();
3592
3593 /* Perform the replacements. */
3594 replace_profitable_candidates (first_dep);
3595 free (incr_vec);
3596 }
3597 }
3598 }
3599
3600 static unsigned
execute_strength_reduction(void)3601 execute_strength_reduction (void)
3602 {
3603 /* Create the obstack where candidates will reside. */
3604 gcc_obstack_init (&cand_obstack);
3605
3606 /* Allocate the candidate vector. */
3607 cand_vec.create (128);
3608
3609 /* Allocate the mapping from statements to candidate indices. */
3610 stmt_cand_map = pointer_map_create ();
3611
3612 /* Create the obstack where candidate chains will reside. */
3613 gcc_obstack_init (&chain_obstack);
3614
3615 /* Allocate the mapping from base expressions to candidate chains. */
3616 base_cand_map.create (500);
3617
3618 /* Allocate the mapping from bases to alternative bases. */
3619 alt_base_map = pointer_map_create ();
3620
3621 /* Initialize the loop optimizer. We need to detect flow across
3622 back edges, and this gives us dominator information as well. */
3623 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3624
3625 /* Walk the CFG in predominator order looking for strength reduction
3626 candidates. */
3627 find_candidates_dom_walker (CDI_DOMINATORS)
3628 .walk (cfun->cfg->x_entry_block_ptr);
3629
3630 if (dump_file && (dump_flags & TDF_DETAILS))
3631 {
3632 dump_cand_vec ();
3633 dump_cand_chains ();
3634 }
3635
3636 pointer_map_destroy (alt_base_map);
3637 free_affine_expand_cache (&name_expansions);
3638
3639 /* Analyze costs and make appropriate replacements. */
3640 analyze_candidates_and_replace ();
3641
3642 loop_optimizer_finalize ();
3643 base_cand_map.dispose ();
3644 obstack_free (&chain_obstack, NULL);
3645 pointer_map_destroy (stmt_cand_map);
3646 cand_vec.release ();
3647 obstack_free (&cand_obstack, NULL);
3648
3649 return 0;
3650 }
3651
3652 static bool
gate_strength_reduction(void)3653 gate_strength_reduction (void)
3654 {
3655 return flag_tree_slsr;
3656 }
3657
3658 namespace {
3659
3660 const pass_data pass_data_strength_reduction =
3661 {
3662 GIMPLE_PASS, /* type */
3663 "slsr", /* name */
3664 OPTGROUP_NONE, /* optinfo_flags */
3665 true, /* has_gate */
3666 true, /* has_execute */
3667 TV_GIMPLE_SLSR, /* tv_id */
3668 ( PROP_cfg | PROP_ssa ), /* properties_required */
3669 0, /* properties_provided */
3670 0, /* properties_destroyed */
3671 0, /* todo_flags_start */
3672 TODO_verify_ssa, /* todo_flags_finish */
3673 };
3674
3675 class pass_strength_reduction : public gimple_opt_pass
3676 {
3677 public:
pass_strength_reduction(gcc::context * ctxt)3678 pass_strength_reduction (gcc::context *ctxt)
3679 : gimple_opt_pass (pass_data_strength_reduction, ctxt)
3680 {}
3681
3682 /* opt_pass methods: */
gate()3683 bool gate () { return gate_strength_reduction (); }
execute()3684 unsigned int execute () { return execute_strength_reduction (); }
3685
3686 }; // class pass_strength_reduction
3687
3688 } // anon namespace
3689
3690 gimple_opt_pass *
make_pass_strength_reduction(gcc::context * ctxt)3691 make_pass_strength_reduction (gcc::context *ctxt)
3692 {
3693 return new pass_strength_reduction (ctxt);
3694 }
3695