/* Full and partial redundancy elimination and code hoisting on SSA GIMPLE.
   Copyright (C) 2001-2019 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org> and Steven Bosscher
   <stevenb@suse.de>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-ssa-sccvn.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "dbgcnt.h"
#include "domwalk.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-dce.h"
#include "tree-cfgcleanup.h"
#include "alias.h"

/* Even though this file is called tree-ssa-pre.c, we actually
   implement a bit more than just PRE here.  All of them piggy-back
   on GVN which is implemented in tree-ssa-sccvn.c.

     1. Full Redundancy Elimination (FRE)
	This is the elimination phase of GVN.

     2. Partial Redundancy Elimination (PRE)
	This adds computation of AVAIL_OUT and ANTIC_IN and performs
	expression insertion to form GVN-PRE.

     3. Code hoisting
	This optimization uses the ANTIC_IN sets computed for PRE
	to move expressions further up than PRE would do, to make
	multiple computations of the same value fully redundant.
	This pass is explained below (after the explanation of the
	basic algorithm for PRE).  */

/* TODO:

   1. Avail sets can be shared by making an avail_find_leader that
      walks up the dominator tree and looks in those avail sets.
      This might affect code optimality, it's unclear right now.
      Currently the AVAIL_OUT sets are the remaining quadraticness in
      memory of GVN-PRE.
   2. Strength reduction can be performed by anticipating expressions
      we can repair later on.
   3. We can do back-substitution or smarter value numbering to catch
      commutative expressions split up over multiple statements.  */

/* For ease of terminology, "expression node" in the below refers to
   every expression node but GIMPLE_ASSIGN, because GIMPLE_ASSIGNs
   represent the actual statement containing the expressions we care about,
   and we cache the value number by putting it in the expression.  */

/* Basic algorithm for Partial Redundancy Elimination:

   First we walk the statements to generate the AVAIL sets, the
   EXP_GEN sets, and the tmp_gen sets.  EXP_GEN sets represent the
   generation of values/expressions by a given block.  We use them
   when computing the ANTIC sets.  The AVAIL sets consist of
   SSA_NAME's that represent values, so we know what values are
   available in what blocks.  AVAIL is a forward dataflow problem.  In
   SSA, values are never killed, so we don't need a kill set, or a
   fixpoint iteration, in order to calculate the AVAIL sets.  In
   traditional parlance, AVAIL sets tell us the downsafety of the
   expressions/values.

   Next, we generate the ANTIC sets.  These sets represent the
   anticipatable expressions.  ANTIC is a backwards dataflow
   problem.  An expression is anticipatable in a given block if it could
   be generated in that block.  This means that if we had to insert a
   computation of that expression's value in that block, we could do
   so.  Calculating the ANTIC sets requires phi translation of
   expressions, because the flow goes backwards through phis.  We must
   iterate to a fixpoint of the ANTIC sets, because we have a kill
   set.  Even in SSA form, values are not live over the entire
   function, only from their definition point onwards.  So we have to
   remove values from the ANTIC set once we go past the definition
   point of the leaders that make them up.
   compute_antic/compute_antic_aux performs this computation.

   Third, we perform insertions to make partially redundant
   expressions fully redundant.

   An expression is partially redundant (excluding partial
   anticipation) if:

   1. It is AVAIL in some, but not all, of the predecessors of a
      given block.
   2. It is ANTIC in all the predecessors.

   In order to make it fully redundant, we insert the expression into
   the predecessors where it is not available, but is ANTIC.

   When optimizing for size, we only eliminate the partial redundancy
   if we need to insert in only one predecessor.  This avoids almost
   completely the code size increase that PRE usually causes.

   For the partial anticipation case, we only perform insertion if it
   is partially anticipated in some block, and fully available in all
   of the predecessors.

   do_pre_regular_insertion/do_pre_partial_partial_insertion
   perform these steps, driven by insert/insert_aux.

   Fourth, we eliminate fully redundant expressions.
   This is a simple statement walk that replaces redundant
   calculations with the now available values.  */
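
/* As a small, purely illustrative example (the SSA names and block
   numbers below are made up), consider a block with two predecessors
   where only one of them computes a_2 + b_3:

	<bb 2>:			<bb 3>:
	x_1 = a_2 + b_3;	(no computation)

	<bb 4>:
	y_4 = a_2 + b_3;

   The expression is AVAIL out of bb 2 but not out of bb 3, and ANTIC
   in both, so insertion adds pretmp_5 = a_2 + b_3 at the end of bb 3
   and merges the two availabilities with a PHI in bb 4, roughly

	pretmp_6 = PHI <x_1 (2), pretmp_5 (3)>

   after which the computation of y_4 is fully redundant and the
   elimination phase can replace it by pretmp_6.  */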

/* Basic algorithm for Code Hoisting:

   Code hoisting is: Moving value computations up in the control flow
   graph to make multiple copies redundant.  Typically this is a size
   optimization, but there are cases where it also is helpful for speed.

   A simple code hoisting algorithm is implemented that piggy-backs on
   the PRE infrastructure.  For code hoisting, we have to know ANTIC_OUT
   which is effectively ANTIC_IN - AVAIL_OUT.  The latter two have to be
   computed for PRE, and we can use them to perform a limited version of
   code hoisting, too.

   For the purpose of this implementation, a value is hoistable to a basic
   block B if the following properties are met:

   1. The value is in ANTIC_IN(B) -- the value will be computed on all
      paths from B to function exit and it can be computed in B;

   2. The value is not in AVAIL_OUT(B) -- there would be no need to
      compute the value again and make it available twice;

   3. All successors of B are dominated by B -- makes sure that inserting
      a computation of the value in B will make the remaining
      computations fully redundant;

   4. At least one successor has the value in AVAIL_OUT -- to avoid
      hoisting values up too far;

   5. There are at least two successors of B -- hoisting in straight
      line code is pointless.

   The third condition is not strictly necessary, but it would complicate
   the hoisting pass a lot.  In fact, I don't know of any code hoisting
   algorithm that does not have this requirement.  Fortunately, experiments
   have shown that most candidate hoistable values are in regions that meet
   this condition (e.g. diamond-shaped regions).

   The fourth condition is necessary to avoid hoisting things up too far
   away from the uses of the value.  Nothing else limits the algorithm
   from hoisting everything up as far as ANTIC_IN allows.  Experiments
   with SPEC and CSiBE have shown that hoisting up too far results in more
   spilling, smaller code size benefits, and worse benchmark scores.
   Fortunately, in practice most of the interesting hoisting opportunities
   are caught despite this limitation.

   For hoistable values that meet all conditions, expressions are inserted
   to make the calculation of the hoistable value fully redundant.  We
   perform code hoisting insertions after each round of PRE insertions,
   because code hoisting never exposes new PRE opportunities, but PRE can
   create new code hoisting opportunities.

   The code hoisting algorithm is implemented in do_hoist_insert, driven
   by insert/insert_aux.  */
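
/* For illustration (again with made-up SSA names), the classical
   candidate is a diamond whose arms both compute the same value:

	<bb 2>:
	if (cond_1) goto bb 3; else goto bb 4;

	<bb 3>:			<bb 4>:
	x_2 = a_5 + b_6;	y_3 = a_5 + b_6;

   Here a_5 + b_6 is in ANTIC_IN (bb 2) but not in AVAIL_OUT (bb 2),
   bb 2 dominates both of its two successors, and the value is in the
   successors' AVAIL_OUT sets, so hoisting inserts tem_7 = a_5 + b_6
   at the end of bb 2, which makes both arm computations fully
   redundant.  */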

/* Representations of value numbers:

   Value numbers are represented by a representative SSA_NAME.  We
   will create fake SSA_NAME's in situations where we need a
   representative but do not have one (because it is a complex
   expression).  In order to facilitate storing the value numbers in
   bitmaps, and keep the number of wasted SSA_NAME's down, we also
   associate a value_id with each value number, and create full blown
   ssa_name's only where we actually need them (IE in operands of
   existing expressions).

   Theoretically you could replace all the value_id's with
   SSA_NAME_VERSION, but this would allocate a large number of
   SSA_NAME's (which are each > 30 bytes) just to get a 4 byte number.
   It would also require an additional indirection at each point we
   use the value id.  */

/* Representation of expressions on value numbers:

   Expressions consisting of value numbers are represented the same
   way as our VN internally represents them, with an additional
   "pre_expr" wrapping around them in order to facilitate storing all
   of the expressions in the same sets.  */

/* Representation of sets:

   The dataflow sets do not need to be sorted in any particular order
   for the majority of their lifetime, and are simply represented as
   two bitmaps, one that keeps track of values present in the set, and
   one that keeps track of expressions present in the set.

   When we need them in topological order, we produce it on demand by
   transforming the bitmap into an array and sorting it into topo
   order.  */
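
/* For example (ids made up for illustration), a set holding
   expressions with ids 3 and 17 that share value-id 5, plus the
   expression with id 9 whose value-id is 2, is stored as
   expressions == { 3, 9, 17 } and values == { 2, 5 }; both value
   membership and expression membership tests are then single bitmap
   queries.  */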

/* Type of expression, used to know which member of the PRE_EXPR union
   is valid.  */

enum pre_expr_kind
{
    NAME,
    NARY,
    REFERENCE,
    CONSTANT
};

union pre_expr_union
{
  tree name;
  tree constant;
  vn_nary_op_t nary;
  vn_reference_t reference;
};

typedef struct pre_expr_d : nofree_ptr_hash <pre_expr_d>
{
  enum pre_expr_kind kind;
  unsigned int id;
  pre_expr_union u;

  /* hash_table support.  */
  static inline hashval_t hash (const pre_expr_d *);
  static inline int equal (const pre_expr_d *, const pre_expr_d *);
} *pre_expr;

#define PRE_EXPR_NAME(e) (e)->u.name
#define PRE_EXPR_NARY(e) (e)->u.nary
#define PRE_EXPR_REFERENCE(e) (e)->u.reference
#define PRE_EXPR_CONSTANT(e) (e)->u.constant

/* Compare E1 and E2 for equality.  */

inline int
pre_expr_d::equal (const pre_expr_d *e1, const pre_expr_d *e2)
{
  if (e1->kind != e2->kind)
    return false;

  switch (e1->kind)
    {
    case CONSTANT:
      return vn_constant_eq_with_type (PRE_EXPR_CONSTANT (e1),
				       PRE_EXPR_CONSTANT (e2));
    case NAME:
      return PRE_EXPR_NAME (e1) == PRE_EXPR_NAME (e2);
    case NARY:
      return vn_nary_op_eq (PRE_EXPR_NARY (e1), PRE_EXPR_NARY (e2));
    case REFERENCE:
      return vn_reference_eq (PRE_EXPR_REFERENCE (e1),
			      PRE_EXPR_REFERENCE (e2));
    default:
      gcc_unreachable ();
    }
}

/* Hash E.  */

inline hashval_t
pre_expr_d::hash (const pre_expr_d *e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return vn_hash_constant_with_type (PRE_EXPR_CONSTANT (e));
    case NAME:
      return SSA_NAME_VERSION (PRE_EXPR_NAME (e));
    case NARY:
      return PRE_EXPR_NARY (e)->hashcode;
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->hashcode;
    default:
      gcc_unreachable ();
    }
}

/* Next global expression id number.  */
static unsigned int next_expression_id;

/* Mapping from expression to id number we can use in bitmap sets.  */
static vec<pre_expr> expressions;
static hash_table<pre_expr_d> *expression_to_id;
static vec<unsigned> name_to_id;

/* Allocate an expression id for EXPR.  */

static inline unsigned int
alloc_expression_id (pre_expr expr)
{
  struct pre_expr_d **slot;
  /* Make sure we won't overflow.  */
  gcc_assert (next_expression_id + 1 > next_expression_id);
  expr->id = next_expression_id++;
  expressions.safe_push (expr);
  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      /* vec::safe_grow_cleared allocates no headroom.  Avoid frequent
	 re-allocations by using vec::reserve upfront.  */
      unsigned old_len = name_to_id.length ();
      name_to_id.reserve (num_ssa_names - old_len);
      name_to_id.quick_grow_cleared (num_ssa_names);
      gcc_assert (name_to_id[version] == 0);
      name_to_id[version] = expr->id;
    }
  else
    {
      slot = expression_to_id->find_slot (expr, INSERT);
      gcc_assert (!*slot);
      *slot = expr;
    }
  return next_expression_id - 1;
}

/* Return the expression id for tree EXPR.  */

static inline unsigned int
get_expression_id (const pre_expr expr)
{
  return expr->id;
}

static inline unsigned int
lookup_expression_id (const pre_expr expr)
{
  struct pre_expr_d **slot;

  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      if (name_to_id.length () <= version)
	return 0;
      return name_to_id[version];
    }
  else
    {
      slot = expression_to_id->find_slot (expr, NO_INSERT);
      if (!slot)
	return 0;
      return ((pre_expr)*slot)->id;
    }
}

/* Return the existing expression id for EXPR, or create one if one
   does not exist yet.  */

static inline unsigned int
get_or_alloc_expression_id (pre_expr expr)
{
  unsigned int id = lookup_expression_id (expr);
  if (id == 0)
    return alloc_expression_id (expr);
  return expr->id = id;
}

/* Return the expression that has expression id ID.  */

static inline pre_expr
expression_for_id (unsigned int id)
{
  return expressions[id];
}

static object_allocator<pre_expr_d> pre_expr_pool ("pre_expr nodes");

/* Given an SSA_NAME NAME, get or create a pre_expr to represent it.  */

static pre_expr
get_or_alloc_expr_for_name (tree name)
{
  struct pre_expr_d expr;
  pre_expr result;
  unsigned int result_id;

  expr.kind = NAME;
  expr.id = 0;
  PRE_EXPR_NAME (&expr) = name;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  result = pre_expr_pool.allocate ();
  result->kind = NAME;
  PRE_EXPR_NAME (result) = name;
  alloc_expression_id (result);
  return result;
}

/* An unordered bitmap set.  One bitmap tracks values, the other,
   expressions.  */
typedef struct bitmap_set
{
  bitmap_head expressions;
  bitmap_head values;
} *bitmap_set_t;

#define FOR_EACH_EXPR_ID_IN_SET(set, id, bi) \
  EXECUTE_IF_SET_IN_BITMAP (&(set)->expressions, 0, (id), (bi))

#define FOR_EACH_VALUE_ID_IN_SET(set, id, bi) \
  EXECUTE_IF_SET_IN_BITMAP (&(set)->values, 0, (id), (bi))

/* Mapping from value id to expressions with that value_id.  */
static vec<bitmap> value_expressions;

/* Sets that we need to keep track of.  */
typedef struct bb_bitmap_sets
{
  /* The EXP_GEN set, which represents expressions/values generated in
     a basic block.  */
  bitmap_set_t exp_gen;

  /* The PHI_GEN set, which represents PHI results generated in a
     basic block.  */
  bitmap_set_t phi_gen;

  /* The TMP_GEN set, which represents results/temporaries generated
     in a basic block.  IE the LHS of an expression.  */
  bitmap_set_t tmp_gen;

  /* The AVAIL_OUT set, which represents which values are available in
     a given basic block.  */
  bitmap_set_t avail_out;

  /* The ANTIC_IN set, which represents which values are anticipatable
     in a given basic block.  */
  bitmap_set_t antic_in;

  /* The PA_IN set, which represents which values are
     partially anticipatable in a given basic block.  */
  bitmap_set_t pa_in;

  /* The NEW_SETS set, which is used during insertion to augment the
     AVAIL_OUT set of blocks with the new insertions performed during
     the current iteration.  */
  bitmap_set_t new_sets;

  /* A cache for value_dies_in_block_x.  */
  bitmap expr_dies;

  /* The live virtual operand on successor edges.  */
  tree vop_on_exit;

  /* True if we have visited this block during ANTIC calculation.  */
  unsigned int visited : 1;

  /* True when the block contains a call that might not return.  */
  unsigned int contains_may_not_return_call : 1;
} *bb_value_sets_t;

#define EXP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->exp_gen
#define PHI_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->phi_gen
#define TMP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->tmp_gen
#define AVAIL_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->avail_out
#define ANTIC_IN(BB) ((bb_value_sets_t) ((BB)->aux))->antic_in
#define PA_IN(BB) ((bb_value_sets_t) ((BB)->aux))->pa_in
#define NEW_SETS(BB) ((bb_value_sets_t) ((BB)->aux))->new_sets
#define EXPR_DIES(BB) ((bb_value_sets_t) ((BB)->aux))->expr_dies
#define BB_VISITED(BB) ((bb_value_sets_t) ((BB)->aux))->visited
#define BB_MAY_NOTRETURN(BB) ((bb_value_sets_t) ((BB)->aux))->contains_may_not_return_call
#define BB_LIVE_VOP_ON_EXIT(BB) ((bb_value_sets_t) ((BB)->aux))->vop_on_exit


/* This structure is used to keep track of statistics about the
   optimizations PRE was able to perform.  */
static struct
{
  /* The number of new expressions/temporaries generated by PRE.  */
  int insertions;

  /* The number of inserts found due to partial anticipation.  */
  int pa_insert;

  /* The number of inserts made for code hoisting.  */
  int hoist_insert;

  /* The number of new PHI nodes added by PRE.  */
  int phis;
} pre_stats;

static bool do_partial_partial;
static pre_expr bitmap_find_leader (bitmap_set_t, unsigned int);
static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr);
static void bitmap_value_replace_in_set (bitmap_set_t, pre_expr);
static void bitmap_set_copy (bitmap_set_t, bitmap_set_t);
static bool bitmap_set_contains_value (bitmap_set_t, unsigned int);
static void bitmap_insert_into_set (bitmap_set_t, pre_expr);
static bitmap_set_t bitmap_set_new (void);
static tree create_expression_by_pieces (basic_block, pre_expr, gimple_seq *,
					 tree);
static tree find_or_generate_expression (basic_block, tree, gimple_seq *);
static unsigned int get_expr_value_id (pre_expr);

/* We can add and remove elements and entries to and from sets
   and hash tables, so we use alloc pools for them.  */

static object_allocator<bitmap_set> bitmap_set_pool ("Bitmap sets");
static bitmap_obstack grand_bitmap_obstack;

/* A three tuple {e, pred, v} used to cache phi translations in the
   phi_translate_table.  */

typedef struct expr_pred_trans_d : free_ptr_hash<expr_pred_trans_d>
{
  /* The expression.  */
  pre_expr e;

  /* The predecessor block along which we translated the expression.  */
  basic_block pred;

  /* The value that resulted from the translation.  */
  pre_expr v;

  /* The hashcode for the expression, pred pair.  This is cached for
     speed reasons.  */
  hashval_t hashcode;

  /* hash_table support.  */
  static inline hashval_t hash (const expr_pred_trans_d *);
  static inline int equal (const expr_pred_trans_d *,
			   const expr_pred_trans_d *);
} *expr_pred_trans_t;
typedef const struct expr_pred_trans_d *const_expr_pred_trans_t;

inline hashval_t
expr_pred_trans_d::hash (const expr_pred_trans_d *e)
{
  return e->hashcode;
}

inline int
expr_pred_trans_d::equal (const expr_pred_trans_d *ve1,
			  const expr_pred_trans_d *ve2)
{
  basic_block b1 = ve1->pred;
  basic_block b2 = ve2->pred;

  /* If they are not translations for the same basic block, they can't
     be equal.  */
  if (b1 != b2)
    return false;
  return pre_expr_d::equal (ve1->e, ve2->e);
}

/* The phi_translate_table caches phi translations for a given
   expression and predecessor.  */
static hash_table<expr_pred_trans_d> *phi_translate_table;

/* Add the tuple mapping from {expression E, basic block PRED} to
   the phi translation table and return whether it pre-existed.  */

static inline bool
phi_trans_add (expr_pred_trans_t *entry, pre_expr e, basic_block pred)
{
  expr_pred_trans_t *slot;
  expr_pred_trans_d tem;
  hashval_t hash = iterative_hash_hashval_t (pre_expr_d::hash (e),
					     pred->index);
  tem.e = e;
  tem.pred = pred;
  tem.hashcode = hash;
  slot = phi_translate_table->find_slot_with_hash (&tem, hash, INSERT);
  if (*slot)
    {
      *entry = *slot;
      return true;
    }

  *entry = *slot = XNEW (struct expr_pred_trans_d);
  (*entry)->e = e;
  (*entry)->pred = pred;
  (*entry)->hashcode = hash;
  return false;
}


/* Add expression E to the expression set of value id V.  */

static void
add_to_value (unsigned int v, pre_expr e)
{
  bitmap set;

  gcc_checking_assert (get_expr_value_id (e) == v);

  if (v >= value_expressions.length ())
    {
      value_expressions.safe_grow_cleared (v + 1);
    }

  set = value_expressions[v];
  if (!set)
    {
      set = BITMAP_ALLOC (&grand_bitmap_obstack);
      value_expressions[v] = set;
    }

  bitmap_set_bit (set, get_or_alloc_expression_id (e));
}
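
/* For example, after add_to_value (5, e) followed by
   add_to_value (5, f) (illustrative value-id and expressions),
   value_expressions[5] contains the expression ids of both e and f.
   This reverse mapping is what bitmap_find_leader and
   get_representative_for walk to find an expression for a value.  */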

/* Create a new bitmap set and return it.  */

static bitmap_set_t
bitmap_set_new (void)
{
  bitmap_set_t ret = bitmap_set_pool.allocate ();
  bitmap_initialize (&ret->expressions, &grand_bitmap_obstack);
  bitmap_initialize (&ret->values, &grand_bitmap_obstack);
  return ret;
}

/* Return the value id for a PRE expression EXPR.  */

static unsigned int
get_expr_value_id (pre_expr expr)
{
  unsigned int id;
  switch (expr->kind)
    {
    case CONSTANT:
      id = get_constant_value_id (PRE_EXPR_CONSTANT (expr));
      break;
    case NAME:
      id = VN_INFO (PRE_EXPR_NAME (expr))->value_id;
      break;
    case NARY:
      gcc_assert (!PRE_EXPR_NARY (expr)->predicated_values);
      id = PRE_EXPR_NARY (expr)->value_id;
      break;
    case REFERENCE:
      id = PRE_EXPR_REFERENCE (expr)->value_id;
      break;
    default:
      gcc_unreachable ();
    }
  /* ??? We cannot assert that expr has a value-id (it can be 0), because
     we assign value-ids only to expressions that have a result
     in set_hashtable_value_ids.  */
  return id;
}

/* Return a VN valnum (SSA name or constant) for the PRE value-id VAL.  */

static tree
vn_valnum_from_value_id (unsigned int val)
{
  bitmap_iterator bi;
  unsigned int i;
  bitmap exprset = value_expressions[val];
  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
    {
      pre_expr vexpr = expression_for_id (i);
      if (vexpr->kind == NAME)
	return VN_INFO (PRE_EXPR_NAME (vexpr))->valnum;
      else if (vexpr->kind == CONSTANT)
	return PRE_EXPR_CONSTANT (vexpr);
    }
  return NULL_TREE;
}

/* Insert an expression EXPR into a bitmapped set.  */

static void
bitmap_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (! value_id_constant_p (val))
    {
      /* Note this is the only function causing multiple expressions
	 for the same value to appear in a set.  This is needed for
	 TMP_GEN, PHI_GEN and NEW_SETs.  */
      bitmap_set_bit (&set->values, val);
      bitmap_set_bit (&set->expressions, get_or_alloc_expression_id (expr));
    }
}

/* Copy bitmapped set ORIG into bitmapped set DEST.  */

static void
bitmap_set_copy (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_copy (&dest->expressions, &orig->expressions);
  bitmap_copy (&dest->values, &orig->values);
}


/* Free memory used up by SET.  */
static void
bitmap_set_free (bitmap_set_t set)
{
  bitmap_clear (&set->expressions);
  bitmap_clear (&set->values);
}


/* Generate a topologically ordered array of bitmap set SET.  */

static vec<pre_expr>
sorted_array_from_bitmap_set (bitmap_set_t set)
{
  unsigned int i, j;
  bitmap_iterator bi, bj;
  vec<pre_expr> result;

  /* Pre-allocate enough space for the array.  */
  result.create (bitmap_count_bits (&set->expressions));

  FOR_EACH_VALUE_ID_IN_SET (set, i, bi)
    {
      /* The number of expressions having a given value is usually
	 relatively small.  Thus, rather than making a vector of all
	 the expressions and sorting it by value-id, we walk the values
	 and check in the reverse mapping that tells us what expressions
	 have a given value, to filter those in our set.  As a result,
	 the expressions are inserted in value-id order, which means
	 topological order.

	 If this is somehow a significant loss for some cases, we can
	 choose which set to walk based on the set size.  */
      bitmap exprset = value_expressions[i];
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, j, bj)
	{
	  if (bitmap_bit_p (&set->expressions, j))
	    result.quick_push (expression_for_id (j));
	}
    }

  return result;
}

/* Subtract all expressions contained in ORIG from DEST.  */

static bitmap_set_t
bitmap_set_subtract_expressions (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_set_t result = bitmap_set_new ();
  bitmap_iterator bi;
  unsigned int i;

  bitmap_and_compl (&result->expressions, &dest->expressions,
		    &orig->expressions);

  FOR_EACH_EXPR_ID_IN_SET (result, i, bi)
    {
      pre_expr expr = expression_for_id (i);
      unsigned int value_id = get_expr_value_id (expr);
      bitmap_set_bit (&result->values, value_id);
    }

  return result;
}

/* Subtract all values in bitmap set B from bitmap set A.  */

static void
bitmap_set_subtract_values (bitmap_set_t a, bitmap_set_t b)
{
  unsigned int i;
  bitmap_iterator bi;
  unsigned to_remove = -1U;
  bitmap_and_compl_into (&a->values, &b->values);
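  /* Also drop from A->expressions every expression whose value was
     removed above.  Clearing a bit in the bitmap we are iterating
     over is not safe at the point of the iteration, so each removal
     is deferred until the iterator has moved past the bit in
     question.  */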
  FOR_EACH_EXPR_ID_IN_SET (a, i, bi)
    {
      if (to_remove != -1U)
	{
	  bitmap_clear_bit (&a->expressions, to_remove);
	  to_remove = -1U;
	}
      pre_expr expr = expression_for_id (i);
      if (! bitmap_bit_p (&a->values, get_expr_value_id (expr)))
	to_remove = i;
    }
  if (to_remove != -1U)
    bitmap_clear_bit (&a->expressions, to_remove);
}


/* Return true if bitmapped set SET contains the value VALUE_ID.  */

static bool
bitmap_set_contains_value (bitmap_set_t set, unsigned int value_id)
{
  if (value_id_constant_p (value_id))
    return true;

  return bitmap_bit_p (&set->values, value_id);
}

/* Return true if two bitmap sets are equal.  */

static bool
bitmap_set_equal (bitmap_set_t a, bitmap_set_t b)
{
  return bitmap_equal_p (&a->values, &b->values);
}

/* Replace an instance of EXPR's VALUE with EXPR in SET if it exists,
   and add it otherwise.  */

static void
bitmap_value_replace_in_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (value_id_constant_p (val))
    return;

  if (bitmap_set_contains_value (set, val))
    {
      /* The number of expressions having a given value is usually
	 significantly less than the total number of expressions in SET.
	 Thus, rather than check, for each expression in SET, whether it
	 has the value LOOKFOR, we walk the reverse mapping that tells us
	 what expressions have a given value, and see if any of those
	 expressions are in our set.  For large testcases, this is about
	 5-10x faster than walking the bitmap.  If this is somehow a
	 significant loss for some cases, we can choose which set to walk
	 based on the set size.  */
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	{
	  if (bitmap_clear_bit (&set->expressions, i))
	    {
	      bitmap_set_bit (&set->expressions, get_expression_id (expr));
	      return;
	    }
	}
      gcc_unreachable ();
    }
  else
    bitmap_insert_into_set (set, expr);
}

/* Insert EXPR into SET if EXPR's value is not already present in
   SET.  */

static void
bitmap_value_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);

  gcc_checking_assert (expr->id == get_or_alloc_expression_id (expr));

  /* Constant values are always considered to be part of the set.  */
  if (value_id_constant_p (val))
    return;

  /* If the value membership changed, add the expression.  */
  if (bitmap_set_bit (&set->values, val))
    bitmap_set_bit (&set->expressions, expr->id);
}
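
/* Contrast this with bitmap_insert_into_set above: that function can
   put several expressions with the same value into a set, while this
   one keeps only the first expression seen for each value.  E.g.
   (illustrative), inserting e1 and then e2 when both have value-id 7
   leaves only e1 in SET here.  */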

/* Print out EXPR to outfile.  */

static void
print_pre_expr (FILE *outfile, const pre_expr expr)
{
  if (! expr)
    {
      fprintf (outfile, "NULL");
      return;
    }
  switch (expr->kind)
    {
    case CONSTANT:
      print_generic_expr (outfile, PRE_EXPR_CONSTANT (expr));
      break;
    case NAME:
      print_generic_expr (outfile, PRE_EXPR_NAME (expr));
      break;
    case NARY:
      {
	unsigned int i;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	fprintf (outfile, "{%s,", get_tree_code_name (nary->opcode));
	for (i = 0; i < nary->length; i++)
	  {
	    print_generic_expr (outfile, nary->op[i]);
	    if (i != (unsigned) nary->length - 1)
	      fprintf (outfile, ",");
	  }
	fprintf (outfile, "}");
      }
      break;

    case REFERENCE:
      {
	vn_reference_op_t vro;
	unsigned int i;
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	fprintf (outfile, "{");
	for (i = 0;
	     ref->operands.iterate (i, &vro);
	     i++)
	  {
	    bool closebrace = false;
	    if (vro->opcode != SSA_NAME
		&& TREE_CODE_CLASS (vro->opcode) != tcc_declaration)
	      {
		fprintf (outfile, "%s", get_tree_code_name (vro->opcode));
		if (vro->op0)
		  {
		    fprintf (outfile, "<");
		    closebrace = true;
		  }
	      }
	    if (vro->op0)
	      {
		print_generic_expr (outfile, vro->op0);
		if (vro->op1)
		  {
		    fprintf (outfile, ",");
		    print_generic_expr (outfile, vro->op1);
		  }
		if (vro->op2)
		  {
		    fprintf (outfile, ",");
		    print_generic_expr (outfile, vro->op2);
		  }
	      }
	    if (closebrace)
	      fprintf (outfile, ">");
	    if (i != ref->operands.length () - 1)
	      fprintf (outfile, ",");
	  }
	fprintf (outfile, "}");
	if (ref->vuse)
	  {
	    fprintf (outfile, "@");
	    print_generic_expr (outfile, ref->vuse);
	  }
      }
      break;
    }
}
void debug_pre_expr (pre_expr);

/* Like print_pre_expr but always prints to stderr.  */
DEBUG_FUNCTION void
debug_pre_expr (pre_expr e)
{
  print_pre_expr (stderr, e);
  fprintf (stderr, "\n");
}

/* Print out SET to OUTFILE.  */

static void
print_bitmap_set (FILE *outfile, bitmap_set_t set,
		  const char *setname, int blockindex)
{
  fprintf (outfile, "%s[%d] := { ", setname, blockindex);
  if (set)
    {
      bool first = true;
      unsigned i;
      bitmap_iterator bi;

      FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
	{
	  const pre_expr expr = expression_for_id (i);

	  if (!first)
	    fprintf (outfile, ", ");
	  first = false;
	  print_pre_expr (outfile, expr);

	  fprintf (outfile, " (%04d)", get_expr_value_id (expr));
	}
    }
  fprintf (outfile, " }\n");
}

void debug_bitmap_set (bitmap_set_t);

DEBUG_FUNCTION void
debug_bitmap_set (bitmap_set_t set)
{
  print_bitmap_set (stderr, set, "debug", 0);
}

void debug_bitmap_sets_for (basic_block);

DEBUG_FUNCTION void
debug_bitmap_sets_for (basic_block bb)
{
  print_bitmap_set (stderr, AVAIL_OUT (bb), "avail_out", bb->index);
  print_bitmap_set (stderr, EXP_GEN (bb), "exp_gen", bb->index);
  print_bitmap_set (stderr, PHI_GEN (bb), "phi_gen", bb->index);
  print_bitmap_set (stderr, TMP_GEN (bb), "tmp_gen", bb->index);
  print_bitmap_set (stderr, ANTIC_IN (bb), "antic_in", bb->index);
  if (do_partial_partial)
    print_bitmap_set (stderr, PA_IN (bb), "pa_in", bb->index);
  print_bitmap_set (stderr, NEW_SETS (bb), "new_sets", bb->index);
}

/* Print out the expressions that have VAL to OUTFILE.  */

static void
print_value_expressions (FILE *outfile, unsigned int val)
{
  bitmap set = value_expressions[val];
  if (set)
    {
      bitmap_set x;
      char s[10];
      sprintf (s, "%04d", val);
      x.expressions = *set;
      print_bitmap_set (outfile, &x, s, 0);
    }
}


DEBUG_FUNCTION void
debug_value_expressions (unsigned int val)
{
  print_value_expressions (stderr, val);
}

/* Given a CONSTANT, allocate a new CONSTANT type PRE_EXPR to
   represent it.  */

static pre_expr
get_or_alloc_expr_for_constant (tree constant)
{
  unsigned int result_id;
  unsigned int value_id;
  struct pre_expr_d expr;
  pre_expr newexpr;

  expr.kind = CONSTANT;
  PRE_EXPR_CONSTANT (&expr) = constant;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  newexpr = pre_expr_pool.allocate ();
  newexpr->kind = CONSTANT;
  PRE_EXPR_CONSTANT (newexpr) = constant;
  alloc_expression_id (newexpr);
  value_id = get_or_alloc_constant_value_id (constant);
  add_to_value (value_id, newexpr);
  return newexpr;
}

/* Get or allocate a pre_expr for a piece of GIMPLE, and return it.
   Currently only supports constants and SSA_NAMES.  */
static pre_expr
get_or_alloc_expr_for (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return get_or_alloc_expr_for_name (t);
  else if (is_gimple_min_invariant (t))
    return get_or_alloc_expr_for_constant (t);
  gcc_unreachable ();
}

/* Return the folded version of E if E, when folded, is a gimple
   min_invariant or an SSA name.  Otherwise, return E.  */

static pre_expr
fully_constant_expression (pre_expr e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return e;
    case NARY:
      {
	vn_nary_op_t nary = PRE_EXPR_NARY (e);
	tree res = vn_nary_simplify (nary);
	if (!res)
	  return e;
	if (is_gimple_min_invariant (res))
	  return get_or_alloc_expr_for_constant (res);
	if (TREE_CODE (res) == SSA_NAME)
	  return get_or_alloc_expr_for_name (res);
	return e;
      }
    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (e);
	tree folded;
	if ((folded = fully_constant_vn_reference_p (ref)))
	  return get_or_alloc_expr_for_constant (folded);
	return e;
      }
    default:
      return e;
    }
  return e;
}
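
/* For illustration, a NARY such as x_1 + 0 may simplify via
   vn_nary_simplify to plain x_1 and is then returned as a NAME
   pre_expr, while 3 + 4 folds to the constant 7 and comes back as a
   CONSTANT pre_expr; expressions that do not fold are returned
   unchanged.  */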

/* Translate the VUSE backwards through phi nodes in PHIBLOCK, so that
   it has the value it would have in BLOCK.  Set *SAME_VALID to true
   in case the new vuse doesn't change the value id of the OPERANDS.  */

static tree
translate_vuse_through_block (vec<vn_reference_op_s> operands,
			      alias_set_type set, tree type, tree vuse,
			      basic_block phiblock,
			      basic_block block, bool *same_valid)
{
  gimple *phi = SSA_NAME_DEF_STMT (vuse);
  ao_ref ref;
  edge e = NULL;
  bool use_oracle;

  *same_valid = true;

  if (gimple_bb (phi) != phiblock)
    return vuse;

  unsigned int cnt = PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS);
  use_oracle = ao_ref_init_from_vn_reference (&ref, set, type, operands);

  /* Use the alias-oracle to find either the PHI node in this block,
     the first VUSE used in this block that is equivalent to vuse or
     the first VUSE which definition in this block kills the value.  */
  if (gimple_code (phi) == GIMPLE_PHI)
    e = find_edge (block, phiblock);
  else if (use_oracle)
    while (cnt > 0
	   && !stmt_may_clobber_ref_p_1 (phi, &ref))
      {
	--cnt;
	vuse = gimple_vuse (phi);
	phi = SSA_NAME_DEF_STMT (vuse);
	if (gimple_bb (phi) != phiblock)
	  return vuse;
	if (gimple_code (phi) == GIMPLE_PHI)
	  {
	    e = find_edge (block, phiblock);
	    break;
	  }
      }
  else
    return NULL_TREE;

  if (e)
    {
      if (use_oracle)
	{
	  bitmap visited = NULL;
	  /* Try to find a vuse that dominates this phi node by skipping
	     non-clobbering statements.  */
	  vuse = get_continuation_for_phi (phi, &ref, true,
					   cnt, &visited, false, NULL, NULL);
	  if (visited)
	    BITMAP_FREE (visited);
	}
      else
	vuse = NULL_TREE;
      if (!vuse)
	{
	  /* If we didn't find any, the value ID can't stay the same,
	     but return the translated vuse.  */
	  *same_valid = false;
	  vuse = PHI_ARG_DEF (phi, e->dest_idx);
	}
      /* ??? We would like to return vuse here as this is the canonical
	 upmost vdef that this reference is associated with.  But during
	 insertion of the references into the hash tables we only ever
	 directly insert with their direct gimple_vuse, hence returning
	 something else would make us not find the other expression.  */
      return PHI_ARG_DEF (phi, e->dest_idx);
    }

  return NULL_TREE;
}

/* Like bitmap_find_leader, but checks for the value existing in SET1 *or*
   SET2 *or* SET3.  This is used to avoid making a set consisting of the
   union of PA_IN and ANTIC_IN during insert and phi-translation.  */

static inline pre_expr
find_leader_in_sets (unsigned int val, bitmap_set_t set1, bitmap_set_t set2,
		     bitmap_set_t set3 = NULL)
{
  pre_expr result = NULL;

  if (set1)
    result = bitmap_find_leader (set1, val);
  if (!result && set2)
    result = bitmap_find_leader (set2, val);
  if (!result && set3)
    result = bitmap_find_leader (set3, val);
  return result;
}

/* Get the tree type for our PRE expression E.  */

static tree
get_expr_type (const pre_expr e)
{
  switch (e->kind)
    {
    case NAME:
      return TREE_TYPE (PRE_EXPR_NAME (e));
    case CONSTANT:
      return TREE_TYPE (PRE_EXPR_CONSTANT (e));
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->type;
    case NARY:
      return PRE_EXPR_NARY (e)->type;
    }
  gcc_unreachable ();
}

/* Get a representative SSA_NAME for a given expression that is available in B.
   Since all of our sub-expressions are treated as values, we require
   them to be SSA_NAME's for simplicity.
   Prior versions of GVNPRE used to use "value handles" here, so that
   an expression would be VH.11 + VH.10 instead of d_3 + e_6.  In
   either case, the operands are really values (IE we do not expect
   them to be usable without finding leaders).  */

static tree
get_representative_for (const pre_expr e, basic_block b = NULL)
{
  tree name, valnum = NULL_TREE;
  unsigned int value_id = get_expr_value_id (e);

  switch (e->kind)
    {
    case NAME:
      return PRE_EXPR_NAME (e);
    case CONSTANT:
      return PRE_EXPR_CONSTANT (e);
    case NARY:
    case REFERENCE:
      {
	/* Go through all of the expressions representing this value
	   and pick out an SSA_NAME.  */
	unsigned int i;
	bitmap_iterator bi;
	bitmap exprs = value_expressions[value_id];
	EXECUTE_IF_SET_IN_BITMAP (exprs, 0, i, bi)
	  {
	    pre_expr rep = expression_for_id (i);
	    if (rep->kind == NAME)
	      {
		tree name = PRE_EXPR_NAME (rep);
		valnum = VN_INFO (name)->valnum;
		gimple *def = SSA_NAME_DEF_STMT (name);
		/* We have to return either a new representative or one
		   that can be used for expression simplification and thus
		   is available in B.  */
		if (! b
		    || gimple_nop_p (def)
		    || dominated_by_p (CDI_DOMINATORS, b, gimple_bb (def)))
		  return name;
	      }
	    else if (rep->kind == CONSTANT)
	      return PRE_EXPR_CONSTANT (rep);
	  }
      }
      break;
    }

  /* If we reached here we couldn't find an SSA_NAME.  This can
     happen when we've discovered a value that has never appeared in
     the program as set to an SSA_NAME, as the result of phi translation.
     Create one here.
     ??? We should be able to re-use this when we insert the statement
     to compute it.  */
  name = make_temp_ssa_name (get_expr_type (e), gimple_build_nop (), "pretmp");
  VN_INFO (name)->value_id = value_id;
  VN_INFO (name)->valnum = valnum ? valnum : name;
  /* ??? For now mark this SSA name for release by VN.  */
  VN_INFO (name)->needs_insertion = true;
  add_to_value (value_id, get_or_alloc_expr_for_name (name));
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Created SSA_NAME representative ");
      print_generic_expr (dump_file, name);
      fprintf (dump_file, " for expression:");
      print_pre_expr (dump_file, e);
      fprintf (dump_file, " (%04d)\n", value_id);
    }

  return name;
}


static pre_expr
phi_translate (bitmap_set_t, pre_expr, bitmap_set_t, bitmap_set_t, edge);

/* Translate EXPR using phis in PHIBLOCK, so that it has the values of
   the phis in PRED.  Return NULL if we can't find a leader for each part
   of the translated expression.  */
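
/* For example (made-up SSA names), given

	<bb PHIBLOCK>:
	a_1 = PHI <a_2 (PRED), a_3 (OTHER)>

   translating the NARY expression a_1 + 1 through the edge
   PRED->PHIBLOCK yields a_2 + 1; the translated expression is then
   looked up in (or added to) the VN tables so it gets a value-id,
   possibly a fresh one if the value was unknown so far.  */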

static pre_expr
phi_translate_1 (bitmap_set_t dest,
		 pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  basic_block pred = e->src;
  basic_block phiblock = e->dest;
  switch (expr->kind)
    {
    case NARY:
      {
	unsigned int i;
	bool changed = false;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	vn_nary_op_t newnary = XALLOCAVAR (struct vn_nary_op_s,
					   sizeof_vn_nary_op (nary->length));
	memcpy (newnary, nary, sizeof_vn_nary_op (nary->length));

	for (i = 0; i < newnary->length; i++)
	  {
	    if (TREE_CODE (newnary->op[i]) != SSA_NAME)
	      continue;
	    else
	      {
		pre_expr leader, result;
		unsigned int op_val_id = VN_INFO (newnary->op[i])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		result = phi_translate (dest, leader, set1, set2, e);
		if (result && result != leader)
		  /* If op has a leader in the sets we translate make
		     sure to use the value of the translated expression.
		     We might need a new representative for that.  */
		  newnary->op[i] = get_representative_for (result, pred);
		else if (!result)
		  return NULL;

		changed |= newnary->op[i] != nary->op[i];
	      }
	  }
	if (changed)
	  {
	    pre_expr constant;
	    unsigned int new_val_id;

	    PRE_EXPR_NARY (expr) = newnary;
	    constant = fully_constant_expression (expr);
	    PRE_EXPR_NARY (expr) = nary;
	    if (constant != expr)
	      {
		/* For non-CONSTANTs we have to make sure we can eventually
		   insert the expression.  Which means we need to have a
		   leader for it.  */
		if (constant->kind != CONSTANT)
		  {
		    /* Do not allow simplifications to non-constants over
		       backedges as this will likely result in a loop PHI node
		       to be inserted and increased register pressure.
		       See PR77498 - this avoids doing predcoms work in
		       a less efficient way.  */
		    if (e->flags & EDGE_DFS_BACK)
		      ;
		    else
		      {
			unsigned value_id = get_expr_value_id (constant);
			/* We want a leader in ANTIC_OUT or AVAIL_OUT here.
			   dest has what we computed into ANTIC_OUT so far
			   so pick from that - since topological sorting
			   by sorted_array_from_bitmap_set isn't perfect
			   we may lose some cases here.  */
			constant = find_leader_in_sets (value_id, dest,
							AVAIL_OUT (pred));
			if (constant)
			  {
			    if (dump_file && (dump_flags & TDF_DETAILS))
			      {
				fprintf (dump_file, "simplifying ");
				print_pre_expr (dump_file, expr);
				fprintf (dump_file, " translated %d -> %d to ",
					 phiblock->index, pred->index);
				PRE_EXPR_NARY (expr) = newnary;
				print_pre_expr (dump_file, expr);
				PRE_EXPR_NARY (expr) = nary;
				fprintf (dump_file, " to ");
				print_pre_expr (dump_file, constant);
				fprintf (dump_file, "\n");
			      }
			    return constant;
			  }
		      }
		  }
		else
		  return constant;
	      }

	    /* vn_nary_* do not valueize operands.  */
	    for (i = 0; i < newnary->length; ++i)
	      if (TREE_CODE (newnary->op[i]) == SSA_NAME)
		newnary->op[i] = VN_INFO (newnary->op[i])->valnum;
	    tree result = vn_nary_op_lookup_pieces (newnary->length,
						    newnary->opcode,
						    newnary->type,
						    &newnary->op[0],
						    &nary);
	    if (result && is_gimple_min_invariant (result))
	      return get_or_alloc_expr_for_constant (result);

	    expr = pre_expr_pool.allocate ();
	    expr->kind = NARY;
	    expr->id = 0;
	    if (nary && !nary->predicated_values)
	      {
		PRE_EXPR_NARY (expr) = nary;
		new_val_id = nary->value_id;
		get_or_alloc_expression_id (expr);
	      }
	    else
	      {
		new_val_id = get_next_value_id ();
		value_expressions.safe_grow_cleared (get_max_value_id () + 1);
		nary = vn_nary_op_insert_pieces (newnary->length,
						 newnary->opcode,
						 newnary->type,
						 &newnary->op[0],
						 result, new_val_id);
		PRE_EXPR_NARY (expr) = nary;
		get_or_alloc_expression_id (expr);
	      }
	    add_to_value (new_val_id, expr);
	  }
	return expr;
      }
      break;

    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	vec<vn_reference_op_s> operands = ref->operands;
	tree vuse = ref->vuse;
	tree newvuse = vuse;
	vec<vn_reference_op_s> newoperands = vNULL;
	bool changed = false, same_valid = true;
	unsigned int i, n;
	vn_reference_op_t operand;
	vn_reference_t newref;

	for (i = 0; operands.iterate (i, &operand); i++)
	  {
	    pre_expr opresult;
	    pre_expr leader;
	    tree op[3];
	    tree type = operand->type;
	    vn_reference_op_s newop = *operand;
	    op[0] = operand->op0;
	    op[1] = operand->op1;
	    op[2] = operand->op2;
	    for (n = 0; n < 3; ++n)
	      {
		unsigned int op_val_id;
		if (!op[n])
		  continue;
		if (TREE_CODE (op[n]) != SSA_NAME)
		  {
		    /* We can't possibly insert these.  */
		    if (n != 0
			&& !is_gimple_min_invariant (op[n]))
		      break;
		    continue;
		  }
		op_val_id = VN_INFO (op[n])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		opresult = phi_translate (dest, leader, set1, set2, e);
		if (opresult && opresult != leader)
		  {
		    tree name = get_representative_for (opresult);
		    changed |= name != op[n];
		    op[n] = name;
		  }
		else if (!opresult)
		  break;
	      }
	    if (n != 3)
	      {
		newoperands.release ();
		return NULL;
	      }
	    if (!changed)
	      continue;
	    if (!newoperands.exists ())
	      newoperands = operands.copy ();
	    /* We may have changed from an SSA_NAME to a constant.  */
	    if (newop.opcode == SSA_NAME && TREE_CODE (op[0]) != SSA_NAME)
	      newop.opcode = TREE_CODE (op[0]);
	    newop.type = type;
	    newop.op0 = op[0];
	    newop.op1 = op[1];
	    newop.op2 = op[2];
	    newoperands[i] = newop;
	  }
	gcc_checking_assert (i == operands.length ());

	if (vuse)
	  {
	    newvuse = translate_vuse_through_block (newoperands.exists ()
						    ? newoperands : operands,
						    ref->set, ref->type,
						    vuse, phiblock, pred,
						    &same_valid);
	    if (newvuse == NULL_TREE)
	      {
		newoperands.release ();
		return NULL;
	      }
	  }

	if (changed || newvuse != vuse)
	  {
	    unsigned int new_val_id;

	    tree result = vn_reference_lookup_pieces (newvuse, ref->set,
						      ref->type,
						      newoperands.exists ()
						      ? newoperands : operands,
						      &newref, VN_WALK);
	    if (result)
	      newoperands.release ();

	    /* We can always insert constants, so if we have a partially
	       redundant constant load of another type try to translate it
	       to a constant of appropriate type.  */
	    if (result && is_gimple_min_invariant (result))
	      {
		tree tem = result;
		if (!useless_type_conversion_p (ref->type, TREE_TYPE (result)))
		  {
		    tem = fold_unary (VIEW_CONVERT_EXPR, ref->type, result);
		    if (tem && !is_gimple_min_invariant (tem))
		      tem = NULL_TREE;
		  }
		if (tem)
		  return get_or_alloc_expr_for_constant (tem);
	      }

	    /* If we'd have to convert things we would need to validate
	       if we can insert the translated expression.  So fail
	       here for now - we cannot insert an alias with a different
	       type in the VN tables either, as that would assert.  */
	    if (result
		&& !useless_type_conversion_p (ref->type, TREE_TYPE (result)))
	      return NULL;
	    else if (!result && newref
		     && !useless_type_conversion_p (ref->type, newref->type))
	      {
		newoperands.release ();
		return NULL;
	      }

	    expr = pre_expr_pool.allocate ();
	    expr->kind = REFERENCE;
	    expr->id = 0;

	    if (newref)
	      new_val_id = newref->value_id;
	    else
	      {
		if (changed || !same_valid)
		  {
		    new_val_id = get_next_value_id ();
		    value_expressions.safe_grow_cleared
		      (get_max_value_id () + 1);
		  }
		else
		  new_val_id = ref->value_id;
		if (!newoperands.exists ())
		  newoperands = operands.copy ();
		newref = vn_reference_insert_pieces (newvuse, ref->set,
						     ref->type,
						     newoperands,
						     result, new_val_id);
		newoperands = vNULL;
	      }
	    PRE_EXPR_REFERENCE (expr) = newref;
	    get_or_alloc_expression_id (expr);
	    add_to_value (new_val_id, expr);
	  }
	newoperands.release ();
	return expr;
      }
      break;

    case NAME:
      {
	tree name = PRE_EXPR_NAME (expr);
	gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	/* If the SSA name is defined by a PHI node in this block,
	   translate it.  */
	if (gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == phiblock)
	  {
	    tree def = PHI_ARG_DEF (def_stmt, e->dest_idx);

	    /* Handle constant.  */
	    if (is_gimple_min_invariant (def))
	      return get_or_alloc_expr_for_constant (def);

	    return get_or_alloc_expr_for_name (def);
	  }
	/* Otherwise return it unchanged - it will get removed if its
	   value is not available in PRED's AVAIL_OUT set of expressions
	   by the subtraction of TMP_GEN.  */
	return expr;
      }

    default:
      gcc_unreachable ();
    }
}

/* Wrapper around phi_translate_1 providing caching functionality.  */

static pre_expr
phi_translate (bitmap_set_t dest, pre_expr expr,
	       bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  expr_pred_trans_t slot = NULL;
  pre_expr phitrans;

  if (!expr)
    return NULL;

  /* Constants contain no values that need translation.  */
  if (expr->kind == CONSTANT)
    return expr;

  if (value_id_constant_p (get_expr_value_id (expr)))
    return expr;

  /* Don't add translations of NAMEs as those are cheap to translate.  */
  if (expr->kind != NAME)
    {
      if (phi_trans_add (&slot, expr, e->src))
	return slot->v;
      /* Store NULL for the value we want to return in the case of
	 recursing.  */
      slot->v = NULL;
    }

  /* Translate.  */
  basic_block saved_valueize_bb = vn_context_bb;
  vn_context_bb = e->src;
  phitrans = phi_translate_1 (dest, expr, set1, set2, e);
  vn_context_bb = saved_valueize_bb;

  if (slot)
    {
      if (phitrans)
	slot->v = phitrans;
      else
	/* Remove failed translations again, they cause insert
	   iteration to not pick up new opportunities reliably.  */
	phi_translate_table->remove_elt_with_hash (slot, slot->hashcode);
    }

  return phitrans;
}


/* For each expression in SET, translate the values through phi nodes
   in PHIBLOCK using edge PHIBLOCK->PRED, and store the resulting
   expressions in DEST.  */

static void
phi_translate_set (bitmap_set_t dest, bitmap_set_t set, edge e)
{
  vec<pre_expr> exprs;
  pre_expr expr;
  int i;

  if (gimple_seq_empty_p (phi_nodes (e->dest)))
    {
      bitmap_set_copy (dest, set);
      return;
    }

  exprs = sorted_array_from_bitmap_set (set);
  FOR_EACH_VEC_ELT (exprs, i, expr)
    {
      pre_expr translated;
      translated = phi_translate (dest, expr, set, NULL, e);
      if (!translated)
	continue;

      bitmap_insert_into_set (dest, translated);
    }
  exprs.release ();
}
1727
1728 /* Find the leader for a value (i.e., the name representing that
1729 value) in a given set, and return it. Return NULL if no leader
1730 is found. */
1731
1732 static pre_expr
bitmap_find_leader(bitmap_set_t set,unsigned int val)1733 bitmap_find_leader (bitmap_set_t set, unsigned int val)
1734 {
1735 if (value_id_constant_p (val))
1736 {
1737 unsigned int i;
1738 bitmap_iterator bi;
1739 bitmap exprset = value_expressions[val];
1740
1741 EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
1742 {
1743 pre_expr expr = expression_for_id (i);
1744 if (expr->kind == CONSTANT)
1745 return expr;
1746 }
1747 }
1748 if (bitmap_set_contains_value (set, val))
1749 {
1750 /* Rather than walk the entire bitmap of expressions, and see
1751 whether any of them has the value we are looking for, we look
1752 at the reverse mapping, which tells us the set of expressions
1753 that have a given value (IE value->expressions with that
1754 value) and see if any of those expressions are in our set.
1755 The number of expressions per value is usually significantly
1756 less than the number of expressions in the set. In fact, for
1757 large testcases, doing it this way is roughly 5-10x faster
1758 than walking the bitmap.
1759 If this is somehow a significant lose for some cases, we can
1760 choose which set to walk based on which set is smaller. */
1761 unsigned int i;
1762 bitmap_iterator bi;
1763 bitmap exprset = value_expressions[val];
1764
1765 EXECUTE_IF_AND_IN_BITMAP (exprset, &set->expressions, 0, i, bi)
1766 return expression_for_id (i);
1767 }
1768 return NULL;
1769 }
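
/* Illustration: assume value-id 9 is represented by the expressions
   { t_5, u_7 } in value_expressions.  If AVAIL_OUT (BB) contains u_7
   but not t_5, the leader of value 9 in AVAIL_OUT (BB) is u_7;
   constant values short-circuit to their CONSTANT expression.  */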

/* Determine if EXPR, a memory expression, is ANTIC_IN at the top of
   BLOCK by seeing if it is not killed in the block.  Note that we are
   only determining whether there is a store that kills it.  Because
   of the order in which clean iterates over values, we are guaranteed
   that altered operands will have caused us to be eliminated from the
   ANTIC_IN set already.  */

static bool
value_dies_in_block_x (pre_expr expr, basic_block block)
{
  tree vuse = PRE_EXPR_REFERENCE (expr)->vuse;
  vn_reference_t refx = PRE_EXPR_REFERENCE (expr);
  gimple *def;
  gimple_stmt_iterator gsi;
  unsigned id = get_expression_id (expr);
  bool res = false;
  ao_ref ref;

  if (!vuse)
    return false;

  /* Lookup a previously calculated result.  */
  if (EXPR_DIES (block)
      && bitmap_bit_p (EXPR_DIES (block), id * 2))
    return bitmap_bit_p (EXPR_DIES (block), id * 2 + 1);

  /* A memory expression {e, VUSE} dies in the block if there is a
     statement that may clobber e.  If, starting statement walk from the
     top of the basic block, a statement uses VUSE there can be no kill
     in between that use and the original statement that loaded {e, VUSE},
     so we can stop walking.  */
  ref.base = NULL_TREE;
  for (gsi = gsi_start_bb (block); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree def_vuse, def_vdef;
      def = gsi_stmt (gsi);
      def_vuse = gimple_vuse (def);
      def_vdef = gimple_vdef (def);

      /* Not a memory statement.  */
      if (!def_vuse)
        continue;

      /* Not a may-def.  */
      if (!def_vdef)
        {
          /* A load with the same VUSE, we're done.  */
          if (def_vuse == vuse)
            break;

          continue;
        }

      /* Init ref only if we really need it.  */
      if (ref.base == NULL_TREE
          && !ao_ref_init_from_vn_reference (&ref, refx->set, refx->type,
                                             refx->operands))
        {
          res = true;
          break;
        }
      /* If the statement may clobber expr, it dies.  */
      if (stmt_may_clobber_ref_p_1 (def, &ref))
        {
          res = true;
          break;
        }
    }

  /* Remember the result.  */
  if (!EXPR_DIES (block))
    EXPR_DIES (block) = BITMAP_ALLOC (&grand_bitmap_obstack);
  bitmap_set_bit (EXPR_DIES (block), id * 2);
  if (res)
    bitmap_set_bit (EXPR_DIES (block), id * 2 + 1);

  return res;
}
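
/* Illustration: for the reference {*p_1, .MEM_4}, a may-def such as
     *q_2 = ...
   in BLOCK that may alias *p_1 kills the value, so it is not ANTIC_IN
   at the top of BLOCK.  Seeing a plain load with VUSE .MEM_4 first
   ends the walk: no kill can sit between that use and the original
   load.  */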


/* Determine if OP is valid in SET1 U SET2, which it is when the union
   contains its value-id.  */

static bool
op_valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree op)
{
  if (op && TREE_CODE (op) == SSA_NAME)
    {
      unsigned int value_id = VN_INFO (op)->value_id;
      if (!(bitmap_set_contains_value (set1, value_id)
            || (set2 && bitmap_set_contains_value (set2, value_id))))
        return false;
    }
  return true;
}

/* Determine if the expression EXPR is valid in SET1 U SET2.
   ONLY SET2 CAN BE NULL.
   This means that we have a leader for each part of the expression
   (if it consists of values), or the expression is an SSA_NAME.
   For loads/calls, we also see if the vuse is killed in this block.  */

static bool
valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, pre_expr expr)
{
  switch (expr->kind)
    {
    case NAME:
      /* By construction all NAMEs are available.  Non-available
         NAMEs are removed by subtracting TMP_GEN from the sets.  */
      return true;
    case NARY:
      {
        unsigned int i;
        vn_nary_op_t nary = PRE_EXPR_NARY (expr);
        for (i = 0; i < nary->length; i++)
          if (!op_valid_in_sets (set1, set2, nary->op[i]))
            return false;
        return true;
      }
      break;
    case REFERENCE:
      {
        vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
        vn_reference_op_t vro;
        unsigned int i;

        FOR_EACH_VEC_ELT (ref->operands, i, vro)
          {
            if (!op_valid_in_sets (set1, set2, vro->op0)
                || !op_valid_in_sets (set1, set2, vro->op1)
                || !op_valid_in_sets (set1, set2, vro->op2))
              return false;
          }
        return true;
      }
    default:
      gcc_unreachable ();
    }
}

/* Clean the set of expressions SET1 that are no longer valid in SET1 or SET2.
   This means expressions that are made up of values we have no leaders for
   in SET1 or SET2.  */

static void
clean (bitmap_set_t set1, bitmap_set_t set2 = NULL)
{
  vec<pre_expr> exprs = sorted_array_from_bitmap_set (set1);
  pre_expr expr;
  int i;

  FOR_EACH_VEC_ELT (exprs, i, expr)
    {
      if (!valid_in_sets (set1, set2, expr))
        {
          unsigned int val = get_expr_value_id (expr);
          bitmap_clear_bit (&set1->expressions, get_expression_id (expr));
          /* We are entered with possibly multiple expressions for a value
             so before removing a value from the set see if there's an
             expression for it left.  */
          if (! bitmap_find_leader (set1, val))
            bitmap_clear_bit (&set1->values, val);
        }
    }
  exprs.release ();
}
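
/* Illustration: if set1 contains the NARY {plus_expr, a_1, b_2} but
   the value of b_2 has no leader left in set1 or set2, the plus
   expression could not be re-generated from the sets and is removed
   by clean ().  */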

/* Clean the set of expressions that are no longer valid in SET because
   they are clobbered in BLOCK or because they trap and may not be executed.  */

static void
prune_clobbered_mems (bitmap_set_t set, basic_block block)
{
  bitmap_iterator bi;
  unsigned i;
  unsigned to_remove = -1U;
  bool any_removed = false;

  FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
    {
      /* Remove queued expr.  */
      if (to_remove != -1U)
        {
          bitmap_clear_bit (&set->expressions, to_remove);
          any_removed = true;
          to_remove = -1U;
        }

      pre_expr expr = expression_for_id (i);
      if (expr->kind == REFERENCE)
        {
          vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
          if (ref->vuse)
            {
              gimple *def_stmt = SSA_NAME_DEF_STMT (ref->vuse);
              if (!gimple_nop_p (def_stmt)
                  && ((gimple_bb (def_stmt) != block
                       && !dominated_by_p (CDI_DOMINATORS,
                                           block, gimple_bb (def_stmt)))
                      || (gimple_bb (def_stmt) == block
                          && value_dies_in_block_x (expr, block))))
                to_remove = i;
            }
        }
      else if (expr->kind == NARY)
        {
          vn_nary_op_t nary = PRE_EXPR_NARY (expr);
          /* If the NARY may trap make sure the block does not contain
             a possible exit point.
             ???  This is overly conservative if we translate AVAIL_OUT
             as the available expression might be after the exit point.  */
          if (BB_MAY_NOTRETURN (block)
              && vn_nary_may_trap (nary))
            to_remove = i;
        }
    }

  /* Remove queued expr.  */
  if (to_remove != -1U)
    {
      bitmap_clear_bit (&set->expressions, to_remove);
      any_removed = true;
    }

  /* Above we only removed expressions, now clean the set of values
     which no longer have any corresponding expression.  We cannot
     clear the value at the time we remove an expression since there
     may be multiple expressions per value.
     If we queued the possibly-removed values we could use
     bitmap_find_leader to check whether an expression remains for
     each of them.  Depending on the ratio of removed values to the
     number of values/expressions in the set this might be faster
     than rebuilding the value set.  */
  if (any_removed)
    {
      bitmap_clear (&set->values);
      FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
        {
          pre_expr expr = expression_for_id (i);
          unsigned int value_id = get_expr_value_id (expr);
          bitmap_set_bit (&set->values, value_id);
        }
    }
}
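
/* Illustration: a NARY like a_1 / b_2 that may trap is pruned when
   BLOCK contains a possible exit point (BB_MAY_NOTRETURN), since
   anticipating the division above such a point could introduce a trap
   on a path that previously did not execute it.  */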

static sbitmap has_abnormal_preds;

/* Compute the ANTIC set for BLOCK.

   If succs(BLOCK) > 1 then
     ANTIC_OUT[BLOCK] = intersection of ANTIC_IN[b] for all succ(BLOCK)
   else if succs(BLOCK) == 1 then
     ANTIC_OUT[BLOCK] = phi_translate (ANTIC_IN[succ(BLOCK)])

   ANTIC_IN[BLOCK] = clean(ANTIC_OUT[BLOCK] U EXP_GEN[BLOCK] - TMP_GEN[BLOCK])

   Note that clean() is deferred until after the iteration.  */
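
/* Illustration of the equations (a sketch): for the diamond

          B1
         /  \
        B2    B3
         \   /
          B4

   with a_1 + b_2 computed in B2 and again in B4, the expression is in
   EXP_GEN (B2) and EXP_GEN (B4) and thus in ANTIC_IN (B4).  B3 picks
   it up via ANTIC_OUT (B3) = ANTIC_IN (B4), so intersecting
   ANTIC_IN (B2) with ANTIC_IN (B3) leaves its value in
   ANTIC_OUT (B1): it is anticipatable at the end of B1 because every
   path from B1 computes it.  */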

static bool
compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
{
  bitmap_set_t S, old, ANTIC_OUT;
  edge e;
  edge_iterator ei;

  bool was_visited = BB_VISITED (block);
  bool changed = ! BB_VISITED (block);
  BB_VISITED (block) = 1;
  old = ANTIC_OUT = S = NULL;

  /* If any edges from predecessors are abnormal, antic_in is empty,
     so do nothing.  */
  if (block_has_abnormal_pred_edge)
    goto maybe_dump_sets;

  old = ANTIC_IN (block);
  ANTIC_OUT = bitmap_set_new ();

  /* If the block has no successors, ANTIC_OUT is empty.  */
  if (EDGE_COUNT (block->succs) == 0)
    ;
  /* If we have one successor, we could have some phi nodes to
     translate through.  */
  else if (single_succ_p (block))
    {
      e = single_succ_edge (block);
      gcc_assert (BB_VISITED (e->dest));
      phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
    }
  /* If we have multiple successors, we take the intersection of all of
     them.  Note that in the case of loop exit phi nodes, we may have
     phis to translate through.  */
  else
    {
      size_t i;
      edge first = NULL;

      auto_vec<edge> worklist (EDGE_COUNT (block->succs));
      FOR_EACH_EDGE (e, ei, block->succs)
        {
          if (!first
              && BB_VISITED (e->dest))
            first = e;
          else if (BB_VISITED (e->dest))
            worklist.quick_push (e);
          else
            {
              /* Unvisited successors get their ANTIC_IN replaced by the
                 maximal set to arrive at a maximum ANTIC_IN solution.
                 We can ignore them in the intersection operation and thus
                 need not explicitly represent that maximum solution.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "ANTIC_IN is MAX on %d->%d\n",
                         e->src->index, e->dest->index);
            }
        }

      /* With multiple successors we must have visited at least one
         already, which is guaranteed by the iteration order.  */
      gcc_assert (first != NULL);

      phi_translate_set (ANTIC_OUT, ANTIC_IN (first->dest), first);

      /* If we have multiple successors we need to intersect the ANTIC_OUT
         sets.  For values that's a simple intersection but for
         expressions it is a union.  Given we want to have a single
         expression per value in our sets we have to canonicalize.
         Avoid randomness and running into cycles like for PR82129 and
         canonicalize the expression we choose to the one with the
         lowest id.  This requires we actually compute the union first.  */
      FOR_EACH_VEC_ELT (worklist, i, e)
        {
          if (!gimple_seq_empty_p (phi_nodes (e->dest)))
            {
              bitmap_set_t tmp = bitmap_set_new ();
              phi_translate_set (tmp, ANTIC_IN (e->dest), e);
              bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
              bitmap_ior_into (&ANTIC_OUT->expressions, &tmp->expressions);
              bitmap_set_free (tmp);
            }
          else
            {
              bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values);
              bitmap_ior_into (&ANTIC_OUT->expressions,
                               &ANTIC_IN (e->dest)->expressions);
            }
        }
      if (! worklist.is_empty ())
        {
          /* Prune expressions not in the value set.  */
          bitmap_iterator bi;
          unsigned int i;
          unsigned int to_clear = -1U;
          FOR_EACH_EXPR_ID_IN_SET (ANTIC_OUT, i, bi)
            {
              if (to_clear != -1U)
                {
                  bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
                  to_clear = -1U;
                }
              pre_expr expr = expression_for_id (i);
              unsigned int value_id = get_expr_value_id (expr);
              if (!bitmap_bit_p (&ANTIC_OUT->values, value_id))
                to_clear = i;
            }
          if (to_clear != -1U)
            bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
        }
    }

  /* Prune expressions that are clobbered in block and thus become
     invalid if translated from ANTIC_OUT to ANTIC_IN.  */
  prune_clobbered_mems (ANTIC_OUT, block);

  /* Generate ANTIC_OUT - TMP_GEN.  */
  S = bitmap_set_subtract_expressions (ANTIC_OUT, TMP_GEN (block));

  /* Start ANTIC_IN with EXP_GEN - TMP_GEN.  */
  ANTIC_IN (block) = bitmap_set_subtract_expressions (EXP_GEN (block),
                                                      TMP_GEN (block));

  /* Then union in the ANTIC_OUT - TMP_GEN values,
     to get ANTIC_OUT U EXP_GEN - TMP_GEN.  */
  bitmap_ior_into (&ANTIC_IN (block)->values, &S->values);
  bitmap_ior_into (&ANTIC_IN (block)->expressions, &S->expressions);

  /* clean (ANTIC_IN (block)) is deferred to after the iteration converged
     because it can cause non-convergence, see for example PR81181.  */

  /* Intersect ANTIC_IN with the old ANTIC_IN.  This is required until
     we properly represent the maximum expression set, thus not prune
     values without expressions during the iteration.  */
  if (was_visited
      && bitmap_and_into (&ANTIC_IN (block)->values, &old->values))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "warning: intersecting with old ANTIC_IN "
                 "shrinks the set\n");
      /* Prune expressions not in the value set.  */
      bitmap_iterator bi;
      unsigned int i;
      unsigned int to_clear = -1U;
      FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (block), i, bi)
        {
          if (to_clear != -1U)
            {
              bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear);
              to_clear = -1U;
            }
          pre_expr expr = expression_for_id (i);
          unsigned int value_id = get_expr_value_id (expr);
          if (!bitmap_bit_p (&ANTIC_IN (block)->values, value_id))
            to_clear = i;
        }
      if (to_clear != -1U)
        bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear);
    }

  if (!bitmap_set_equal (old, ANTIC_IN (block)))
    changed = true;

 maybe_dump_sets:
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (ANTIC_OUT)
        print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index);

      if (changed)
        fprintf (dump_file, "[changed] ");
      print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN",
                        block->index);

      if (S)
        print_bitmap_set (dump_file, S, "S", block->index);
    }
  if (old)
    bitmap_set_free (old);
  if (S)
    bitmap_set_free (S);
  if (ANTIC_OUT)
    bitmap_set_free (ANTIC_OUT);
  return changed;
}

/* Compute PARTIAL_ANTIC for BLOCK.

   If succs(BLOCK) > 1 then
     PA_OUT[BLOCK] = value wise union of PA_IN[b] + all ANTIC_IN not
     in ANTIC_OUT for all succ(BLOCK)
   else if succs(BLOCK) == 1 then
     PA_OUT[BLOCK] = phi_translate (PA_IN[succ(BLOCK)])

   PA_IN[BLOCK] = clean(PA_OUT[BLOCK] - TMP_GEN[BLOCK] - ANTIC_IN[BLOCK])
*/
static void
compute_partial_antic_aux (basic_block block,
                           bool block_has_abnormal_pred_edge)
{
  bitmap_set_t old_PA_IN;
  bitmap_set_t PA_OUT;
  edge e;
  edge_iterator ei;
  unsigned long max_pa = PARAM_VALUE (PARAM_MAX_PARTIAL_ANTIC_LENGTH);

  old_PA_IN = PA_OUT = NULL;

  /* If any edges from predecessors are abnormal, antic_in is empty,
     so do nothing.  */
  if (block_has_abnormal_pred_edge)
    goto maybe_dump_sets;

  /* If there are too many partially anticipatable values in the
     block, phi_translate_set can take an exponential time: stop
     before the translation starts.  */
  if (max_pa
      && single_succ_p (block)
      && bitmap_count_bits (&PA_IN (single_succ (block))->values) > max_pa)
    goto maybe_dump_sets;

  old_PA_IN = PA_IN (block);
  PA_OUT = bitmap_set_new ();

  /* If the block has no successors, ANTIC_OUT is empty.  */
  if (EDGE_COUNT (block->succs) == 0)
    ;
  /* If we have one successor, we could have some phi nodes to
     translate through.  Note that we can't phi translate across DFS
     back edges in partial antic, because it uses a union operation on
     the successors.  For recurrences like IVs, we will end up
     generating a new value in the set on each go around (i + 3 (VH.1),
     VH.1 + 1 (VH.2), VH.2 + 1 (VH.3), etc), forever.  */
  else if (single_succ_p (block))
    {
      e = single_succ_edge (block);
      if (!(e->flags & EDGE_DFS_BACK))
        phi_translate_set (PA_OUT, PA_IN (e->dest), e);
    }
  /* If we have multiple successors, we take the union of all of
     them.  */
  else
    {
      size_t i;

      auto_vec<edge> worklist (EDGE_COUNT (block->succs));
      FOR_EACH_EDGE (e, ei, block->succs)
        {
          if (e->flags & EDGE_DFS_BACK)
            continue;
          worklist.quick_push (e);
        }
      if (worklist.length () > 0)
        {
          FOR_EACH_VEC_ELT (worklist, i, e)
            {
              unsigned int i;
              bitmap_iterator bi;

              FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (e->dest), i, bi)
                bitmap_value_insert_into_set (PA_OUT,
                                              expression_for_id (i));
              if (!gimple_seq_empty_p (phi_nodes (e->dest)))
                {
                  bitmap_set_t pa_in = bitmap_set_new ();
                  phi_translate_set (pa_in, PA_IN (e->dest), e);
                  FOR_EACH_EXPR_ID_IN_SET (pa_in, i, bi)
                    bitmap_value_insert_into_set (PA_OUT,
                                                  expression_for_id (i));
                  bitmap_set_free (pa_in);
                }
              else
                FOR_EACH_EXPR_ID_IN_SET (PA_IN (e->dest), i, bi)
                  bitmap_value_insert_into_set (PA_OUT,
                                                expression_for_id (i));
            }
        }
    }

  /* Prune expressions that are clobbered in block and thus become
     invalid if translated from PA_OUT to PA_IN.  */
  prune_clobbered_mems (PA_OUT, block);

  /* PA_IN starts with PA_OUT - TMP_GEN.
     Then we subtract things from ANTIC_IN.  */
  PA_IN (block) = bitmap_set_subtract_expressions (PA_OUT, TMP_GEN (block));

  /* For partial antic, we want to put back in the phi results, since
     we will properly avoid making them partially antic over backedges.  */
  bitmap_ior_into (&PA_IN (block)->values, &PHI_GEN (block)->values);
  bitmap_ior_into (&PA_IN (block)->expressions, &PHI_GEN (block)->expressions);

  /* PA_IN[block] = PA_IN[block] - ANTIC_IN[block] */
  bitmap_set_subtract_values (PA_IN (block), ANTIC_IN (block));

  clean (PA_IN (block), ANTIC_IN (block));

 maybe_dump_sets:
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (PA_OUT)
        print_bitmap_set (dump_file, PA_OUT, "PA_OUT", block->index);

      print_bitmap_set (dump_file, PA_IN (block), "PA_IN", block->index);
    }
  if (old_PA_IN)
    bitmap_set_free (old_PA_IN);
  if (PA_OUT)
    bitmap_set_free (PA_OUT);
}

/* Compute ANTIC and partial ANTIC sets.  */

static void
compute_antic (void)
{
  bool changed = true;
  int num_iterations = 0;
  basic_block block;
  int i;
  edge_iterator ei;
  edge e;

  /* If any predecessor edges are abnormal, we punt, so antic_in is empty.
     We pre-build the map of blocks with incoming abnormal edges here.  */
  has_abnormal_preds = sbitmap_alloc (last_basic_block_for_fn (cfun));
  bitmap_clear (has_abnormal_preds);

  FOR_ALL_BB_FN (block, cfun)
    {
      BB_VISITED (block) = 0;

      FOR_EACH_EDGE (e, ei, block->preds)
        if (e->flags & EDGE_ABNORMAL)
          {
            bitmap_set_bit (has_abnormal_preds, block->index);
            break;
          }

      /* While we are here, give empty ANTIC_IN sets to each block.  */
      ANTIC_IN (block) = bitmap_set_new ();
      if (do_partial_partial)
        PA_IN (block) = bitmap_set_new ();
    }

  /* At the exit block we anticipate nothing.  */
  BB_VISITED (EXIT_BLOCK_PTR_FOR_FN (cfun)) = 1;

  /* For ANTIC computation we need a postorder that also guarantees that
     a block with a single successor is visited after its successor.
     RPO on the inverted CFG has this property.  */
  auto_vec<int, 20> postorder;
  inverted_post_order_compute (&postorder);

  auto_sbitmap worklist (last_basic_block_for_fn (cfun) + 1);
  bitmap_clear (worklist);
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    bitmap_set_bit (worklist, e->src->index);
  while (changed)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Starting iteration %d\n", num_iterations);
      /* ???  We need to clear our PHI translation cache here as the
         ANTIC sets shrink and we restrict valid translations to
         those having operands with leaders in ANTIC.  Same below
         for PA ANTIC computation.  */
      num_iterations++;
      changed = false;
      for (i = postorder.length () - 1; i >= 0; i--)
        {
          if (bitmap_bit_p (worklist, postorder[i]))
            {
              basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
              bitmap_clear_bit (worklist, block->index);
              if (compute_antic_aux (block,
                                     bitmap_bit_p (has_abnormal_preds,
                                                   block->index)))
                {
                  FOR_EACH_EDGE (e, ei, block->preds)
                    bitmap_set_bit (worklist, e->src->index);
                  changed = true;
                }
            }
        }
      /* Theoretically possible, but *highly* unlikely.  */
      gcc_checking_assert (num_iterations < 500);
    }

  /* We have to clean after the dataflow problem converged as cleaning
     can cause non-convergence because it is based on expressions
     rather than values.  */
  FOR_EACH_BB_FN (block, cfun)
    clean (ANTIC_IN (block));

  statistics_histogram_event (cfun, "compute_antic iterations",
                              num_iterations);

  if (do_partial_partial)
    {
      /* For partial antic we ignore backedges and thus we do not need
         to perform any iteration when we process blocks in postorder.  */
      for (i = postorder.length () - 1; i >= 0; i--)
        {
          basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
          compute_partial_antic_aux (block,
                                     bitmap_bit_p (has_abnormal_preds,
                                                   block->index));
        }
    }

  sbitmap_free (has_abnormal_preds);
}
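
/* Illustration of the iteration order: for a chain B1 -> B2 -> B3 with
   B3 feeding the exit, RPO on the inverted CFG visits B3, then B2,
   then B1, so each block with a single successor finds its successor's
   ANTIC_IN already computed; in the common case only CFG cycles make
   the worklist re-iterate.  */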


/* Inserted expressions are placed onto this worklist, which is used
   for performing quick dead code elimination of insertions we made
   that didn't turn out to be necessary.  */
static bitmap inserted_exprs;

/* The actual worker for create_component_ref_by_pieces.  */

static tree
create_component_ref_by_pieces_1 (basic_block block, vn_reference_t ref,
                                  unsigned int *operand, gimple_seq *stmts)
{
  vn_reference_op_t currop = &ref->operands[*operand];
  tree genop;
  ++*operand;
  switch (currop->opcode)
    {
    case CALL_EXPR:
      gcc_unreachable ();

    case MEM_REF:
      {
        tree baseop = create_component_ref_by_pieces_1 (block, ref, operand,
                                                        stmts);
        if (!baseop)
          return NULL_TREE;
        tree offset = currop->op0;
        if (TREE_CODE (baseop) == ADDR_EXPR
            && handled_component_p (TREE_OPERAND (baseop, 0)))
          {
            poly_int64 off;
            tree base;
            base = get_addr_base_and_unit_offset (TREE_OPERAND (baseop, 0),
                                                  &off);
            gcc_assert (base);
            offset = int_const_binop (PLUS_EXPR, offset,
                                      build_int_cst (TREE_TYPE (offset),
                                                     off));
            baseop = build_fold_addr_expr (base);
          }
        genop = build2 (MEM_REF, currop->type, baseop, offset);
        MR_DEPENDENCE_CLIQUE (genop) = currop->clique;
        MR_DEPENDENCE_BASE (genop) = currop->base;
        REF_REVERSE_STORAGE_ORDER (genop) = currop->reverse;
        return genop;
      }

    case TARGET_MEM_REF:
      {
        tree genop0 = NULL_TREE, genop1 = NULL_TREE;
        vn_reference_op_t nextop = &ref->operands[++*operand];
        tree baseop = create_component_ref_by_pieces_1 (block, ref, operand,
                                                        stmts);
        if (!baseop)
          return NULL_TREE;
        if (currop->op0)
          {
            genop0 = find_or_generate_expression (block, currop->op0, stmts);
            if (!genop0)
              return NULL_TREE;
          }
        if (nextop->op0)
          {
            genop1 = find_or_generate_expression (block, nextop->op0, stmts);
            if (!genop1)
              return NULL_TREE;
          }
        genop = build5 (TARGET_MEM_REF, currop->type,
                        baseop, currop->op2, genop0, currop->op1, genop1);

        MR_DEPENDENCE_CLIQUE (genop) = currop->clique;
        MR_DEPENDENCE_BASE (genop) = currop->base;
        return genop;
      }

    case ADDR_EXPR:
      if (currop->op0)
        {
          gcc_assert (is_gimple_min_invariant (currop->op0));
          return currop->op0;
        }
      /* Fallthrough.  */
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case VIEW_CONVERT_EXPR:
      {
        tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
                                                        stmts);
        if (!genop0)
          return NULL_TREE;
        return fold_build1 (currop->opcode, currop->type, genop0);
      }

    case WITH_SIZE_EXPR:
      {
        tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
                                                        stmts);
        if (!genop0)
          return NULL_TREE;
        tree genop1 = find_or_generate_expression (block, currop->op0, stmts);
        if (!genop1)
          return NULL_TREE;
        return fold_build2 (currop->opcode, currop->type, genop0, genop1);
      }

    case BIT_FIELD_REF:
      {
        tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
                                                        stmts);
        if (!genop0)
          return NULL_TREE;
        tree op1 = currop->op0;
        tree op2 = currop->op1;
        tree t = build3 (BIT_FIELD_REF, currop->type, genop0, op1, op2);
        REF_REVERSE_STORAGE_ORDER (t) = currop->reverse;
        return fold (t);
      }

      /* For array ref vn_reference_op's, operand 1 of the array ref
         is op0 of the reference op and operand 3 of the array ref is
         op1.  */
    case ARRAY_RANGE_REF:
    case ARRAY_REF:
      {
        tree genop0;
        tree genop1 = currop->op0;
        tree genop2 = currop->op1;
        tree genop3 = currop->op2;
        genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
                                                   stmts);
        if (!genop0)
          return NULL_TREE;
        genop1 = find_or_generate_expression (block, genop1, stmts);
        if (!genop1)
          return NULL_TREE;
        if (genop2)
          {
            tree domain_type = TYPE_DOMAIN (TREE_TYPE (genop0));
            /* Drop zero minimum index if redundant.  */
            if (integer_zerop (genop2)
                && (!domain_type
                    || integer_zerop (TYPE_MIN_VALUE (domain_type))))
              genop2 = NULL_TREE;
            else
              {
                genop2 = find_or_generate_expression (block, genop2, stmts);
                if (!genop2)
                  return NULL_TREE;
              }
          }
        if (genop3)
          {
            tree elmt_type = TREE_TYPE (TREE_TYPE (genop0));
            /* We can't always put a size in units of the element alignment
               here as the element alignment may be not visible.  See
               PR43783.  Simply drop the element size for constant
               sizes.  */
            if (TREE_CODE (genop3) == INTEGER_CST
                && TREE_CODE (TYPE_SIZE_UNIT (elmt_type)) == INTEGER_CST
                && wi::eq_p (wi::to_offset (TYPE_SIZE_UNIT (elmt_type)),
                             (wi::to_offset (genop3)
                              * vn_ref_op_align_unit (currop))))
              genop3 = NULL_TREE;
            else
              {
                genop3 = find_or_generate_expression (block, genop3, stmts);
                if (!genop3)
                  return NULL_TREE;
              }
          }
        return build4 (currop->opcode, currop->type, genop0, genop1,
                       genop2, genop3);
      }
    case COMPONENT_REF:
      {
        tree op0;
        tree op1;
        tree genop2 = currop->op1;
        op0 = create_component_ref_by_pieces_1 (block, ref, operand, stmts);
        if (!op0)
          return NULL_TREE;
        /* op1 should be a FIELD_DECL, which are represented by themselves.  */
        op1 = currop->op0;
        if (genop2)
          {
            genop2 = find_or_generate_expression (block, genop2, stmts);
            if (!genop2)
              return NULL_TREE;
          }
        return fold_build3 (COMPONENT_REF, TREE_TYPE (op1), op0, op1, genop2);
      }

    case SSA_NAME:
      {
        genop = find_or_generate_expression (block, currop->op0, stmts);
        return genop;
      }
    case STRING_CST:
    case INTEGER_CST:
    case COMPLEX_CST:
    case VECTOR_CST:
    case REAL_CST:
    case CONSTRUCTOR:
    case VAR_DECL:
    case PARM_DECL:
    case CONST_DECL:
    case RESULT_DECL:
    case FUNCTION_DECL:
      return currop->op0;

    default:
      gcc_unreachable ();
    }
}
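
/* Illustration: for a reference like a[i_1].f the operands vector is
   ordered from the outermost COMPONENT_REF for f, over the ARRAY_REF
   for i_1, down to the VAR_DECL for a.  The recursion above rebuilds
   the base first, generating or finding a leader for i_1 on the way,
   and returns one access expression with no intermediate
   temporaries.  */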

/* For COMPONENT_REFs and ARRAY_REFs, we can't have any intermediates for the
   COMPONENT_REF or MEM_REF or ARRAY_REF portion, because we'd end up
   trying to rename aggregates into ssa form directly, which is a no no.

   Thus, this routine doesn't create temporaries, it just builds a
   single access expression for the array, calling
   find_or_generate_expression to build the innermost pieces.

   This function is a subroutine of create_expression_by_pieces, and
   should not be called on its own unless you really know what you
   are doing.  */

static tree
create_component_ref_by_pieces (basic_block block, vn_reference_t ref,
                                gimple_seq *stmts)
{
  unsigned int op = 0;
  return create_component_ref_by_pieces_1 (block, ref, &op, stmts);
}

/* Find a simple leader for an expression, or generate one using
   create_expression_by_pieces from a NARY expression for the value.
   BLOCK is the basic_block we are looking for leaders in.
   OP is the tree expression to find a leader for or generate.
   Returns the leader or NULL_TREE on failure.  */

static tree
find_or_generate_expression (basic_block block, tree op, gimple_seq *stmts)
{
  pre_expr expr = get_or_alloc_expr_for (op);
  unsigned int lookfor = get_expr_value_id (expr);
  pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
  if (leader)
    {
      if (leader->kind == NAME)
        return PRE_EXPR_NAME (leader);
      else if (leader->kind == CONSTANT)
        return PRE_EXPR_CONSTANT (leader);

      /* Defer.  */
      return NULL_TREE;
    }

  /* It must be a complex expression, so generate it recursively.  Note
     that this is only necessary to handle gcc.dg/tree-ssa/ssa-pre28.c
     where the insert algorithm fails to insert a required expression.  */
  bitmap exprset = value_expressions[lookfor];
  bitmap_iterator bi;
  unsigned int i;
  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
    {
      pre_expr temp = expression_for_id (i);
      /* We cannot insert random REFERENCE expressions at arbitrary
         places.  We can insert NARYs which eventually re-materialize
         their operand values.  */
      if (temp->kind == NARY)
        return create_expression_by_pieces (block, temp, stmts,
                                            get_expr_type (expr));
    }

  /* Defer.  */
  return NULL_TREE;
}
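
/* Illustration: asked for a leader of the value of i_1 * 4, this
   returns an existing NAME like t_7 from AVAIL_OUT (BLOCK), or a
   CONSTANT if value numbering proved the value constant; failing
   that, a NARY expression for the value is re-generated via
   create_expression_by_pieces, and anything else is deferred.  */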

/* Create an expression in pieces, so that we can handle very complex
   expressions that may be ANTIC, but not necessarily GIMPLE.
   BLOCK is the basic block the expression will be inserted into,
   EXPR is the expression to insert (in value form)
   STMTS is a statement list to append the necessary insertions into.

   This function will die if we hit some value that shouldn't be
   ANTIC but is (IE there is no leader for it, or its components).
   The function returns NULL_TREE in case a different antic expression
   has to be inserted first.
   This function may also generate expressions that are themselves
   partially or fully redundant.  Those that are will be either made
   fully redundant during the next iteration of insert (for partially
   redundant ones), or eliminated by eliminate (for fully redundant
   ones).  */

static tree
create_expression_by_pieces (basic_block block, pre_expr expr,
                             gimple_seq *stmts, tree type)
{
  tree name;
  tree folded;
  gimple_seq forced_stmts = NULL;
  unsigned int value_id;
  gimple_stmt_iterator gsi;
  tree exprtype = type ? type : get_expr_type (expr);
  pre_expr nameexpr;
  gassign *newstmt;

  switch (expr->kind)
    {
    /* We may hit the NAME/CONSTANT case if we have to convert types
       that value numbering saw through.  */
    case NAME:
      folded = PRE_EXPR_NAME (expr);
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (folded))
        return NULL_TREE;
      if (useless_type_conversion_p (exprtype, TREE_TYPE (folded)))
        return folded;
      break;
    case CONSTANT:
      {
        folded = PRE_EXPR_CONSTANT (expr);
        tree tem = fold_convert (exprtype, folded);
        if (is_gimple_min_invariant (tem))
          return tem;
        break;
      }
    case REFERENCE:
      if (PRE_EXPR_REFERENCE (expr)->operands[0].opcode == CALL_EXPR)
        {
          vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
          unsigned int operand = 1;
          vn_reference_op_t currop = &ref->operands[0];
          tree sc = NULL_TREE;
          tree fn = find_or_generate_expression (block, currop->op0, stmts);
          if (!fn)
            return NULL_TREE;
          if (currop->op1)
            {
              sc = find_or_generate_expression (block, currop->op1, stmts);
              if (!sc)
                return NULL_TREE;
            }
          auto_vec<tree> args (ref->operands.length () - 1);
          while (operand < ref->operands.length ())
            {
              tree arg = create_component_ref_by_pieces_1 (block, ref,
                                                           &operand, stmts);
              if (!arg)
                return NULL_TREE;
              args.quick_push (arg);
            }
          gcall *call = gimple_build_call_vec (fn, args);
          gimple_call_set_fntype (call, currop->type);
          if (sc)
            gimple_call_set_chain (call, sc);
          tree forcedname = make_ssa_name (TREE_TYPE (currop->type));
          gimple_call_set_lhs (call, forcedname);
          /* There's no CCP pass after PRE which would re-compute alignment
             information so make sure we re-materialize this here.  */
          if (gimple_call_builtin_p (call, BUILT_IN_ASSUME_ALIGNED)
              && args.length () - 2 <= 1
              && tree_fits_uhwi_p (args[1])
              && (args.length () != 3 || tree_fits_uhwi_p (args[2])))
            {
              unsigned HOST_WIDE_INT halign = tree_to_uhwi (args[1]);
              unsigned HOST_WIDE_INT hmisalign
                = args.length () == 3 ? tree_to_uhwi (args[2]) : 0;
              if ((halign & (halign - 1)) == 0
                  && (hmisalign & ~(halign - 1)) == 0
                  && (unsigned int) halign != 0)
                set_ptr_info_alignment (get_ptr_info (forcedname),
                                        halign, hmisalign);
            }
          gimple_set_vuse (call, BB_LIVE_VOP_ON_EXIT (block));
          gimple_seq_add_stmt_without_update (&forced_stmts, call);
          folded = forcedname;
        }
      else
        {
          folded = create_component_ref_by_pieces (block,
                                                   PRE_EXPR_REFERENCE (expr),
                                                   stmts);
          if (!folded)
            return NULL_TREE;
          name = make_temp_ssa_name (exprtype, NULL, "pretmp");
          newstmt = gimple_build_assign (name, folded);
          gimple_seq_add_stmt_without_update (&forced_stmts, newstmt);
          gimple_set_vuse (newstmt, BB_LIVE_VOP_ON_EXIT (block));
          folded = name;
        }
      break;
    case NARY:
      {
        vn_nary_op_t nary = PRE_EXPR_NARY (expr);
        tree *genop = XALLOCAVEC (tree, nary->length);
        unsigned i;
        for (i = 0; i < nary->length; ++i)
          {
            genop[i] = find_or_generate_expression (block, nary->op[i], stmts);
            if (!genop[i])
              return NULL_TREE;
            /* Ensure genop[] is properly typed for POINTER_PLUS_EXPR.  It
               may have conversions stripped.  */
            if (nary->opcode == POINTER_PLUS_EXPR)
              {
                if (i == 0)
                  genop[i] = gimple_convert (&forced_stmts,
                                             nary->type, genop[i]);
                else if (i == 1)
                  genop[i] = gimple_convert (&forced_stmts,
                                             sizetype, genop[i]);
              }
            else
              genop[i] = gimple_convert (&forced_stmts,
                                         TREE_TYPE (nary->op[i]), genop[i]);
          }
        if (nary->opcode == CONSTRUCTOR)
          {
            vec<constructor_elt, va_gc> *elts = NULL;
            for (i = 0; i < nary->length; ++i)
              CONSTRUCTOR_APPEND_ELT (elts, NULL_TREE, genop[i]);
            folded = build_constructor (nary->type, elts);
            name = make_temp_ssa_name (exprtype, NULL, "pretmp");
            newstmt = gimple_build_assign (name, folded);
            gimple_seq_add_stmt_without_update (&forced_stmts, newstmt);
            folded = name;
          }
        else
          {
            switch (nary->length)
              {
              case 1:
                folded = gimple_build (&forced_stmts, nary->opcode, nary->type,
                                       genop[0]);
                break;
              case 2:
                folded = gimple_build (&forced_stmts, nary->opcode, nary->type,
                                       genop[0], genop[1]);
                break;
              case 3:
                folded = gimple_build (&forced_stmts, nary->opcode, nary->type,
                                       genop[0], genop[1], genop[2]);
                break;
              default:
                gcc_unreachable ();
              }
          }
      }
      break;
    default:
      gcc_unreachable ();
    }

  folded = gimple_convert (&forced_stmts, exprtype, folded);

  /* If there is nothing to insert, return the simplified result.  */
  if (gimple_seq_empty_p (forced_stmts))
    return folded;
  /* If we simplified to a constant, return it and discard any statements
     we may have built.  */
  if (is_gimple_min_invariant (folded))
    {
      gimple_seq_discard (forced_stmts);
      return folded;
    }
  /* Likewise if we simplified to something not queued for insertion.  */
  bool found = false;
  gsi = gsi_last (forced_stmts);
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree forcedname = gimple_get_lhs (stmt);
      if (forcedname == folded)
        {
          found = true;
          break;
        }
    }
  if (! found)
    {
      gimple_seq_discard (forced_stmts);
      return folded;
    }
  gcc_assert (TREE_CODE (folded) == SSA_NAME);

  /* If we have any intermediate expressions, add them to the value sets
     and chain them in the instruction stream.  */
  if (forced_stmts)
    {
      gsi = gsi_start (forced_stmts);
      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          tree forcedname = gimple_get_lhs (stmt);
          pre_expr nameexpr;

          if (forcedname != folded)
            {
              VN_INFO (forcedname)->valnum = forcedname;
              VN_INFO (forcedname)->value_id = get_next_value_id ();
              nameexpr = get_or_alloc_expr_for_name (forcedname);
              add_to_value (VN_INFO (forcedname)->value_id, nameexpr);
              bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
              bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);
            }

          bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (forcedname));
        }
      gimple_seq_add_seq (stmts, forced_stmts);
    }

  name = folded;

  /* Fold the last statement.  */
  gsi = gsi_last (*stmts);
  if (fold_stmt_inplace (&gsi))
    update_stmt (gsi_stmt (gsi));

  /* Add a value number to the temporary.
     The value may already exist in either NEW_SETS, or AVAIL_OUT, because
     we are creating the expression by pieces, and this particular piece of
     the expression may have been represented.  There is no harm in replacing
     here.  */
  value_id = get_expr_value_id (expr);
  VN_INFO (name)->value_id = value_id;
  VN_INFO (name)->valnum = vn_valnum_from_value_id (value_id);
  if (VN_INFO (name)->valnum == NULL_TREE)
    VN_INFO (name)->valnum = name;
  gcc_assert (VN_INFO (name)->valnum != NULL_TREE);
  nameexpr = get_or_alloc_expr_for_name (name);
  add_to_value (value_id, nameexpr);
  if (NEW_SETS (block))
    bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
  bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);

  pre_stats.insertions++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Inserted ");
      print_gimple_stmt (dump_file, gsi_stmt (gsi_last (*stmts)), 0);
      fprintf (dump_file, " in predecessor %d (%04d)\n",
               block->index, value_id);
    }

  return name;
}
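
/* Illustration: inserting the value of a_1 + t_5, where t_5 is the
   leader of an inner piece, appends something like
     pretmp_8 = a_1 + t_5;
   to STMTS, gives pretmp_8 the value number of the original
   expression and enters it into NEW_SETS and AVAIL_OUT so later
   insertion rounds and elimination can re-use it.  */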


/* Insert the to-be-made-available values of expression EXPRNUM for each
   predecessor, stored in AVAIL, into the predecessors of BLOCK, and
   merge the result with a phi node, given the same value number as
   NODE.  Return true if we have inserted new stuff.  */

static bool
insert_into_preds_of_block (basic_block block, unsigned int exprnum,
                            vec<pre_expr> avail)
{
  pre_expr expr = expression_for_id (exprnum);
  pre_expr newphi;
  unsigned int val = get_expr_value_id (expr);
  edge pred;
  bool insertions = false;
  bool nophi = false;
  basic_block bprime;
  pre_expr eprime;
  edge_iterator ei;
  tree type = get_expr_type (expr);
  tree temp;
  gphi *phi;

  /* Make sure we aren't creating an induction variable.  */
  if (bb_loop_depth (block) > 0 && EDGE_COUNT (block->preds) == 2)
    {
      bool firstinsideloop = false;
      bool secondinsideloop = false;
      firstinsideloop = flow_bb_inside_loop_p (block->loop_father,
                                               EDGE_PRED (block, 0)->src);
      secondinsideloop = flow_bb_inside_loop_p (block->loop_father,
                                                EDGE_PRED (block, 1)->src);
      /* Induction variables only have one edge inside the loop.  */
      if ((firstinsideloop ^ secondinsideloop)
          && expr->kind != REFERENCE)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Skipping insertion of phi for partial redundancy: Looks like an induction variable\n");
          nophi = true;
        }
    }

  /* Make the necessary insertions.  */
  FOR_EACH_EDGE (pred, ei, block->preds)
    {
      gimple_seq stmts = NULL;
      tree builtexpr;
      bprime = pred->src;
      eprime = avail[pred->dest_idx];
      builtexpr = create_expression_by_pieces (bprime, eprime,
                                               &stmts, type);
      gcc_assert (!(pred->flags & EDGE_ABNORMAL));
      if (!gimple_seq_empty_p (stmts))
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pred, stmts);
          gcc_assert (! new_bb);
          insertions = true;
        }
      if (!builtexpr)
        {
          /* We cannot insert a PHI node if we failed to insert
             on one edge.  */
          nophi = true;
          continue;
        }
      if (is_gimple_min_invariant (builtexpr))
        avail[pred->dest_idx] = get_or_alloc_expr_for_constant (builtexpr);
      else
        avail[pred->dest_idx] = get_or_alloc_expr_for_name (builtexpr);
    }
  /* If we didn't want a phi node, and we made insertions, we still have
     inserted new stuff, and thus return true.  If we didn't want a phi node,
     and didn't make insertions, we haven't added anything new, so return
     false.  */
  if (nophi && insertions)
    return true;
  else if (nophi && !insertions)
    return false;

  /* Now build a phi for the new variable.  */
  temp = make_temp_ssa_name (type, NULL, "prephitmp");
  phi = create_phi_node (temp, block);

  VN_INFO (temp)->value_id = val;
  VN_INFO (temp)->valnum = vn_valnum_from_value_id (val);
  if (VN_INFO (temp)->valnum == NULL_TREE)
    VN_INFO (temp)->valnum = temp;
  bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp));
  FOR_EACH_EDGE (pred, ei, block->preds)
    {
      pre_expr ae = avail[pred->dest_idx];
      gcc_assert (get_expr_type (ae) == type
                  || useless_type_conversion_p (type, get_expr_type (ae)));
      if (ae->kind == CONSTANT)
        add_phi_arg (phi, unshare_expr (PRE_EXPR_CONSTANT (ae)),
                     pred, UNKNOWN_LOCATION);
      else
        add_phi_arg (phi, PRE_EXPR_NAME (ae), pred, UNKNOWN_LOCATION);
    }

  newphi = get_or_alloc_expr_for_name (temp);
  add_to_value (val, newphi);

  /* The value should *not* exist in PHI_GEN, or else we wouldn't be doing
     this insertion, since we test for the existence of this value in PHI_GEN
     before proceeding with the partial redundancy checks in insert_aux.

     The value may exist in AVAIL_OUT, in particular, it could be represented
     by the expression we are trying to eliminate, in which case we want the
     replacement to occur.  If it does not exist in AVAIL_OUT, we want it
     inserted there.

     Similarly to the PHI_GEN case, the value should not exist in NEW_SETS of
     this block, because if it did, it would have existed in our dominator's
     AVAIL_OUT, and would have been skipped due to the full redundancy
     check.  */

  bitmap_insert_into_set (PHI_GEN (block), newphi);
  bitmap_value_replace_in_set (AVAIL_OUT (block),
                               newphi);
  bitmap_insert_into_set (NEW_SETS (block),
                          newphi);

  /* If we insert a PHI node for a conversion of another PHI node
     in the same basic-block try to preserve range information.
     This is important so that follow-up loop passes receive optimal
     number-of-iterations analysis results.  See PR61743.  */
  if (expr->kind == NARY
      && CONVERT_EXPR_CODE_P (expr->u.nary->opcode)
      && TREE_CODE (expr->u.nary->op[0]) == SSA_NAME
      && gimple_bb (SSA_NAME_DEF_STMT (expr->u.nary->op[0])) == block
      && INTEGRAL_TYPE_P (type)
      && INTEGRAL_TYPE_P (TREE_TYPE (expr->u.nary->op[0]))
      && (TYPE_PRECISION (type)
          >= TYPE_PRECISION (TREE_TYPE (expr->u.nary->op[0])))
      && SSA_NAME_RANGE_INFO (expr->u.nary->op[0]))
    {
      wide_int min, max;
      if (get_range_info (expr->u.nary->op[0], &min, &max) == VR_RANGE
          && !wi::neg_p (min, SIGNED)
          && !wi::neg_p (max, SIGNED))
        /* Just handle extension and sign-changes of all-positive ranges.  */
        set_range_info (temp,
                        SSA_NAME_RANGE_TYPE (expr->u.nary->op[0]),
                        wide_int_storage::from (min, TYPE_PRECISION (type),
                                                TYPE_SIGN (type)),
                        wide_int_storage::from (max, TYPE_PRECISION (type),
                                                TYPE_SIGN (type)));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Created phi ");
      print_gimple_stmt (dump_file, phi, 0);
      fprintf (dump_file, " in block %d (%04d)\n", block->index, val);
    }
  pre_stats.phis++;
  return true;
}
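
/* Illustration: to make x_1 + y_2, computed in predecessor B1 but not
   in predecessor B2, fully redundant in their common successor B3,
   the code above inserts t_5 = x_1 + y_2 on the B2 edge and creates
     prephitmp_6 = PHI <t_4 (B1), t_5 (B2)>
   in B3, all carrying the value number of x + y.  */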


/* Perform insertion of partially redundant or hoistable values.
   For BLOCK, do the following:
   1.  Propagate the NEW_SETS of the dominator into the current block.
   If the block has multiple predecessors,
   2a. Iterate over the ANTIC expressions for the block to see if
       any of them are partially redundant.
   2b. If so, insert them into the necessary predecessors to make
       the expression fully redundant.
   2c. Insert a new PHI merging the values of the predecessors.
   2d. Insert the new PHI, and the new expressions, into the
       NEW_SETS set.
   If the block has multiple successors,
   3a. Iterate over the ANTIC values for the block to see if
       any of them are good candidates for hoisting.
   3b. If so, insert expressions computing the values in BLOCK,
       and add the new expressions into the NEW_SETS set.
   4. Recursively call ourselves on the dominator children of BLOCK.

   Steps 1, 2a, and 4 are done by insert_aux.  2b, 2c and 2d are done by
   do_pre_regular_insertion and do_pre_partial_partial_insertion.
   3a and 3b are done in do_hoist_insertion.  */

static bool
do_pre_regular_insertion (basic_block block, basic_block dom)
{
  bool new_stuff = false;
  vec<pre_expr> exprs;
  pre_expr expr;
  auto_vec<pre_expr> avail;
  int i;

  exprs = sorted_array_from_bitmap_set (ANTIC_IN (block));
  avail.safe_grow (EDGE_COUNT (block->preds));

  FOR_EACH_VEC_ELT (exprs, i, expr)
    {
      if (expr->kind == NARY
          || expr->kind == REFERENCE)
        {
          unsigned int val;
          bool by_some = false;
          bool cant_insert = false;
          bool all_same = true;
          pre_expr first_s = NULL;
          edge pred;
          basic_block bprime;
          pre_expr eprime = NULL;
          edge_iterator ei;
          pre_expr edoubleprime = NULL;
          bool do_insertion = false;

          val = get_expr_value_id (expr);
          if (bitmap_set_contains_value (PHI_GEN (block), val))
            continue;
          if (bitmap_set_contains_value (AVAIL_OUT (dom), val))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "Found fully redundant value: ");
                  print_pre_expr (dump_file, expr);
                  fprintf (dump_file, "\n");
                }
              continue;
            }

          FOR_EACH_EDGE (pred, ei, block->preds)
            {
              unsigned int vprime;

              /* We should never run insertion for the exit block
                 and so not come across fake pred edges.  */
              gcc_assert (!(pred->flags & EDGE_FAKE));
              bprime = pred->src;
              /* We are looking at ANTIC_OUT of bprime.  */
              eprime = phi_translate (NULL, expr, ANTIC_IN (block), NULL, pred);

              /* eprime will generally only be NULL if the
                 value of the expression, translated
                 through the PHI for this predecessor, is
                 undefined.  If that is the case, we can't
                 make the expression fully redundant,
                 because its value is undefined along a
                 predecessor path.  We can thus break out
                 early because it doesn't matter what the
                 rest of the results are.  */
              if (eprime == NULL)
                {
                  avail[pred->dest_idx] = NULL;
                  cant_insert = true;
                  break;
                }

              vprime = get_expr_value_id (eprime);
              edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime),
                                                 vprime);
              if (edoubleprime == NULL)
                {
                  avail[pred->dest_idx] = eprime;
                  all_same = false;
                }
              else
                {
                  avail[pred->dest_idx] = edoubleprime;
                  by_some = true;
                  /* We want to perform insertions to remove a redundancy on
                     a path in the CFG we want to optimize for speed.  */
                  if (optimize_edge_for_speed_p (pred))
                    do_insertion = true;
                  if (first_s == NULL)
                    first_s = edoubleprime;
                  else if (!pre_expr_d::equal (first_s, edoubleprime))
                    all_same = false;
                }
            }
          /* If we can insert it, it's not the same value
             already existing along every predecessor, and
             it's defined by some predecessor, then it is
             partially redundant.  */
          if (!cant_insert && !all_same && by_some)
            {
              if (!do_insertion)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Skipping partial redundancy for "
                               "expression ");
                      print_pre_expr (dump_file, expr);
                      fprintf (dump_file, " (%04d), no redundancy on to be "
                               "optimized for speed edge\n", val);
                    }
                }
              else if (dbg_cnt (treepre_insert))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Found partial redundancy for "
                               "expression ");
                      print_pre_expr (dump_file, expr);
                      fprintf (dump_file, " (%04d)\n",
                               get_expr_value_id (expr));
                    }
                  if (insert_into_preds_of_block (block,
                                                  get_expression_id (expr),
                                                  avail))
                    new_stuff = true;
                }
            }
          /* If all edges produce the same value and that value is
             an invariant, then the PHI has the same value on all
             edges.  Note this.  */
          else if (!cant_insert && all_same)
            {
              gcc_assert (edoubleprime->kind == CONSTANT
                          || edoubleprime->kind == NAME);

              tree temp = make_temp_ssa_name (get_expr_type (expr),
                                              NULL, "pretmp");
              gassign *assign
                = gimple_build_assign (temp,
                                       edoubleprime->kind == CONSTANT ?
                                       PRE_EXPR_CONSTANT (edoubleprime) :
                                       PRE_EXPR_NAME (edoubleprime));
              gimple_stmt_iterator gsi = gsi_after_labels (block);
              gsi_insert_before (&gsi, assign, GSI_NEW_STMT);

              VN_INFO (temp)->value_id = val;
              VN_INFO (temp)->valnum = vn_valnum_from_value_id (val);
              if (VN_INFO (temp)->valnum == NULL_TREE)
                VN_INFO (temp)->valnum = temp;
              bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp));
              pre_expr newe = get_or_alloc_expr_for_name (temp);
              add_to_value (val, newe);
              bitmap_value_replace_in_set (AVAIL_OUT (block), newe);
              bitmap_insert_into_set (NEW_SETS (block), newe);
            }
        }
    }

  exprs.release ();
  return new_stuff;
}


/* Perform insertion for partially anticipatable expressions.  There
   is only one case we will perform insertion for these.  This case is
   if the expression is partially anticipatable, and fully available.
   In this case, we know that putting it earlier will enable us to
   remove the later computation.  */

static bool
do_pre_partial_partial_insertion (basic_block block, basic_block dom)
{
  bool new_stuff = false;
  vec<pre_expr> exprs;
  pre_expr expr;
  auto_vec<pre_expr> avail;
  int i;

  exprs = sorted_array_from_bitmap_set (PA_IN (block));
  avail.safe_grow (EDGE_COUNT (block->preds));

  FOR_EACH_VEC_ELT (exprs, i, expr)
    {
      if (expr->kind == NARY
          || expr->kind == REFERENCE)
        {
          unsigned int val;
          bool by_all = true;
          bool cant_insert = false;
          edge pred;
          basic_block bprime;
          pre_expr eprime = NULL;
          edge_iterator ei;

          val = get_expr_value_id (expr);
          if (bitmap_set_contains_value (PHI_GEN (block), val))
            continue;
          if (bitmap_set_contains_value (AVAIL_OUT (dom), val))
            continue;

          FOR_EACH_EDGE (pred, ei, block->preds)
            {
              unsigned int vprime;
              pre_expr edoubleprime;

              /* We should never run insertion for the exit block
                 and so not come across fake pred edges.  */
              gcc_assert (!(pred->flags & EDGE_FAKE));
              bprime = pred->src;
              eprime = phi_translate (NULL, expr, ANTIC_IN (block),
                                      PA_IN (block), pred);

              /* eprime will generally only be NULL if the
                 value of the expression, translated
                 through the PHI for this predecessor, is
                 undefined.  If that is the case, we can't
                 make the expression fully redundant,
                 because its value is undefined along a
                 predecessor path.  We can thus break out
                 early because it doesn't matter what the
                 rest of the results are.  */
              if (eprime == NULL)
                {
                  avail[pred->dest_idx] = NULL;
                  cant_insert = true;
                  break;
                }

              vprime = get_expr_value_id (eprime);
              edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime), vprime);
              avail[pred->dest_idx] = edoubleprime;
              if (edoubleprime == NULL)
                {
                  by_all = false;
                  break;
                }
            }

          /* If we can insert it and its value is available along every
             predecessor, inserting here makes the later full computation
             redundant.  */
          if (!cant_insert && by_all)
            {
              edge succ;
              bool do_insertion = false;

              /* Insert only if we can remove a later expression on a path
                 that we want to optimize for speed.
                 The phi node that we will be inserting in BLOCK is not free,
                 and inserting it for the sake of !optimize_for_speed successor
                 may cause regressions on the speed path.  */
              FOR_EACH_EDGE (succ, ei, block->succs)
                {
                  if (bitmap_set_contains_value (PA_IN (succ->dest), val)
                      || bitmap_set_contains_value (ANTIC_IN (succ->dest), val))
                    {
                      if (optimize_edge_for_speed_p (succ))
                        do_insertion = true;
                    }
                }

              if (!do_insertion)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Skipping partial partial redundancy "
                               "for expression ");
                      print_pre_expr (dump_file, expr);
                      fprintf (dump_file, " (%04d), not (partially) anticipated "
                               "on any to be optimized for speed edges\n", val);
                    }
                }
              else if (dbg_cnt (treepre_insert))
                {
                  pre_stats.pa_insert++;
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Found partial partial redundancy "
                               "for expression ");
                      print_pre_expr (dump_file, expr);
                      fprintf (dump_file, " (%04d)\n",
                               get_expr_value_id (expr));
                    }
                  if (insert_into_preds_of_block (block,
                                                  get_expression_id (expr),
                                                  avail))
                    new_stuff = true;
                }
            }
        }
    }

  exprs.release ();
  return new_stuff;
}
3471
3472 /* Insert expressions in BLOCK to compute hoistable values up.
3473 Return TRUE if something was inserted, otherwise return FALSE.
3474 The caller has to make sure that BLOCK has at least two successors. */
3475
3476 static bool
do_hoist_insertion(basic_block block)3477 do_hoist_insertion (basic_block block)
3478 {
3479 edge e;
3480 edge_iterator ei;
3481 bool new_stuff = false;
3482 unsigned i;
3483 gimple_stmt_iterator last;
3484
3485 /* At least two successors, or else... */
3486 gcc_assert (EDGE_COUNT (block->succs) >= 2);
3487
3488 /* Check that all successors of BLOCK are dominated by BLOCK.
3489 We could use dominated_by_p() for this, but actually there is a much
3490 quicker check: because critical edges have been split, any successor
3491 that is dominated by BLOCK can't have more than one predecessor edge. */
3492 FOR_EACH_EDGE (e, ei, block->succs)
3493 if (! single_pred_p (e->dest))
3494 return false;
3495
3496 /* Determine the insertion point. If we would have to insert before
3497 the last stmt and cannot do so safely, bail out. */
3498 last = gsi_last_bb (block);
3499 if (!gsi_end_p (last)
3500 && !is_ctrl_stmt (gsi_stmt (last))
3501 && stmt_ends_bb_p (gsi_stmt (last)))
3502 return false;
3503
3504 /* Compute the set of hoistable expressions from ANTIC_IN. First compute
3505 hoistable values. */
3506 bitmap_set hoistable_set;
3507
3508 /* A hoistable value must be in ANTIC_IN (BLOCK)
3509 but not in AVAIL_OUT (BLOCK). */
3510 bitmap_initialize (&hoistable_set.values, &grand_bitmap_obstack);
3511 bitmap_and_compl (&hoistable_set.values,
3512 &ANTIC_IN (block)->values, &AVAIL_OUT (block)->values);
3513
3514 /* Short-cut for a common case: hoistable_set is empty. */
3515 if (bitmap_empty_p (&hoistable_set.values))
3516 return false;
3517
3518 /* Compute which of the hoistable values is in AVAIL_OUT of
3519 at least one of the successors of BLOCK. */
3520 bitmap_head availout_in_some;
3521 bitmap_initialize (&availout_in_some, &grand_bitmap_obstack);
3522 FOR_EACH_EDGE (e, ei, block->succs)
3523 /* Do not consider expressions solely because of their availability
3524 on loop exits. They'd be ANTIC-IN throughout the whole loop
3525 and thus effectively hoisted across loops by the combination of
3526 PRE and hoisting. */
3527 if (! loop_exit_edge_p (block->loop_father, e))
3528 bitmap_ior_and_into (&availout_in_some, &hoistable_set.values,
3529 &AVAIL_OUT (e->dest)->values);
3530 bitmap_clear (&hoistable_set.values);
3531
3532 /* Short-cut for a common case: availout_in_some is empty. */
3533 if (bitmap_empty_p (&availout_in_some))
3534 return false;
3535
3536 /* Hack hoistable_set in-place so we can use sorted_array_from_bitmap_set. */
3537 hoistable_set.values = availout_in_some;
3538 hoistable_set.expressions = ANTIC_IN (block)->expressions;
3539
3540 /* Now finally construct the topologically-ordered expression set. */
3541 vec<pre_expr> exprs = sorted_array_from_bitmap_set (&hoistable_set);
3542
3543 bitmap_clear (&hoistable_set.values);
3544
3545 /* If there are candidate values for hoisting, insert expressions
3546 strategically to make the hoistable expressions fully redundant. */
3547 pre_expr expr;
3548 FOR_EACH_VEC_ELT (exprs, i, expr)
3549 {
3550 /* While we try to sort expressions topologically above, the
3551 sorting doesn't work out perfectly. Catch expressions we
3552 already inserted. */
3553 unsigned int value_id = get_expr_value_id (expr);
3554 if (bitmap_set_contains_value (AVAIL_OUT (block), value_id))
3555 {
3556 if (dump_file && (dump_flags & TDF_DETAILS))
3557 {
3558 fprintf (dump_file,
3559 "Already inserted expression for ");
3560 print_pre_expr (dump_file, expr);
3561 fprintf (dump_file, " (%04d)\n", value_id);
3562 }
3563 continue;
3564 }
3565
3566 /* If we end up with a punned expression representation and this
3567 happens to be a float typed one, give up - we can't know for
3568 sure whether all paths perform the floating-point load we are
3569 about to insert and on some targets this can cause correctness
3570 issues. See PR88240. */
3571 if (expr->kind == REFERENCE
3572 && PRE_EXPR_REFERENCE (expr)->punned
3573 && FLOAT_TYPE_P (get_expr_type (expr)))
3574 continue;
3575
3576 /* OK, we should hoist this value. Perform the transformation. */
3577 pre_stats.hoist_insert++;
3578 if (dump_file && (dump_flags & TDF_DETAILS))
3579 {
3580 fprintf (dump_file,
3581 "Inserting expression in block %d for code hoisting: ",
3582 block->index);
3583 print_pre_expr (dump_file, expr);
3584 fprintf (dump_file, " (%04d)\n", value_id);
3585 }
3586
3587 gimple_seq stmts = NULL;
3588 tree res = create_expression_by_pieces (block, expr, &stmts,
3589 get_expr_type (expr));
3590
3591 /* Do not return true if expression creation ultimately
3592 did not insert any statements. */
3593 if (gimple_seq_empty_p (stmts))
3594 res = NULL_TREE;
3595 else
3596 {
3597 if (gsi_end_p (last) || is_ctrl_stmt (gsi_stmt (last)))
3598 gsi_insert_seq_before (&last, stmts, GSI_SAME_STMT);
3599 else
3600 gsi_insert_seq_after (&last, stmts, GSI_NEW_STMT);
3601 }
3602
3603 /* Make sure to not return true if expression creation ultimately
3604 failed but also make sure to insert any stmts produced as they
3605 are tracked in inserted_exprs. */
3606 if (! res)
3607 continue;
3608
3609 new_stuff = true;
3610 }
3611
3612 exprs.release ();
3613
3614 return new_stuff;
3615 }
3616
3617 /* Do a dominator walk on the control flow graph, and insert computations
3618 of values as necessary for PRE and hoisting. */
3619
3620 static bool
3621 insert_aux (basic_block block, bool do_pre, bool do_hoist)
3622 {
3623 basic_block son;
3624 bool new_stuff = false;
3625
3626 if (block)
3627 {
3628 basic_block dom;
3629 dom = get_immediate_dominator (CDI_DOMINATORS, block);
3630 if (dom)
3631 {
3632 unsigned i;
3633 bitmap_iterator bi;
3634 bitmap_set_t newset;
3635
3636 /* First, update the AVAIL_OUT set with anything we may have
3637 inserted higher up in the dominator tree. */
3638 newset = NEW_SETS (dom);
3639 if (newset)
3640 {
3641 /* Note that we need to value_replace both NEW_SETS and
3642 AVAIL_OUT. In both sets the value may currently be
3643 represented by some non-simple expression that we want
3644 to replace with the newly inserted one. */
3645 FOR_EACH_EXPR_ID_IN_SET (newset, i, bi)
3646 {
3647 pre_expr expr = expression_for_id (i);
3648 bitmap_value_replace_in_set (NEW_SETS (block), expr);
3649 bitmap_value_replace_in_set (AVAIL_OUT (block), expr);
3650 }
3651 }
3652
3653 /* Insert expressions for partial redundancies. */
3654 if (do_pre && !single_pred_p (block))
3655 {
3656 new_stuff |= do_pre_regular_insertion (block, dom);
3657 if (do_partial_partial)
3658 new_stuff |= do_pre_partial_partial_insertion (block, dom);
3659 }
3660
3661 /* Insert expressions for hoisting. */
3662 if (do_hoist && EDGE_COUNT (block->succs) >= 2)
3663 new_stuff |= do_hoist_insertion (block);
3664 }
3665 }
3666 for (son = first_dom_son (CDI_DOMINATORS, block);
3667 son;
3668 son = next_dom_son (CDI_DOMINATORS, son))
3669 {
3670 new_stuff |= insert_aux (son, do_pre, do_hoist);
3671 }
3672
3673 return new_stuff;
3674 }
3675
3676 /* Perform insertion of partially redundant and hoistable values. */
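
/* As a hypothetical sketch of a PRE insertion and of why iteration
   is needed:

     <bb 2> : if (c_1 != 0) goto <bb 3>; else goto <bb 4>;
     <bb 3> : x_2 = a_5 + b_6;
     <bb 4> : ;
     <bb 5> : y_3 = a_5 + b_6;

   The value of a_5 + b_6 is available out of bb 3 but not out of
   bb 4, so it is partially redundant in bb 5: we insert
   _7 = a_5 + b_6 into bb 4 and a PHI _8 = PHI <x_2, _7> into bb 5.
   The inserted names are recorded in NEW_SETS; because they can
   enable further insertions in dominated blocks, insert_aux is
   re-run until nothing new is inserted. */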
3677
3678 static void
3679 insert (void)
3680 {
3681 bool new_stuff = true;
3682 basic_block bb;
3683 int num_iterations = 0;
3684
3685 FOR_ALL_BB_FN (bb, cfun)
3686 NEW_SETS (bb) = bitmap_set_new ();
3687
3688 while (new_stuff)
3689 {
3690 num_iterations++;
3691 if (dump_file && (dump_flags & TDF_DETAILS))
3692 fprintf (dump_file, "Starting insert iteration %d\n", num_iterations);
3693 new_stuff = insert_aux (ENTRY_BLOCK_PTR_FOR_FN (cfun), flag_tree_pre,
3694 flag_code_hoisting);
3695
3696 /* Clear the NEW sets before the next iteration. We have already
3697 fully propagated their contents. */
3698 if (new_stuff)
3699 FOR_ALL_BB_FN (bb, cfun)
3700 bitmap_set_free (NEW_SETS (bb));
3701 }
3702 statistics_histogram_event (cfun, "insert iterations", num_iterations);
3703 }
3704
3705
3706 /* Compute the AVAIL set for all basic blocks.
3707
3708 This function performs value numbering of the statements in each basic
3709 block. The AVAIL sets are built from information we glean while doing
3710 this value numbering, since the AVAIL sets contain only one entry per
3711 value.
3712
3713 AVAIL_IN[BLOCK] = AVAIL_OUT[dom(BLOCK)].
3714 AVAIL_OUT[BLOCK] = AVAIL_IN[BLOCK] U PHI_GEN[BLOCK] U TMP_GEN[BLOCK]. */
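
/* As a small sketch of the above equations: if AVAIL_OUT of the
   immediate dominator is {a_1, b_2} and BLOCK contains (hypothetical
   GIMPLE)

     x_3 = PHI <...>;
     y_4 = a_1 + b_2;

   then PHI_GEN[BLOCK] = {x_3}, TMP_GEN[BLOCK] = {y_4} and
   AVAIL_OUT[BLOCK] = {a_1, b_2, x_3, y_4}, with a single leader
   recorded per value. */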
3715
3716 static void
3717 compute_avail (void)
3718 {
3719
3720 basic_block block, son;
3721 basic_block *worklist;
3722 size_t sp = 0;
3723 unsigned i;
3724 tree name;
3725
3726 /* We pretend that default definitions are defined in the entry block.
3727 This includes function arguments and the static chain decl. */
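/* For example, given int foo (int a), the default def a_1(D) is
   treated below as if it were defined by the entry block and thus
   ends up in the entry block's TMP_GEN and AVAIL_OUT sets. */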
3728 FOR_EACH_SSA_NAME (i, name, cfun)
3729 {
3730 pre_expr e;
3731 if (!SSA_NAME_IS_DEFAULT_DEF (name)
3732 || has_zero_uses (name)
3733 || virtual_operand_p (name))
3734 continue;
3735
3736 e = get_or_alloc_expr_for_name (name);
3737 add_to_value (get_expr_value_id (e), e);
3738 bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)), e);
3739 bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
3740 e);
3741 }
3742
3743 if (dump_file && (dump_flags & TDF_DETAILS))
3744 {
3745 print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
3746 "tmp_gen", ENTRY_BLOCK);
3747 print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
3748 "avail_out", ENTRY_BLOCK);
3749 }
3750
3751 /* Allocate the worklist. */
3752 worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
3753
3754 /* Seed the algorithm by putting the dominator children of the entry
3755 block on the worklist. */
3756 for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3757 son;
3758 son = next_dom_son (CDI_DOMINATORS, son))
3759 worklist[sp++] = son;
3760
3761 BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (cfun))
3762 = ssa_default_def (cfun, gimple_vop (cfun));
3763
3764 /* Loop until the worklist is empty. */
3765 while (sp)
3766 {
3767 gimple *stmt;
3768 basic_block dom;
3769
3770 /* Pick a block from the worklist. */
3771 block = worklist[--sp];
3772 vn_context_bb = block;
3773
3774 /* Initially, the set of available values in BLOCK is that of
3775 its immediate dominator. */
3776 dom = get_immediate_dominator (CDI_DOMINATORS, block);
3777 if (dom)
3778 {
3779 bitmap_set_copy (AVAIL_OUT (block), AVAIL_OUT (dom));
3780 BB_LIVE_VOP_ON_EXIT (block) = BB_LIVE_VOP_ON_EXIT (dom);
3781 }
3782
3783 /* Generate values for PHI nodes. */
3784 for (gphi_iterator gsi = gsi_start_phis (block); !gsi_end_p (gsi);
3785 gsi_next (&gsi))
3786 {
3787 tree result = gimple_phi_result (gsi.phi ());
3788
3789 /* We have no need for virtual phis, as they don't represent
3790 actual computations. */
3791 if (virtual_operand_p (result))
3792 {
3793 BB_LIVE_VOP_ON_EXIT (block) = result;
3794 continue;
3795 }
3796
3797 pre_expr e = get_or_alloc_expr_for_name (result);
3798 add_to_value (get_expr_value_id (e), e);
3799 bitmap_value_insert_into_set (AVAIL_OUT (block), e);
3800 bitmap_insert_into_set (PHI_GEN (block), e);
3801 }
3802
3803 BB_MAY_NOTRETURN (block) = 0;
3804
3805 /* Now compute value numbers and populate value sets with all
3806 the expressions computed in BLOCK. */
3807 for (gimple_stmt_iterator gsi = gsi_start_bb (block); !gsi_end_p (gsi);
3808 gsi_next (&gsi))
3809 {
3810 ssa_op_iter iter;
3811 tree op;
3812
3813 stmt = gsi_stmt (gsi);
3814
3815 /* Cache whether the basic-block has any non-visible side-effect
3816 or control flow.
3817 If this isn't a call or it is the last stmt in the
3818 basic-block, then the CFG represents things correctly. */
3819 if (is_gimple_call (stmt) && !stmt_ends_bb_p (stmt))
3820 {
3821 /* Non-looping const functions always return normally.
3822 Otherwise the call might not return or have side-effects
3823 that forbid hoisting possibly trapping expressions
3824 before it. */
3825 int flags = gimple_call_flags (stmt);
3826 if (!(flags & ECF_CONST)
3827 || (flags & ECF_LOOPING_CONST_OR_PURE))
3828 BB_MAY_NOTRETURN (block) = 1;
3829 }
3830
3831 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
3832 {
3833 pre_expr e = get_or_alloc_expr_for_name (op);
3834
3835 add_to_value (get_expr_value_id (e), e);
3836 bitmap_insert_into_set (TMP_GEN (block), e);
3837 bitmap_value_insert_into_set (AVAIL_OUT (block), e);
3838 }
3839
3840 if (gimple_vdef (stmt))
3841 BB_LIVE_VOP_ON_EXIT (block) = gimple_vdef (stmt);
3842
3843 if (gimple_has_side_effects (stmt)
3844 || stmt_could_throw_p (cfun, stmt)
3845 || is_gimple_debug (stmt))
3846 continue;
3847
3848 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
3849 {
3850 if (ssa_undefined_value_p (op))
3851 continue;
3852 pre_expr e = get_or_alloc_expr_for_name (op);
3853 bitmap_value_insert_into_set (EXP_GEN (block), e);
3854 }
3855
3856 switch (gimple_code (stmt))
3857 {
3858 case GIMPLE_RETURN:
3859 continue;
3860
3861 case GIMPLE_CALL:
3862 {
3863 vn_reference_t ref;
3864 vn_reference_s ref1;
3865 pre_expr result = NULL;
3866
3867 /* We can value number only calls to real functions. */
3868 if (gimple_call_internal_p (stmt))
3869 continue;
3870
3871 vn_reference_lookup_call (as_a <gcall *> (stmt), &ref, &ref1);
3872 if (!ref)
3873 continue;
3874
3875 /* If the value of the call is not invalidated in
3876 this block until it is computed, add the expression
3877 to EXP_GEN. */
3878 if (!gimple_vuse (stmt)
3879 || gimple_code
3880 (SSA_NAME_DEF_STMT (gimple_vuse (stmt))) == GIMPLE_PHI
3881 || gimple_bb (SSA_NAME_DEF_STMT
3882 (gimple_vuse (stmt))) != block)
3883 {
3884 result = pre_expr_pool.allocate ();
3885 result->kind = REFERENCE;
3886 result->id = 0;
3887 PRE_EXPR_REFERENCE (result) = ref;
3888
3889 get_or_alloc_expression_id (result);
3890 add_to_value (get_expr_value_id (result), result);
3891 bitmap_value_insert_into_set (EXP_GEN (block), result);
3892 }
3893 continue;
3894 }
3895
3896 case GIMPLE_ASSIGN:
3897 {
3898 pre_expr result = NULL;
3899 switch (vn_get_stmt_kind (stmt))
3900 {
3901 case VN_NARY:
3902 {
3903 enum tree_code code = gimple_assign_rhs_code (stmt);
3904 vn_nary_op_t nary;
3905
3906 /* COND_EXPR and VEC_COND_EXPR are awkward in
3907 that they contain an embedded complex expression.
3908 Don't even try to shove those through PRE. */
3909 if (code == COND_EXPR
3910 || code == VEC_COND_EXPR)
3911 continue;
3912
3913 vn_nary_op_lookup_stmt (stmt, &nary);
3914 if (!nary || nary->predicated_values)
3915 continue;
3916
3917 /* If the NARY traps and there was a preceding
3918 point in the block that might not return, avoid
3919 adding the nary to EXP_GEN. */
3920 if (BB_MAY_NOTRETURN (block)
3921 && vn_nary_may_trap (nary))
3922 continue;
3923
3924 result = pre_expr_pool.allocate ();
3925 result->kind = NARY;
3926 result->id = 0;
3927 PRE_EXPR_NARY (result) = nary;
3928 break;
3929 }
3930
3931 case VN_REFERENCE:
3932 {
3933 tree rhs1 = gimple_assign_rhs1 (stmt);
3934 alias_set_type set = get_alias_set (rhs1);
3935 vec<vn_reference_op_s> operands
3936 = vn_reference_operands_for_lookup (rhs1);
3937 vn_reference_t ref;
3938 vn_reference_lookup_pieces (gimple_vuse (stmt), set,
3939 TREE_TYPE (rhs1),
3940 operands, &ref, VN_WALK);
3941 if (!ref)
3942 {
3943 operands.release ();
3944 continue;
3945 }
3946
3947 /* If the REFERENCE traps and there was a preceding
3948 point in the block that might not return, avoid
3949 adding the reference to EXP_GEN. */
3950 if (BB_MAY_NOTRETURN (block)
3951 && vn_reference_may_trap (ref))
3952 continue;
3953
3954 /* If the value of the reference is not invalidated in
3955 this block until it is computed, add the expression
3956 to EXP_GEN. */
3957 if (gimple_vuse (stmt))
3958 {
3959 gimple *def_stmt;
3960 bool ok = true;
3961 def_stmt = SSA_NAME_DEF_STMT (gimple_vuse (stmt));
3962 while (!gimple_nop_p (def_stmt)
3963 && gimple_code (def_stmt) != GIMPLE_PHI
3964 && gimple_bb (def_stmt) == block)
3965 {
3966 if (stmt_may_clobber_ref_p
3967 (def_stmt, gimple_assign_rhs1 (stmt)))
3968 {
3969 ok = false;
3970 break;
3971 }
3972 def_stmt
3973 = SSA_NAME_DEF_STMT (gimple_vuse (def_stmt));
3974 }
3975 if (!ok)
3976 {
3977 operands.release ();
3978 continue;
3979 }
3980 }
3981
3982 /* If the load was value-numbered to another
3983 load, make sure we do not use its expression
3984 for insertion if it wouldn't be a valid
3985 replacement. */
3986 /* At the moment we have a testcase
3987 for hoist insertion of aligned vs. misaligned
3988 variants in gcc.dg/torture/pr65270-1.c; thus,
3989 with just alignment to be considered, we can
3990 simply replace the expression in the hashtable
3991 with the most conservative one. */
3992 vn_reference_op_t ref1 = &ref->operands.last ();
3993 while (ref1->opcode != TARGET_MEM_REF
3994 && ref1->opcode != MEM_REF
3995 && ref1 != &ref->operands[0])
3996 --ref1;
3997 vn_reference_op_t ref2 = &operands.last ();
3998 while (ref2->opcode != TARGET_MEM_REF
3999 && ref2->opcode != MEM_REF
4000 && ref2 != &operands[0])
4001 --ref2;
4002 if ((ref1->opcode == TARGET_MEM_REF
4003 || ref1->opcode == MEM_REF)
4004 && (TYPE_ALIGN (ref1->type)
4005 > TYPE_ALIGN (ref2->type)))
4006 ref1->type
4007 = build_aligned_type (ref1->type,
4008 TYPE_ALIGN (ref2->type));
4009 /* TBAA behavior is an obvious part of replacement validity,
4010 so make sure that the hashtable expression covers it as well
4011 by adjusting the ref alias set and its base. */
4012 if (ref->set == set
4013 || alias_set_subset_of (set, ref->set))
4014 ;
4015 else if (alias_set_subset_of (ref->set, set))
4016 {
4017 ref->set = set;
4018 if (ref1->opcode == MEM_REF)
4019 ref1->op0
4020 = wide_int_to_tree (TREE_TYPE (ref2->op0),
4021 wi::to_wide (ref1->op0));
4022 else
4023 ref1->op2
4024 = wide_int_to_tree (TREE_TYPE (ref2->op2),
4025 wi::to_wide (ref1->op2));
4026 }
4027 else
4028 {
4029 ref->set = 0;
4030 if (ref1->opcode == MEM_REF)
4031 ref1->op0
4032 = wide_int_to_tree (ptr_type_node,
4033 wi::to_wide (ref1->op0));
4034 else
4035 ref1->op2
4036 = wide_int_to_tree (ptr_type_node,
4037 wi::to_wide (ref1->op2));
4038 }
4039 operands.release ();
4040
4041 result = pre_expr_pool.allocate ();
4042 result->kind = REFERENCE;
4043 result->id = 0;
4044 PRE_EXPR_REFERENCE (result) = ref;
4045 break;
4046 }
4047
4048 default:
4049 continue;
4050 }
4051
4052 get_or_alloc_expression_id (result);
4053 add_to_value (get_expr_value_id (result), result);
4054 bitmap_value_insert_into_set (EXP_GEN (block), result);
4055 continue;
4056 }
4057 default:
4058 break;
4059 }
4060 }
4061
4062 if (dump_file && (dump_flags & TDF_DETAILS))
4063 {
4064 print_bitmap_set (dump_file, EXP_GEN (block),
4065 "exp_gen", block->index);
4066 print_bitmap_set (dump_file, PHI_GEN (block),
4067 "phi_gen", block->index);
4068 print_bitmap_set (dump_file, TMP_GEN (block),
4069 "tmp_gen", block->index);
4070 print_bitmap_set (dump_file, AVAIL_OUT (block),
4071 "avail_out", block->index);
4072 }
4073
4074 /* Put the dominator children of BLOCK on the worklist of blocks
4075 to compute available sets for. */
4076 for (son = first_dom_son (CDI_DOMINATORS, block);
4077 son;
4078 son = next_dom_son (CDI_DOMINATORS, son))
4079 worklist[sp++] = son;
4080 }
4081 vn_context_bb = NULL;
4082
4083 free (worklist);
4084 }
4085
4086
4087 /* Initialize data structures used by PRE. */
4088
4089 static void
4090 init_pre (void)
4091 {
4092 basic_block bb;
4093
4094 next_expression_id = 1;
4095 expressions.create (0);
4096 expressions.safe_push (NULL);
4097 value_expressions.create (get_max_value_id () + 1);
4098 value_expressions.safe_grow_cleared (get_max_value_id () + 1);
4099 name_to_id.create (0);
4100
4101 inserted_exprs = BITMAP_ALLOC (NULL);
4102
4103 connect_infinite_loops_to_exit ();
4104 memset (&pre_stats, 0, sizeof (pre_stats));
4105
4106 alloc_aux_for_blocks (sizeof (struct bb_bitmap_sets));
4107
4108 calculate_dominance_info (CDI_DOMINATORS);
4109
4110 bitmap_obstack_initialize (&grand_bitmap_obstack);
4111 phi_translate_table = new hash_table<expr_pred_trans_d> (5110);
4112 expression_to_id = new hash_table<pre_expr_d> (num_ssa_names * 3);
4113 FOR_ALL_BB_FN (bb, cfun)
4114 {
4115 EXP_GEN (bb) = bitmap_set_new ();
4116 PHI_GEN (bb) = bitmap_set_new ();
4117 TMP_GEN (bb) = bitmap_set_new ();
4118 AVAIL_OUT (bb) = bitmap_set_new ();
4119 }
4120 }
4121
4122
4123 /* Deallocate data structures used by PRE. */
4124
4125 static void
4126 fini_pre ()
4127 {
4128 value_expressions.release ();
4129 expressions.release ();
4130 BITMAP_FREE (inserted_exprs);
4131 bitmap_obstack_release (&grand_bitmap_obstack);
4132 bitmap_set_pool.release ();
4133 pre_expr_pool.release ();
4134 delete phi_translate_table;
4135 phi_translate_table = NULL;
4136 delete expression_to_id;
4137 expression_to_id = NULL;
4138 name_to_id.release ();
4139
4140 free_aux_for_blocks ();
4141 }
4142
4143 namespace {
4144
4145 const pass_data pass_data_pre =
4146 {
4147 GIMPLE_PASS, /* type */
4148 "pre", /* name */
4149 OPTGROUP_NONE, /* optinfo_flags */
4150 TV_TREE_PRE, /* tv_id */
4151 ( PROP_cfg | PROP_ssa ), /* properties_required */
4152 0, /* properties_provided */
4153 0, /* properties_destroyed */
4154 TODO_rebuild_alias, /* todo_flags_start */
4155 0, /* todo_flags_finish */
4156 };
4157
4158 class pass_pre : public gimple_opt_pass
4159 {
4160 public:
4161 pass_pre (gcc::context *ctxt)
4162 : gimple_opt_pass (pass_data_pre, ctxt)
4163 {}
4164
4165 /* opt_pass methods: */
4166 virtual bool gate (function *)
4167 { return flag_tree_pre != 0 || flag_code_hoisting != 0; }
4168 virtual unsigned int execute (function *);
4169
4170 }; // class pass_pre
4171
4172 /* Valueization hook for RPO VN when we are calling back to it
4173 at ANTIC compute time. */
4174
4175 static tree
4176 pre_valueize (tree name)
4177 {
4178 if (TREE_CODE (name) == SSA_NAME)
4179 {
4180 tree tem = VN_INFO (name)->valnum;
4181 if (tem != VN_TOP && tem != name)
4182 {
4183 if (TREE_CODE (tem) != SSA_NAME
4184 || SSA_NAME_IS_DEFAULT_DEF (tem))
4185 return tem;
4186 /* We create temporary SSA names for representatives that
4187 do not have a definition (yet) but are not default defs either;
4188 assume they are fine to use. */
4189 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (tem));
4190 if (! def_bb
4191 || dominated_by_p (CDI_DOMINATORS, vn_context_bb, def_bb))
4192 return tem;
4193 /* ??? Now we could look for a leader. Ideally we'd somehow
4194 expose RPO VN leaders and get rid of AVAIL_OUT as well... */
4195 }
4196 }
4197 return name;
4198 }
4199
4200 unsigned int
4201 pass_pre::execute (function *fun)
4202 {
4203 unsigned int todo = 0;
4204
4205 do_partial_partial =
4206 flag_tree_partial_pre && optimize_function_for_speed_p (fun);
4207
4208 /* This has to happen before VN runs because
4209 loop_optimizer_init may create new phis, etc. */
4210 loop_optimizer_init (LOOPS_NORMAL);
4211 split_critical_edges ();
4212 scev_initialize ();
4213 calculate_dominance_info (CDI_DOMINATORS);
4214
4215 run_rpo_vn (VN_WALK);
4216
4217 init_pre ();
4218
4219 vn_valueize = pre_valueize;
4220
4221 /* Insert can get quite slow on an incredibly large number of basic
4222 blocks due to some quadratic behavior. Until this behavior is
4223 fixed, don't run it when we have an incredibly large number of
4224 bb's. If we aren't going to run insert, there is no point in
4225 computing ANTIC either, even though it's plenty fast, nor do
4226 we require AVAIL. */
4227 if (n_basic_blocks_for_fn (fun) < 4000)
4228 {
4229 compute_avail ();
4230 compute_antic ();
4231 insert ();
4232 }
4233
4234 /* Make sure to remove fake edges before committing our inserts.
4235 This makes sure we don't end up with extra critical edges that
4236 we would need to split. */
4237 remove_fake_exit_edges ();
4238 gsi_commit_edge_inserts ();
4239
4240 /* Elimination folds statements, which might (though it should not...)
4241 end up not keeping virtual operands up-to-date. */
4242 gcc_assert (!need_ssa_update_p (fun));
4243
4244 statistics_counter_event (fun, "Insertions", pre_stats.insertions);
4245 statistics_counter_event (fun, "PA inserted", pre_stats.pa_insert);
4246 statistics_counter_event (fun, "HOIST inserted", pre_stats.hoist_insert);
4247 statistics_counter_event (fun, "New PHIs", pre_stats.phis);
4248
4249 todo |= eliminate_with_rpo_vn (inserted_exprs);
4250
4251 vn_valueize = NULL;
4252
4253 /* Because we don't follow exactly the standard PRE algorithm, and decide not
4254 to insert PHI nodes sometimes, and because value numbering of casts isn't
4255 perfect, we sometimes end up inserting dead code. This simple DCE-like
4256 pass removes any insertions we made that weren't actually used. */
4257 simple_dce_from_worklist (inserted_exprs);
4258
4259 fini_pre ();
4260
4261 scev_finalize ();
4262 loop_optimizer_finalize ();
4263
4264 /* TODO: tail_merge_optimize may merge all predecessors of a block, in which
4265 case we can merge the block with the remaining predecessor of the block.
4266 It should either:
4267 - call merge_blocks after each tail merge iteration
4268 - call merge_blocks after all tail merge iterations
4269 - mark TODO_cleanup_cfg when necessary
4270 - share the cfg cleanup with fini_pre. */
4271 todo |= tail_merge_optimize (todo);
4272
4273 free_rpo_vn ();
4274
4275 /* Tail merging invalidates the virtual SSA web, together with
4276 cfg-cleanup opportunities exposed by PRE this will wreck the
4277 SSA updating machinery. So make sure to run update-ssa
4278 manually, before eventually scheduling cfg-cleanup as part of
4279 the todo. */
4280 update_ssa (TODO_update_ssa_only_virtuals);
4281
4282 return todo;
4283 }
4284
4285 } // anon namespace
4286
4287 gimple_opt_pass *
4288 make_pass_pre (gcc::context *ctxt)
4289 {
4290 return new pass_pre (ctxt);
4291 }
4292