1 /* Loop invariant motion.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "dbgcnt.h"
50
51 /* TODO: Support for predicated code motion. I.e.
52
53 while (1)
54 {
55 if (cond)
56 {
57 a = inv;
58 something;
59 }
60 }
61
62 Where COND and INV are invariants, but evaluating INV may trap or be
63 invalid from some other reason if !COND. This may be transformed to
64
65 if (cond)
66 a = inv;
67 while (1)
68 {
69 if (cond)
70 something;
71 } */
72
/* The auxiliary data kept for each statement.  Created lazily by
   init_lim_data and released by clear_lim_data.  */

struct lim_aux_data
{
  class loop *max_loop;		/* The outermost loop in that the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of that we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for that we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must be also
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};
99
/* Maps statements to their lim_aux_data.  Entries are added by
   init_lim_data and removed by clear_lim_data.  */

static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
103
/* Description of a memory reference location.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in that it occurs.  */
};
111
112
/* Description of a memory reference.  */

class im_mem_ref
{
public:
  unsigned id : 30;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
  unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  We are using mem.ref == error_mark_node for the
     case the reference is represented by its single access stmt
     in accesses_in_loop[0].  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in that this memory location
				   is loaded from.  */
  vec<mem_ref_loc>		accesses_in_loop;
				/* The locations of the accesses.  */

  /* The following set is computed on demand.  */
  bitmap_head dep_loop;		/* The set of loops in that the memory
				   reference is {in,}dependent in
				   different modes.  */
};
142
/* We use six bits per loop in the ref->dep_loop bitmap to record
   the dep_kind x dep_state combinations: two bits per dep_kind, the
   first of each pair marking independence and the second dependence
   (see record_loop_dependence / query_loop_dependence).  */

enum dep_kind { lim_raw, sm_war, sm_waw };
enum dep_state { dep_unknown, dep_independent, dep_dependent };
148
149 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
150
151 static void
record_loop_dependence(class loop * loop,im_mem_ref * ref,dep_kind kind,dep_state state)152 record_loop_dependence (class loop *loop, im_mem_ref *ref,
153 dep_kind kind, dep_state state)
154 {
155 gcc_assert (state != dep_unknown);
156 unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
157 bitmap_set_bit (&ref->dep_loop, bit);
158 }
159
160 /* Query the loop dependence cache of REF for LOOP, KIND. */
161
162 static dep_state
query_loop_dependence(class loop * loop,im_mem_ref * ref,dep_kind kind)163 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
164 {
165 unsigned first_bit = 6 * loop->num + kind * 2;
166 if (bitmap_bit_p (&ref->dep_loop, first_bit))
167 return dep_independent;
168 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
169 return dep_dependent;
170 return dep_unknown;
171 }
172
/* Mem_ref hashtable helpers.  Looks up im_mem_refs by an ao_ref
   compare type; the hash value is precomputed and cached in the
   im_mem_ref itself.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};
181
/* A hash function for class im_mem_ref object OBJ.  Simply returns the
   hash value cached at im_mem_ref creation time.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}
189
/* An equality function for class im_mem_ref object MEM1 with
   memory reference OBJ2.  When OBJ2 has a known max size we compare
   the decomposed base/offset/size/alias-set meta-data (two MEM_REFs on
   the same pointer compare equal when their combined byte offsets
   agree); otherwise fall back to structural comparison of the refs.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    return (mem1->ref_decomposed
	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
		 && TREE_CODE (obj2->base) == MEM_REF
		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
				     TREE_OPERAND (obj2->base, 0), 0)
		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
			      mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
		|| (operand_equal_p (mem1->mem.base, obj2->base, 0)
		    && known_eq (mem1->mem.offset, obj2->offset)))
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special-case we didn't canonicalize yet and the
		   incoming ref is a alias-set zero MEM we pick
		   the correct one already.  */
		|| (!mem1->ref_canonical
		    && (TREE_CODE (obj2->ref) == MEM_REF
			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
		    && obj2->ref_alias_set == 0)
		/* Likewise if there's a canonical ref with alias-set zero.  */
		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
}
225
226
/* Description of memory accesses in loops.  All per-loop bitmap vectors
   below are indexed by loop number.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_loaded_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops .  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;
249
/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
/* NOTE(review): presumably backs the im_mem_ref allocations — confirm
   at the allocation sites (not visible in this chunk).  */
static obstack mem_ref_obstack;

/* Forward declarations for the dependence queries defined later.  */
static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
257
/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) param_lim_expensive)

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  Stashed in the basic block's aux field.  */
#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
271
272 static struct lim_aux_data *
init_lim_data(gimple * stmt)273 init_lim_data (gimple *stmt)
274 {
275 lim_aux_data *p = XCNEW (struct lim_aux_data);
276 lim_aux_data_map->put (stmt, p);
277
278 return p;
279 }
280
281 static struct lim_aux_data *
get_lim_data(gimple * stmt)282 get_lim_data (gimple *stmt)
283 {
284 lim_aux_data **p = lim_aux_data_map->get (stmt);
285 if (!p)
286 return NULL;
287
288 return *p;
289 }
290
/* Releases the memory occupied by DATA, including its dependency
   vector.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}
299
300 static void
clear_lim_data(gimple * stmt)301 clear_lim_data (gimple *stmt)
302 {
303 lim_aux_data **p = lim_aux_data_map->get (stmt);
304 if (!p)
305 return;
306
307 free_lim_aux_data (*p);
308 *p = NULL;
309 }
310
311
/* The possibilities of statement movement.  */
enum move_pos
  {
    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   become executed -- memory accesses, ... */
    MOVE_POSSIBLE		/* Unlimited movement.  */
  };
320
321
/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (gimple *stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  /* Small (at most two arguments) non-virtual PHIs are movable; larger
     ones would require materializing control flow.  */
  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  /* Statements that write memory are never hoisted here (store motion
     handles those separately).  */
  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (cfun, stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}
419
/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  Returns NULL if DEF is defined in LOOP itself
   (and thus cannot be moved at all).  */

static class loop *
outermost_invariant_loop (tree def, class loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  class loop *max_loop;
  struct lim_aux_data *lim_data;

  /* A missing operand constrains nothing: movable to the top level.  */
  if (!def)
    return superloop_at_depth (loop, 1);

  /* Constants are invariant everywhere.  */
  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  /* If the defining statement is itself going to be hoisted, DEF becomes
     invariant one level outside of that statement's target.  */
  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}
459
/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e. we need to hoist it out of the loop if we want
   to hoist the statement represented by DATA), record the statement in that
   DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
   add the cost of the computation of DEF to the DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  class loop *max_loop;
  struct lim_aux_data *def_data;

  /* Definitions without a basic block (default defs) do not constrain
     the movement.  */
  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  /* Tighten DATA->max_loop to the innermost of the two constraints.  */
  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}
507
/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
	 of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
	 should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}
584
585 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
586 REF is independent. If REF is not independent in LOOP, NULL is returned
587 instead. */
588
589 static class loop *
outermost_indep_loop(class loop * outer,class loop * loop,im_mem_ref * ref)590 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
591 {
592 class loop *aloop;
593
594 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
595 return NULL;
596
597 for (aloop = outer;
598 aloop != loop;
599 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
600 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
601 && ref_indep_loop_p (aloop, ref, lim_raw))
602 return aloop;
603
604 if (ref_indep_loop_p (loop, ref, lim_raw))
605 return loop;
606 else
607 return NULL;
608 }
609
610 /* If there is a simple load or store to a memory reference in STMT, returns
611 the location of the memory reference, and sets IS_STORE according to whether
612 it is a store or load. Otherwise, returns NULL. */
613
614 static tree *
simple_mem_ref_in_stmt(gimple * stmt,bool * is_store)615 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
616 {
617 tree *lhs, *rhs;
618
619 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
620 if (!gimple_assign_single_p (stmt))
621 return NULL;
622
623 lhs = gimple_assign_lhs_ptr (stmt);
624 rhs = gimple_assign_rhs1_ptr (stmt);
625
626 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
627 {
628 *is_store = false;
629 return rhs;
630 }
631 else if (gimple_vdef (stmt)
632 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
633 {
634 *is_store = true;
635 return lhs;
636 }
637 else
638 return NULL;
639 }
640
641 /* From a controlling predicate in DOM determine the arguments from
642 the PHI node PHI that are chosen if the predicate evaluates to
643 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
644 they are non-NULL. Returns true if the arguments can be determined,
645 else return false. */
646
647 static bool
extract_true_false_args_from_phi(basic_block dom,gphi * phi,tree * true_arg_p,tree * false_arg_p)648 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
649 tree *true_arg_p, tree *false_arg_p)
650 {
651 edge te, fe;
652 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
653 &te, &fe))
654 return false;
655
656 if (true_arg_p)
657 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
658 if (false_arg_p)
659 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
660
661 return true;
662 }
663
/* Determine the outermost loop to that it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in that the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  class loop *loop = bb->loop_father;
  class loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}
799
/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  Recurses over
   the dependency vector of STMT.  */

static void
set_level (gimple *stmt, class loop *orig_loop, class loop *level)
{
  class loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  /* NOTE(review): LIM_DATA is dereferenced unconditionally below even
     though the earlier test tolerates it being NULL — presumably any
     statement reaching this point has had lim_data initialized; confirm
     against callers.  */
  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (dep_stmt, orig_loop, level);
}
828
/* Determines an outermost loop from that we want to hoist the statement STMT.
   For now we chose the outermost possible loop.  TODO -- use profiling
   information to set it more sanely.  */

static void
set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}
838
839 /* Returns true if STMT is a call that has side effects. */
840
841 static bool
nonpure_call_p(gimple * stmt)842 nonpure_call_p (gimple *stmt)
843 {
844 if (gimple_code (stmt) != GIMPLE_CALL)
845 return false;
846
847 return gimple_has_side_effects (stmt);
848 }
849
/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */

static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gassign *stmt, *stmt1, *stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  /* reciptmp = 1 / b  -- this part is invariant and will be hoisted.  */
  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
			       gimple_assign_rhs2 (stmt));
  /* lhs = a * reciptmp  -- stays in place.  */
  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
			       gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}
882
/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0,
   so that the shift of the invariant B becomes hoistable.  Returns the
   new invariant statement to process, or the original statement if the
   pattern does not match.  */

static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);

  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (stmt1);
  b = gimple_assign_rhs2 (stmt1);

  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt,
			   build_int_cst_type (TREE_TYPE (name),
					       0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      return stmt1;
    }

  return stmt;
}
978
/* Determine the outermost loops in that statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the
   statements.  */

static void
compute_invariantness (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
  gimple *stmt;
  /* If BB is not always executed in its innermost loop, moving a statement
     that may have side effects (e.g. a call) could introduce its execution
     on paths that never reached it; MAYBE_NEVER tracks that.  */
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  /* The outermost loop BB is known to be always executed in; statements in
     BB cannot be hoisted past this level while preserving execution.  */
  class loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;

  /* Nothing to do for blocks that are not inside any loop.  */
  if (!loop_outer (bb->loop_father))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));

  /* Look at PHI nodes, but only if there is at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (bsi)
      && ((gsi_next (&bsi), gsi_end_p (bsi))
	  || (gsi_next (&bsi), gsi_end_p (bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
      {
	stmt = gsi_stmt (bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = get_lim_data (stmt);
	if (! lim_data)
	  lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	/* Determine the outermost loop this PHI is invariant in; on
	   failure mark it as unmovable.  */
	if (!determine_max_movement (stmt, false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2);
	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      stmt = gsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  /* A call with side effects prevents preserving execution for
	     everything after it in this block.  */
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
	      struct lim_aux_data *lim_data = get_lim_data (stmt);
	      if (! lim_data)
		lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
	    }
	  continue;
	}

      if (is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
	{
	  tree op0 = gimple_assign_rhs1 (stmt);
	  tree op1 = gimple_assign_rhs2 (stmt);
	  class loop *ol1 = outermost_invariant_loop (op1,
			      loop_containing_stmt (stmt));

	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
	      && outermost_invariant_loop (op0, ol1) == NULL)
	    stmt = rewrite_reciprocal (&bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
	    stmt = rewrite_bittest (&bsi);
	}

      lim_data = get_lim_data (stmt);
      if (! lim_data)
	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;

      /* If something before this statement may never execute, we can only
	 move statements whose execution need not be preserved.  */
      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  lim_data->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
	}

      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
}
1119
/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  Returns a set of TODO_* flags to be processed
   by the caller (TODO_cleanup_cfg when a PHI was turned into a COND_EXPR).  */

unsigned int
move_computations_worker (basic_block bb)
{
  class loop *level;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
  unsigned int todo = 0;

  if (!loop_outer (bb->loop_father))
    return todo;

  /* Iterate without automatic advancement: removal of a PHI advances
     the iterator, otherwise we advance manually.  */
  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
      gassign *new_stmt;
      gphi *stmt = bsi.phi ();

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      /* A NULL target loop means this PHI stays where it is.  */
      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      if (gimple_phi_num_args (stmt) == 1)
	{
	  /* Degenerate PHI: becomes a plain copy of its only argument.  */
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  /* Two-argument PHI: materialize it as a COND_EXPR selecting
	     between the arguments with the controlling condition of the
	     immediate dominator block.  */
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
	  gcc_assert (arg0 && arg1);
	  t = build2 (gimple_cond_code (cond), boolean_type_node,
		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  COND_EXPR, t, arg0, arg1);
	  todo |= TODO_cleanup_cfg;
	}
      /* Range/nonzero-bits info derived at the original position is not
	 necessarily valid at the hoisted position unless the target loop
	 header is always reached when BB executes.  */
      if (!ALWAYS_EXECUTED_IN (bb)
	  || (ALWAYS_EXECUTED_IN (bb) != level
	      && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
	reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    {
      edge e;

      gimple *stmt = gsi_stmt (bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed it here to force its operands to be moved if necessary.
	 Note this continue does not advance BSI; that is fine because
	 lim_data was cleared above, so the next iteration takes the
	 lim_data == NULL path and advances past this statement.  */
      if (gimple_code (stmt) == GIMPLE_COND)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      e = loop_preheader_edge (level);
      /* Statements with a VDEF (stores) are handled by store motion,
	 not here.  */
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gphi_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (gsi2); gsi_next (&gsi2))
	    {
	      gphi *phi = gsi2.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		{
		  SET_USE (gimple_vuse_op (stmt),
			   PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
      gsi_remove (&bsi, false);
      /* As for PHIs above, flow-sensitive info may be invalid at the
	 new location.  */
      if (gimple_has_lhs (stmt)
	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	reset_flow_sensitive_info (gimple_get_lhs (stmt));
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow rewrite it to
	 unsigned arithmetic.  */
      if (is_gimple_assign (stmt)
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && arith_code_with_undefined_signed_overflow
	       (gimple_assign_rhs_code (stmt))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
      else
	gsi_insert_on_edge (e, stmt);
    }

  return todo;
}
1275
1276 /* Checks whether the statement defining variable *INDEX can be hoisted
1277 out of the loop passed in DATA. Callback for for_each_index. */
1278
1279 static bool
may_move_till(tree ref,tree * index,void * data)1280 may_move_till (tree ref, tree *index, void *data)
1281 {
1282 class loop *loop = (class loop *) data, *max_loop;
1283
1284 /* If REF is an array reference, check also that the step and the lower
1285 bound is invariant in LOOP. */
1286 if (TREE_CODE (ref) == ARRAY_REF)
1287 {
1288 tree step = TREE_OPERAND (ref, 3);
1289 tree lbound = TREE_OPERAND (ref, 2);
1290
1291 max_loop = outermost_invariant_loop (step, loop);
1292 if (!max_loop)
1293 return false;
1294
1295 max_loop = outermost_invariant_loop (lbound, loop);
1296 if (!max_loop)
1297 return false;
1298 }
1299
1300 max_loop = outermost_invariant_loop (*index, loop);
1301 if (!max_loop)
1302 return false;
1303
1304 return true;
1305 }
1306
1307 /* If OP is SSA NAME, force the statement that defines it to be
1308 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1309
1310 static void
force_move_till_op(tree op,class loop * orig_loop,class loop * loop)1311 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1312 {
1313 gimple *stmt;
1314
1315 if (!op
1316 || is_gimple_min_invariant (op))
1317 return;
1318
1319 gcc_assert (TREE_CODE (op) == SSA_NAME);
1320
1321 stmt = SSA_NAME_DEF_STMT (op);
1322 if (gimple_nop_p (stmt))
1323 return;
1324
1325 set_level (stmt, orig_loop, loop);
1326 }
1327
/* Forces statement defining invariants in REF (and *INDEX) to be moved out of
   the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
   for_each_index.  */

struct fmt_data
{
  /* The loop out of which defining statements are forced.  */
  class loop *loop;
  /* The loop in which the reference itself occurs.  */
  class loop *orig_loop;
};
1337
1338 static bool
force_move_till(tree ref,tree * index,void * data)1339 force_move_till (tree ref, tree *index, void *data)
1340 {
1341 struct fmt_data *fmt_data = (struct fmt_data *) data;
1342
1343 if (TREE_CODE (ref) == ARRAY_REF)
1344 {
1345 tree step = TREE_OPERAND (ref, 3);
1346 tree lbound = TREE_OPERAND (ref, 2);
1347
1348 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1349 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1350 }
1351
1352 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1353
1354 return true;
1355 }
1356
/* A function to free the mem_ref object OBJ.  Only the access location
   vector owns separately allocated storage; the im_mem_ref itself is
   obstack-allocated (see mem_ref_alloc) and presumably freed with the
   obstack.  */

static void
memref_free (class im_mem_ref *mem)
{
  mem->accesses_in_loop.release ();
}
1364
1365 /* Allocates and returns a memory reference description for MEM whose hash
1366 value is HASH and id is ID. */
1367
1368 static im_mem_ref *
mem_ref_alloc(ao_ref * mem,unsigned hash,unsigned id)1369 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1370 {
1371 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1372 if (mem)
1373 ref->mem = *mem;
1374 else
1375 ao_ref_init (&ref->mem, error_mark_node);
1376 ref->id = id;
1377 ref->ref_canonical = false;
1378 ref->ref_decomposed = false;
1379 ref->hash = hash;
1380 ref->stored = NULL;
1381 ref->loaded = NULL;
1382 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1383 ref->accesses_in_loop.create (1);
1384
1385 return ref;
1386 }
1387
1388 /* Records memory reference location *LOC in LOOP to the memory reference
1389 description REF. The reference occurs in statement STMT. */
1390
1391 static void
record_mem_ref_loc(im_mem_ref * ref,gimple * stmt,tree * loc)1392 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1393 {
1394 mem_ref_loc aref;
1395 aref.stmt = stmt;
1396 aref.ref = loc;
1397 ref->accesses_in_loop.safe_push (aref);
1398 }
1399
1400 /* Set the LOOP bit in REF stored bitmap and allocate that if
1401 necessary. Return whether a bit was changed. */
1402
1403 static bool
set_ref_stored_in_loop(im_mem_ref * ref,class loop * loop)1404 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1405 {
1406 if (!ref->stored)
1407 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1408 return bitmap_set_bit (ref->stored, loop->num);
1409 }
1410
1411 /* Marks reference REF as stored in LOOP. */
1412
1413 static void
mark_ref_stored(im_mem_ref * ref,class loop * loop)1414 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1415 {
1416 while (loop != current_loops->tree_root
1417 && set_ref_stored_in_loop (ref, loop))
1418 loop = loop_outer (loop);
1419 }
1420
1421 /* Set the LOOP bit in REF loaded bitmap and allocate that if
1422 necessary. Return whether a bit was changed. */
1423
1424 static bool
set_ref_loaded_in_loop(im_mem_ref * ref,class loop * loop)1425 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1426 {
1427 if (!ref->loaded)
1428 ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1429 return bitmap_set_bit (ref->loaded, loop->num);
1430 }
1431
1432 /* Marks reference REF as loaded in LOOP. */
1433
1434 static void
mark_ref_loaded(im_mem_ref * ref,class loop * loop)1435 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1436 {
1437 while (loop != current_loops->tree_root
1438 && set_ref_loaded_in_loop (ref, loop))
1439 loop = loop_outer (loop);
1440 }
1441
/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the memory_accesses structure.  Marks
   the vops accessed through unrecognized statements there as
   well.  */

static void
gather_mem_refs_stmt (class loop *loop, gimple *stmt)
{
  tree *mem = NULL;
  hashval_t hash;
  im_mem_ref **slot;
  im_mem_ref *ref;
  bool is_stored;
  unsigned id;

  /* Statements without a virtual use do not touch memory.  */
  if (!gimple_vuse (stmt))
    return;

  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
  if (!mem && is_gimple_assign (stmt))
    {
      /* For aggregate copies record distinct references but use them
	 only for disambiguation purposes.  */
      id = memory_accesses.refs_list.length ();
      ref = mem_ref_alloc (NULL, 0, id);
      memory_accesses.refs_list.safe_push (ref);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unhandled memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      /* MEM is NULL here; this records only the statement location.  */
      record_mem_ref_loc (ref, stmt, mem);
      is_stored = gimple_vdef (stmt);
    }
  else if (!mem)
    {
      /* We use the shared mem_ref for all unanalyzable refs.  */
      id = UNANALYZABLE_MEM_ID;
      ref = memory_accesses.refs_list[id];
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      is_stored = gimple_vdef (stmt);
    }
  else
    {
      /* We are looking for equal refs that might differ in structure
	 such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
	 make sure we can canonicalize the ref in the hashtable if
	 non-operand_equal_p refs are found.  For the lookup we mark
	 the case we want strict equality with aor.max_size == -1.  */
      ao_ref aor;
      ao_ref_init (&aor, *mem);
      ao_ref_base (&aor);
      ao_ref_alias_set (&aor);
      HOST_WIDE_INT offset, size, max_size;
      poly_int64 saved_maxsize = aor.max_size, mem_off;
      tree mem_base;
      bool ref_decomposed;
      /* Try to hash the ref by its decomposed base + constant offset +
	 size; this allows structurally different but equal refs to hit
	 the same hash table slot.  */
      if (aor.max_size_known_p ()
	  && aor.offset.is_constant (&offset)
	  && aor.size.is_constant (&size)
	  && aor.max_size.is_constant (&max_size)
	  && size == max_size
	  && (size % BITS_PER_UNIT) == 0
	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
	     size.  Make sure this is consistent with the extraction.  */
	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
		       aor.size)
	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
	{
	  ref_decomposed = true;
	  tree base = ao_ref_base (&aor);
	  poly_int64 moffset;
	  HOST_WIDE_INT mcoffset;
	  if (TREE_CODE (base) == MEM_REF
	      && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
	      && moffset.is_constant (&mcoffset))
	    {
	      /* Hash on the MEM_REF's address SSA name plus the combined
		 constant offset, folding the MEM_REF offset in.  */
	      hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
	      hash = iterative_hash_host_wide_int (mcoffset, hash);
	    }
	  else
	    {
	      hash = iterative_hash_expr (base, 0);
	      hash = iterative_hash_host_wide_int (offset, hash);
	    }
	  hash = iterative_hash_host_wide_int (size, hash);
	}
      else
	{
	  /* Fall back to structural hashing and request strict equality
	     in the hash table comparison via max_size == -1.  */
	  ref_decomposed = false;
	  hash = iterative_hash_expr (aor.ref, 0);
	  aor.max_size = -1;
	}
      slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
      aor.max_size = saved_maxsize;
      if (*slot)
	{
	  if (!(*slot)->ref_canonical
	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
	    {
	      /* If we didn't yet canonicalize the hashtable ref (which
		 we'll end up using for code insertion) and hit a second
		 equal ref that is not structurally equivalent create
		 a canonical ref which is a bare MEM_REF.  */
	      if (TREE_CODE (*mem) == MEM_REF
		  || TREE_CODE (*mem) == TARGET_MEM_REF)
		{
		  (*slot)->mem.ref = *mem;
		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
		}
	      else
		{
		  /* Build MEM[(ref-type *)&base + off], preserving the
		     alias info and alignment of the original access.  */
		  tree ref_alias_type = reference_alias_ptr_type (*mem);
		  unsigned int ref_align = get_object_alignment (*mem);
		  tree ref_type = TREE_TYPE (*mem);
		  tree tmp = build1 (ADDR_EXPR, ptr_type_node,
				     unshare_expr (mem_base));
		  if (TYPE_ALIGN (ref_type) != ref_align)
		    ref_type = build_aligned_type (ref_type, ref_align);
		  (*slot)->mem.ref
		    = fold_build2 (MEM_REF, ref_type, tmp,
				   build_int_cst (ref_alias_type, mem_off));
		  if ((*slot)->mem.volatile_p)
		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
				       && is_gimple_mem_ref_addr
					    (TREE_OPERAND ((*slot)->mem.ref,
							   0)));
		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
		}
	      (*slot)->ref_canonical = true;
	    }
	  ref = *slot;
	  id = ref->id;
	}
      else
	{
	  /* First time we see this reference; allocate and register it.  */
	  id = memory_accesses.refs_list.length ();
	  ref = mem_ref_alloc (&aor, hash, id);
	  ref->ref_decomposed = ref_decomposed;
	  memory_accesses.refs_list.safe_push (ref);
	  *slot = ref;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Memory reference %u: ", id);
	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }
	}

      record_mem_ref_loc (ref, stmt, mem);
    }
  if (is_stored)
    {
      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
      mark_ref_stored (ref, loop);
    }
  /* A not simple memory op is also a read when it is a write.  */
  if (!is_stored || id == UNANALYZABLE_MEM_ID
      || ref->mem.ref == error_mark_node)
    {
      bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
      mark_ref_loaded (ref, loop);
    }
  init_lim_data (stmt)->ref = ref->id;
  return;
}
1615
/* Array mapping a loop's NUM to its postorder index in the loop tree;
   used by the sort/search comparison functions below.  */
static unsigned *bb_loop_postorder;
1617
1618 /* qsort sort function to sort blocks after their loop fathers postorder. */
1619
1620 static int
sort_bbs_in_loop_postorder_cmp(const void * bb1_,const void * bb2_,void * bb_loop_postorder_)1621 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1622 void *bb_loop_postorder_)
1623 {
1624 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1625 basic_block bb1 = *(const basic_block *)bb1_;
1626 basic_block bb2 = *(const basic_block *)bb2_;
1627 class loop *loop1 = bb1->loop_father;
1628 class loop *loop2 = bb2->loop_father;
1629 if (loop1->num == loop2->num)
1630 return bb1->index - bb2->index;
1631 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1632 }
1633
1634 /* qsort sort function to sort ref locs after their loop fathers postorder. */
1635
1636 static int
sort_locs_in_loop_postorder_cmp(const void * loc1_,const void * loc2_,void * bb_loop_postorder_)1637 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1638 void *bb_loop_postorder_)
1639 {
1640 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1641 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1642 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1643 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1644 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1645 if (loop1->num == loop2->num)
1646 return 0;
1647 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1648 }
1649
/* Gathers memory references in loops.  When STORE_MOTION is true,
   additionally propagate the stored-references bitmaps up the loop
   hierarchy for later use by store motion.  */

static void
analyze_memory_references (bool store_motion)
{
  gimple_stmt_iterator bsi;
  basic_block bb, *bbs;
  class loop *outer;
  unsigned i, n;

  /* Collect all basic-blocks in loops and sort them after their
     loops postorder.  */
  i = 0;
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  FOR_EACH_BB_FN (bb, cfun)
    if (bb->loop_father != current_loops->tree_root)
      bbs[i++] = bb;
  n = i;
  gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
	      bb_loop_postorder);

  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
     That results in better locality for all the bitmaps.  It also
     automatically sorts the location list of gathered memory references
     after their loop postorder number allowing to binary-search it.  */
  for (i = 0; i < n; ++i)
    {
      basic_block bb = bbs[i];
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    }

  /* Verify the list of gathered memory references is sorted after their
     loop postorder number.  */
  if (flag_checking)
    {
      im_mem_ref *ref;
      FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
	for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
	  gcc_assert (sort_locs_in_loop_postorder_cmp
			(&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
			 bb_loop_postorder) <= 0);
    }

  free (bbs);

  if (!store_motion)
    return;

  /* Propagate the information about accessed memory references up
     the loop hierarchy.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      /* Finalize the overall touched references (including subloops).  */
      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
		       &memory_accesses.refs_stored_in_loop[loop->num]);

      /* Propagate the information about accessed memory references up
	 the loop hierarchy.  */
      outer = loop_outer (loop);
      if (outer == current_loops->tree_root)
	continue;

      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
    }
}
1717
/* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
   tree_to_aff_combination_expand.  TBAA_P says whether type-based alias
   analysis may be used for the disambiguation.  */

static bool
mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
		      hash_map<tree, name_expansion *> **ttae_cache,
		      bool tbaa_p)
{
  /* Placeholder refs (aggregate copies, see gather_mem_refs_stmt) must
     not end up here.  */
  gcc_checking_assert (mem1->mem.ref != error_mark_node
		       && mem2->mem.ref != error_mark_node);

  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
     object and their offset differ in such a way that the locations cannot
     overlap, then they cannot alias.  */
  poly_widest_int size1, size2;
  aff_tree off1, off2;

  /* Perform basic offset and type-based disambiguation.  */
  if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
    return false;

  /* The expansion of addresses may be a bit expensive, thus we only do
     the check at -O2 and higher optimization levels.  */
  if (optimize < 2)
    return true;

  /* Compute OFF2 - OFF1 as affine combinations of SSA names; if the
     difference proves the accesses disjoint, they do not alias.  */
  get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
  get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
  aff_combination_expand (&off1, ttae_cache);
  aff_combination_expand (&off2, ttae_cache);
  aff_combination_scale (&off1, -1);
  aff_combination_add (&off2, &off1);

  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    return false;

  /* Conservatively assume aliasing.  */
  return true;
}
1756
1757 /* Compare function for bsearch searching for reference locations
1758 in a loop. */
1759
1760 static int
find_ref_loc_in_loop_cmp(const void * loop_,const void * loc_,void * bb_loop_postorder_)1761 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1762 void *bb_loop_postorder_)
1763 {
1764 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1765 class loop *loop = (class loop *)const_cast<void *>(loop_);
1766 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1767 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1768 if (loop->num == loc_loop->num
1769 || flow_loop_nested_p (loop, loc_loop))
1770 return 0;
1771 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1772 ? -1 : 1);
1773 }
1774
/* Iterates over all locations of REF in LOOP and its subloops calling
   fn.operator() with the location as argument.  When that operator
   returns true the iteration is stopped and true is returned.
   Otherwise false is returned.  */

template <typename FN>
static bool
for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
{
  unsigned i;
  mem_ref_loc *loc;

  /* Search for the cluster of locs in the accesses_in_loop vector
     which is sorted after postorder index of the loop father.  */
  loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
				       bb_loop_postorder);
  if (!loc)
    return false;

  /* We have found one location inside loop or its sub-loops.  Iterate
     both forward and backward to cover the whole cluster.  */
  i = loc - ref->accesses_in_loop.address ();
  /* Walk backwards from the found element until a location falls
     outside LOOP.  */
  while (i > 0)
    {
      --i;
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
	break;
      if (fn (l))
	return true;
    }
  /* Then walk forwards from the found element (inclusive) to the end
     of the cluster.  */
  for (i = loc - ref->accesses_in_loop.address ();
       i < ref->accesses_in_loop.length (); ++i)
    {
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
	break;
      if (fn (l))
	return true;
    }

  return false;
}
1818
/* Rewrites location LOC by TMP_VAR.  Functor for for_all_locs_in_loop.  */

class rewrite_mem_ref_loc
{
public:
  rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
  bool operator () (mem_ref_loc *loc);
  /* The variable every matched memory reference is replaced with.  */
  tree tmp_var;
};

/* Replace the memory reference at *LOC->REF with TMP_VAR and update the
   operand caches of the containing statement.  Always returns false so
   for_all_locs_in_loop visits every location.  */

bool
rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
{
  *loc->ref = tmp_var;
  update_stmt (loc->stmt);
  return false;
}
1836
/* Rewrites all references to REF in LOOP (and its subloops) by variable
   TMP_VAR.  */

static void
rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
{
  for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
}
1844
/* Stores the first reference location in LOCP.  Functor for
   for_all_locs_in_loop.  */

class first_mem_ref_loc_1
{
public:
  first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
  bool operator () (mem_ref_loc *loc);
  /* Where to store the first visited location.  */
  mem_ref_loc **locp;
};

/* Record LOC in *LOCP and return true to stop the iteration after the
   first location.  */

bool
first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
{
  *locp = loc;
  return true;
}
1861
/* Returns the first reference location to REF in LOOP, or NULL if REF
   has no location inside LOOP or its subloops.  */

static mem_ref_loc *
first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
{
  mem_ref_loc *locp = NULL;
  for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
  return locp;
}
1871
1872 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1873 MEM along edge EX.
1874
1875 The store is only done if MEM has changed. We do this so no
1876 changes to MEM occur on code paths that did not originally store
1877 into it.
1878
1879 The common case for execute_sm will transform:
1880
1881 for (...) {
1882 if (foo)
1883 stuff;
1884 else
1885 MEM = TMP_VAR;
1886 }
1887
1888 into:
1889
1890 lsm = MEM;
1891 for (...) {
1892 if (foo)
1893 stuff;
1894 else
1895 lsm = TMP_VAR;
1896 }
1897 MEM = lsm;
1898
1899 This function will generate:
1900
1901 lsm = MEM;
1902
1903 lsm_flag = false;
1904 ...
1905 for (...) {
1906 if (foo)
1907 stuff;
1908 else {
1909 lsm = TMP_VAR;
1910 lsm_flag = true;
1911 }
1912 }
1913 if (lsm_flag) <--
1914 MEM = lsm; <-- (X)
1915
1916 In case MEM and TMP_VAR are NULL the function will return the then
1917 block so the caller can insert (X) and other related stmts.
1918 */
1919
1920 static basic_block
execute_sm_if_changed(edge ex,tree mem,tree tmp_var,tree flag,edge preheader,hash_set<basic_block> * flag_bbs,edge & append_cond_position,edge & last_cond_fallthru)1921 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1922 edge preheader, hash_set <basic_block> *flag_bbs,
1923 edge &append_cond_position, edge &last_cond_fallthru)
1924 {
1925 basic_block new_bb, then_bb, old_dest;
1926 bool loop_has_only_one_exit;
1927 edge then_old_edge;
1928 gimple_stmt_iterator gsi;
1929 gimple *stmt;
1930 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1931
1932 profile_count count_sum = profile_count::zero ();
1933 int nbbs = 0, ncount = 0;
1934 profile_probability flag_probability = profile_probability::uninitialized ();
1935
1936 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1937 at loop exit.
1938
1939 This code may look fancy, but it cannot update profile very realistically
1940 because we do not know the probability that flag will be true at given
1941 loop exit.
1942
1943 We look for two interesting extremes
1944 - when exit is dominated by block setting the flag, we know it will
1945 always be true. This is a common case.
1946 - when all blocks setting the flag have very low frequency we know
1947 it will likely be false.
1948 In all other cases we default to 2/3 for flag being true. */
1949
1950 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1951 it != flag_bbs->end (); ++it)
1952 {
1953 if ((*it)->count.initialized_p ())
1954 count_sum += (*it)->count, ncount ++;
1955 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1956 flag_probability = profile_probability::always ();
1957 nbbs++;
1958 }
1959
1960 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1961
1962 if (flag_probability.initialized_p ())
1963 ;
1964 else if (ncount == nbbs
1965 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1966 {
1967 flag_probability = count_sum.probability_in (preheader->count ());
1968 if (flag_probability > cap)
1969 flag_probability = cap;
1970 }
1971
1972 if (!flag_probability.initialized_p ())
1973 flag_probability = cap;
1974
1975 /* ?? Insert store after previous store if applicable. See note
1976 below. */
1977 if (append_cond_position)
1978 ex = append_cond_position;
1979
1980 loop_has_only_one_exit = single_pred_p (ex->dest);
1981
1982 if (loop_has_only_one_exit)
1983 ex = split_block_after_labels (ex->dest);
1984 else
1985 {
1986 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1987 !gsi_end_p (gpi); gsi_next (&gpi))
1988 {
1989 gphi *phi = gpi.phi ();
1990 if (virtual_operand_p (gimple_phi_result (phi)))
1991 continue;
1992
1993 /* When the destination has a non-virtual PHI node with multiple
1994 predecessors make sure we preserve the PHI structure by
1995 forcing a forwarder block so that hoisting of that PHI will
1996 still work. */
1997 split_edge (ex);
1998 break;
1999 }
2000 }
2001
2002 old_dest = ex->dest;
2003 new_bb = split_edge (ex);
2004 then_bb = create_empty_bb (new_bb);
2005 then_bb->count = new_bb->count.apply_probability (flag_probability);
2006 if (irr)
2007 then_bb->flags = BB_IRREDUCIBLE_LOOP;
2008 add_bb_to_loop (then_bb, new_bb->loop_father);
2009
2010 gsi = gsi_start_bb (new_bb);
2011 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2012 NULL_TREE, NULL_TREE);
2013 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2014
2015 /* Insert actual store. */
2016 if (mem)
2017 {
2018 gsi = gsi_start_bb (then_bb);
2019 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2020 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2021 }
2022
2023 edge e1 = single_succ_edge (new_bb);
2024 edge e2 = make_edge (new_bb, then_bb,
2025 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2026 e2->probability = flag_probability;
2027
2028 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2029 e1->flags &= ~EDGE_FALLTHRU;
2030
2031 e1->probability = flag_probability.invert ();
2032
2033 then_old_edge = make_single_succ_edge (then_bb, old_dest,
2034 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2035
2036 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2037
2038 if (append_cond_position)
2039 {
2040 basic_block prevbb = last_cond_fallthru->src;
2041 redirect_edge_succ (last_cond_fallthru, new_bb);
2042 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2043 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2044 recompute_dominator (CDI_DOMINATORS, old_dest));
2045 }
2046
2047 /* ?? Because stores may alias, they must happen in the exact
2048 sequence they originally happened. Save the position right after
2049 the (_lsm) store we just created so we can continue appending after
2050 it and maintain the original order. */
2051 append_cond_position = then_old_edge;
2052 last_cond_fallthru = find_edge (new_bb, old_dest);
2053
2054 if (!loop_has_only_one_exit)
2055 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2056 !gsi_end_p (gpi); gsi_next (&gpi))
2057 {
2058 gphi *phi = gpi.phi ();
2059 unsigned i;
2060
2061 for (i = 0; i < gimple_phi_num_args (phi); i++)
2062 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2063 {
2064 tree arg = gimple_phi_arg_def (phi, i);
2065 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2066 update_stmt (phi);
2067 }
2068 }
2069
2070 return then_bb;
2071 }
2072
/* When REF is set on the location, set flag indicating the store.  */

class sm_set_flag_if_changed
{
public:
  sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
    : flag (flag_), bbs (bbs_) {}
  bool operator () (mem_ref_loc *loc);
  /* The boolean flag to set after each store to the location.  */
  tree flag;
  /* Collects the basic blocks in which a flag store was inserted.  */
  hash_set <basic_block> *bbs;
};
2084
bool
sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
{
  /* Only set the flag for writes.  */
  if (is_gimple_assign (loc->stmt)
      && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
      gimple *stmt = gimple_build_assign (flag, boolean_true_node);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
      /* Record the block containing the flag store for later
	 probability estimation at the loop exits.  */
      bbs->add (gimple_bb (stmt));
    }
  /* Always return false so for_all_locs_in_loop visits every
     location rather than stopping at the first match.  */
  return false;
}
2099
2100 /* Helper function for execute_sm. On every location where REF is
2101 set, set an appropriate flag indicating the store. */
2102
2103 static tree
execute_sm_if_changed_flag_set(class loop * loop,im_mem_ref * ref,hash_set<basic_block> * bbs)2104 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2105 hash_set <basic_block> *bbs)
2106 {
2107 tree flag;
2108 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2109 flag = create_tmp_reg (boolean_type_node, str);
2110 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2111 return flag;
2112 }
2113
/* Per-reference state created by execute_sm and consumed when the
   stores are materialized on the loop exits.  */

struct sm_aux
{
  /* The scalar temporary replacing the memory reference in the loop.  */
  tree tmp_var;
  /* Boolean flag recording whether a store happened, or NULL_TREE when
     the store is materialized unconditionally on exit.  */
  tree store_flag;
  /* Blocks in which STORE_FLAG is set; used to estimate the probability
     of the flag being true at a loop exit.  */
  hash_set <basic_block> flag_bbs;
};
2120
/* Executes store motion of memory reference REF from LOOP.
   Exits from the LOOP are stored in EXITS.  The initialization of the
   temporary variable is put to the preheader of the loop, and assignments
   to the reference from the temporary variable are emitted to exits.
   The created sm_aux state is recorded in AUX_MAP.  MAYBE_MT allows use
   of the conditional-store (multi-threaded) model; USE_OTHER_FLAG_VAR
   suppresses creation of a new store flag (the caller shares one).  */

static void
execute_sm (class loop *loop, im_mem_ref *ref,
	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
	    bool use_other_flag_var)
{
  gassign *load;
  struct fmt_data fmt_data;
  struct lim_aux_data *lim_data;
  bool multi_threaded_model_p = false;
  gimple_stmt_iterator gsi;
  sm_aux *aux = new sm_aux;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
      print_generic_expr (dump_file, ref->mem.ref);
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

  /* The scalar replacement used for REF inside the loop.  */
  aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
				 get_lsm_tmp_name (ref->mem.ref, ~0));

  /* Force the address computation of REF out of the loop.  */
  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);

  bool always_stored = ref_always_accessed_p (loop, ref, true);
  /* Use the conditional-store model inside transactions or when an
     unconditional store would introduce a store data race.  */
  if (maybe_mt
      && (bb_in_transaction (loop_preheader_edge (loop)->src)
	  || (! flag_store_data_races && ! always_stored)))
    multi_threaded_model_p = true;

  if (multi_threaded_model_p && !use_other_flag_var)
    aux->store_flag
      = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
  else
    aux->store_flag = NULL_TREE;

  /* Remember variable setup.  */
  aux_map.put (ref, aux);

  rewrite_mem_refs (loop, ref, aux->tmp_var);

  /* Emit the load code on a random exit edge or into the latch if
     the loop does not exit, so that we are sure it will be processed
     by move_computations after all dependencies.  */
  gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);

  /* Avoid doing a load if there was no load of the ref in the loop.
     Esp. when the ref is not always stored we cannot optimize it
     away later.  But when it is not always stored we must use a conditional
     store then.  */
  if ((!always_stored && !multi_threaded_model_p)
      || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
    load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
  else
    {
      /* If not emitting a load mark the uninitialized state on the
	 loop entry as not to be warned for.  */
      tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
      suppress_warning (uninit, OPT_Wuninitialized);
      load = gimple_build_assign (aux->tmp_var, uninit);
    }
  lim_data = init_lim_data (load);
  lim_data->max_loop = loop;
  lim_data->tgt_loop = loop;
  gsi_insert_before (&gsi, load, GSI_SAME_STMT);

  if (aux->store_flag)
    {
      /* Initialize the store flag to false; the inserted flag stores
	 set it to true at each store location.  */
      load = gimple_build_assign (aux->store_flag, boolean_false_node);
      lim_data = init_lim_data (load);
      lim_data->max_loop = loop;
      lim_data->tgt_loop = loop;
      gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    }
}
2203
/* sm_ord is used for ordinary stores we can retain order with respect
   to other stores
   sm_unord is used for conditional executed stores which need to be
   able to execute in arbitrary order with respect to other stores
   sm_other is used for stores we do not try to apply store motion to.  */
enum sm_kind { sm_ord, sm_unord, sm_other };

/* One element of a store sequence as computed by sm_seq_valid_bb.  */
struct seq_entry
{
  /* Leaves members uninitialized on purpose; entries are bulk-grown
     with safe_grow and then overwritten via memmove/memcpy.  */
  seq_entry () {}
  seq_entry (unsigned f, sm_kind k, tree fr = NULL)
    : first (f), second (k), from (fr) {}
  /* Index of the memory reference in memory_accesses.refs_list.  */
  unsigned first;
  /* Classification of the store for materialization purposes.  */
  sm_kind second;
  /* For sm_other entries the value stored, or NULL_TREE when the
     value cannot be used for re-materialization.  */
  tree from;
};
2219
2220 static void
execute_sm_exit(class loop * loop,edge ex,vec<seq_entry> & seq,hash_map<im_mem_ref *,sm_aux * > & aux_map,sm_kind kind,edge & append_cond_position,edge & last_cond_fallthru)2221 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2222 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2223 edge &append_cond_position, edge &last_cond_fallthru)
2224 {
2225 /* Sink the stores to exit from the loop. */
2226 for (unsigned i = seq.length (); i > 0; --i)
2227 {
2228 im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2229 if (seq[i-1].second == sm_other)
2230 {
2231 gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2232 if (dump_file && (dump_flags & TDF_DETAILS))
2233 {
2234 fprintf (dump_file, "Re-issueing dependent store of ");
2235 print_generic_expr (dump_file, ref->mem.ref);
2236 fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2237 loop->num, ex->src->index, ex->dest->index);
2238 }
2239 gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2240 seq[i-1].from);
2241 gsi_insert_on_edge (ex, store);
2242 }
2243 else
2244 {
2245 sm_aux *aux = *aux_map.get (ref);
2246 if (!aux->store_flag || kind == sm_ord)
2247 {
2248 gassign *store;
2249 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2250 aux->tmp_var);
2251 gsi_insert_on_edge (ex, store);
2252 }
2253 else
2254 execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2255 aux->store_flag,
2256 loop_preheader_edge (loop), &aux->flag_bbs,
2257 append_cond_position, last_cond_fallthru);
2258 }
2259 }
2260 }
2261
/* Push the SM candidate at index PTR in the sequence SEQ down until
   we hit the next SM candidate.  Return true if that went OK and
   false if we could not disambiguate against another unrelated ref.
   Update *AT to the index where the candidate now resides.  */

static bool
sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
{
  *at = ptr;
  for (; ptr > 0; --ptr)
    {
      seq_entry &new_cand = seq[ptr];
      seq_entry &against = seq[ptr-1];
      if (against.second == sm_ord
	  || (against.second == sm_other && against.from != NULL_TREE))
	/* Found the tail of the sequence.  */
	break;
      /* We may not ignore self-dependences here.  */
      if (new_cand.first == against.first
	  || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
				  memory_accesses.refs_list[against.first],
				  false))
	/* ???  Prune new_cand from the list of refs to apply SM to.  */
	return false;
      std::swap (new_cand, against);
      *at = ptr - 1;
    }
  return true;
}
2291
/* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
   walking backwards from VDEF (or the end of BB if VDEF is NULL).
   Returns 1 on success (possibly after marking some refs in
   REFS_NOT_SUPPORTED), 0 when there is nothing to process, and -1 when
   an unanalyzable memory access is encountered.  */

static int
sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
		 vec<seq_entry> &seq, bitmap refs_not_in_seq,
		 bitmap refs_not_supported, bool forked,
		 bitmap fully_visited)
{
  /* If no VDEF was handed in, find the last one in BB, falling back
     to a virtual PHI result.  */
  if (!vdef)
    for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
	 gsi_prev (&gsi))
      {
	vdef = gimple_vdef (gsi_stmt (gsi));
	if (vdef)
	  break;
      }
  if (!vdef)
    {
      gphi *vphi = get_virtual_phi (bb);
      if (vphi)
	vdef = gimple_phi_result (vphi);
    }
  if (!vdef)
    {
      if (single_pred_p (bb))
	/* This handles the perfect nest case.  */
	return sm_seq_valid_bb (loop, single_pred (bb), vdef,
				seq, refs_not_in_seq, refs_not_supported,
				forked, fully_visited);
      return 0;
    }
  do
    {
      gimple *def = SSA_NAME_DEF_STMT (vdef);
      if (gimple_bb (def) != bb)
	{
	  /* If we forked by processing a PHI do not allow our walk to
	     merge again until we handle that robustly.  */
	  if (forked)
	    {
	      /* Mark refs_not_in_seq as unsupported.  */
	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	      return 1;
	    }
	  /* Otherwise it doesn't really matter if we end up in different
	     BBs.  */
	  bb = gimple_bb (def);
	}
      if (gphi *phi = dyn_cast <gphi *> (def))
	{
	  /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
	     this is still linear.
	     Eventually we want to cache intermediate results per BB
	     (but we can't easily cache for different exits?).  */
	  /* Stop at PHIs with possible backedges.  */
	  if (bb == bb->loop_father->header
	      || bb->flags & BB_IRREDUCIBLE_LOOP)
	    {
	      /* Mark refs_not_in_seq as unsupported.  */
	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	      return 1;
	    }
	  if (gimple_phi_num_args (phi) == 1)
	    return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
				    gimple_phi_arg_def (phi, 0), seq,
				    refs_not_in_seq, refs_not_supported,
				    false, fully_visited);
	  if (bitmap_bit_p (fully_visited,
			    SSA_NAME_VERSION (gimple_phi_result (phi))))
	    return 1;
	  auto_vec<seq_entry> first_edge_seq;
	  auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
	  int eret;
	  bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
	  /* Walk the first PHI edge; its sequence becomes the base the
	     remaining edges are merged into.  */
	  eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
				  gimple_phi_arg_def (phi, 0),
				  first_edge_seq,
				  tem_refs_not_in_seq, refs_not_supported,
				  true, fully_visited);
	  if (eret != 1)
	    return -1;
	  /* Simplify our lives by pruning the sequence of !sm_ord.  */
	  while (!first_edge_seq.is_empty ()
		 && first_edge_seq.last ().second != sm_ord)
	    first_edge_seq.pop ();
	  for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
	    {
	      tree vuse = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);
	      auto_vec<seq_entry> edge_seq;
	      bitmap_and_compl (tem_refs_not_in_seq,
				refs_not_in_seq, refs_not_supported);
	      /* If we've marked all refs we search for as unsupported
		 we can stop processing and use the sequence as before
		 the PHI.  */
	      if (bitmap_empty_p (tem_refs_not_in_seq))
		return 1;
	      eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
				      tem_refs_not_in_seq, refs_not_supported,
				      true, fully_visited);
	      if (eret != 1)
		return -1;
	      /* Simplify our lives by pruning the sequence of !sm_ord.  */
	      while (!edge_seq.is_empty ()
		     && edge_seq.last ().second != sm_ord)
		edge_seq.pop ();
	      unsigned min_len = MIN(first_edge_seq.length (),
				     edge_seq.length ());
	      /* Incrementally merge seqs into first_edge_seq.  */
	      int first_uneq = -1;
	      auto_vec<seq_entry, 2> extra_refs;
	      for (unsigned int i = 0; i < min_len; ++i)
		{
		  /* ???  We can more intelligently merge when we face different
		     order by additional sinking operations in one sequence.
		     For now we simply mark them as to be processed by the
		     not order-preserving SM code.  */
		  if (first_edge_seq[i].first != edge_seq[i].first)
		    {
		      if (first_edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported,
					first_edge_seq[i].first);
		      if (edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		      first_edge_seq[i].from = NULL_TREE;
		      /* Record the dropped refs for later processing.  */
		      if (first_uneq == -1)
			first_uneq = i;
		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
						       sm_other, NULL_TREE));
		    }
		  /* sm_other prevails.  */
		  else if (first_edge_seq[i].second != edge_seq[i].second)
		    {
		      /* Make sure the ref is marked as not supported.  */
		      bitmap_set_bit (refs_not_supported,
				      first_edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		      first_edge_seq[i].from = NULL_TREE;
		    }
		  else if (first_edge_seq[i].second == sm_other
			   && first_edge_seq[i].from != NULL_TREE
			   && (edge_seq[i].from == NULL_TREE
			       || !operand_equal_p (first_edge_seq[i].from,
						    edge_seq[i].from, 0)))
		    /* The stored values differ between edges, so the value
		       cannot be used for re-materialization.  */
		    first_edge_seq[i].from = NULL_TREE;
		}
	      /* Any excess elements become sm_other since they are now
		 conditionally executed.  */
	      if (first_edge_seq.length () > edge_seq.length ())
		{
		  for (unsigned i = edge_seq.length ();
		       i < first_edge_seq.length (); ++i)
		    {
		      if (first_edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported,
					first_edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		    }
		}
	      else if (edge_seq.length () > first_edge_seq.length ())
		{
		  if (first_uneq == -1)
		    first_uneq = first_edge_seq.length ();
		  for (unsigned i = first_edge_seq.length ();
		       i < edge_seq.length (); ++i)
		    {
		      if (edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
						       sm_other, NULL_TREE));
		    }
		}
	      /* Put unmerged refs at first_uneq to force dependence checking
		 on them.  */
	      if (first_uneq != -1)
		{
		  /* Missing ordered_splice_at.  */
		  if ((unsigned)first_uneq == first_edge_seq.length ())
		    first_edge_seq.safe_splice (extra_refs);
		  else
		    {
		      unsigned fes_length = first_edge_seq.length ();
		      first_edge_seq.safe_grow (fes_length
						+ extra_refs.length ());
		      memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
			       &first_edge_seq[first_uneq],
			       (fes_length - first_uneq) * sizeof (seq_entry));
		      memcpy (&first_edge_seq[first_uneq],
			      extra_refs.address (),
			      extra_refs.length () * sizeof (seq_entry));
		    }
		}
	    }
	  /* Use the sequence from the first edge and push SMs down.  */
	  for (unsigned i = 0; i < first_edge_seq.length (); ++i)
	    {
	      unsigned id = first_edge_seq[i].first;
	      seq.safe_push (first_edge_seq[i]);
	      unsigned new_idx;
	      if ((first_edge_seq[i].second == sm_ord
		   || (first_edge_seq[i].second == sm_other
		       && first_edge_seq[i].from != NULL_TREE))
		  && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
		{
		  if (first_edge_seq[i].second == sm_ord)
		    bitmap_set_bit (refs_not_supported, id);
		  /* Mark it sm_other.  */
		  seq[new_idx].second = sm_other;
		  seq[new_idx].from = NULL_TREE;
		}
	    }
	  bitmap_set_bit (fully_visited,
			  SSA_NAME_VERSION (gimple_phi_result (phi)));
	  return 1;
	}
      lim_aux_data *data = get_lim_data (def);
      gcc_assert (data);
      if (data->ref == UNANALYZABLE_MEM_ID)
	return -1;
      /* Stop at memory references which we can't move.  */
      else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
	{
	  /* Mark refs_not_in_seq as unsupported.  */
	  bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	  return 1;
	}
      /* One of the stores we want to apply SM to and we've not yet seen.  */
      else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
	{
	  seq.safe_push (seq_entry (data->ref, sm_ord));

	  /* 1) push it down the queue until a SMed
	     and not ignored ref is reached, skipping all not SMed refs
	     and ignored refs via non-TBAA disambiguation.  */
	  unsigned new_idx;
	  if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
	      /* If that fails but we did not fork yet continue, we'll see
		 to re-materialize all of the stores in the sequence then.
		 Further stores will only be pushed up to this one.  */
	      && forked)
	    {
	      bitmap_set_bit (refs_not_supported, data->ref);
	      /* Mark it sm_other.  */
	      seq[new_idx].second = sm_other;
	    }

	  /* 2) check whether we've seen all refs we want to SM and if so
	     declare success for the active exit  */
	  if (bitmap_empty_p (refs_not_in_seq))
	    return 1;
	}
      else
	/* Another store not part of the final sequence.  Simply push it.  */
	seq.safe_push (seq_entry (data->ref, sm_other,
				  gimple_assign_rhs1 (def)));

      vdef = gimple_vuse (def);
    }
  while (1);
}
2555
/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
   edges of the LOOP.  References we cannot handle are pruned from
   MEM_REFS on the fly.  */

static void
hoist_memory_references (class loop *loop, bitmap mem_refs,
			 const vec<edge> &exits)
{
  im_mem_ref *ref;
  unsigned i;
  bitmap_iterator bi;

  /* There's a special case we can use ordered re-materialization for
     conditionally executed stores which is when all stores in the loop
     happen in the same basic-block.  In that case we know we'll reach
     all stores and thus can simply process that BB and emit a single
     conditional block of ordered materializations.  See PR102436.  */
  basic_block single_store_bb = NULL;
  EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
			    0, i, bi)
    {
      bool fail = false;
      ref = memory_accesses.refs_list[i];
      for (auto loc : ref->accesses_in_loop)
	if (!gimple_vdef (loc.stmt))
	  ;
	else if (!single_store_bb)
	  {
	    single_store_bb = gimple_bb (loc.stmt);
	    bool conditional = false;
	    for (edge e : exits)
	      if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
		{
		  /* Conditional as seen from e.  */
		  conditional = true;
		  break;
		}
	    /* When the store block dominates all exits the stores are
	       not conditionally executed and the special case does not
	       apply.  */
	    if (!conditional)
	      {
		fail = true;
		break;
	      }
	  }
	else if (single_store_bb != gimple_bb (loc.stmt))
	  {
	    /* Stores in more than one block; special case not usable.  */
	    fail = true;
	    break;
	  }
      if (fail)
	{
	  single_store_bb = NULL;
	  break;
	}
    }
  if (single_store_bb)
    {
      /* Analyze the single block with stores.  */
      auto_bitmap fully_visited;
      auto_bitmap refs_not_supported;
      auto_bitmap refs_not_in_seq;
      auto_vec<seq_entry> seq;
      bitmap_copy (refs_not_in_seq, mem_refs);
      int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
				 seq, refs_not_in_seq, refs_not_supported,
				 false, fully_visited);
      if (res != 1)
	{
	  /* Unhandled refs can still fail this.  */
	  bitmap_clear (mem_refs);
	  return;
	}

      /* We cannot handle sm_other since we neither remember the
	 stored location nor the value at the point we execute them.  */
      for (unsigned i = 0; i < seq.length (); ++i)
	{
	  unsigned new_i;
	  if (seq[i].second == sm_other
	      && seq[i].from != NULL_TREE)
	    seq[i].from = NULL_TREE;
	  else if ((seq[i].second == sm_ord
		    || (seq[i].second == sm_other
			&& seq[i].from != NULL_TREE))
		   && !sm_seq_push_down (seq, i, &new_i))
	    {
	      bitmap_set_bit (refs_not_supported, seq[new_i].first);
	      seq[new_i].second = sm_other;
	      seq[new_i].from = NULL_TREE;
	    }
	}
      bitmap_and_compl_into (mem_refs, refs_not_supported);
      if (bitmap_empty_p (mem_refs))
	return;

      /* Prune seq.  */
      while (seq.last ().second == sm_other
	     && seq.last ().from == NULL_TREE)
	seq.pop ();

      hash_map<im_mem_ref *, sm_aux *> aux_map;

      /* Execute SM but delay the store materialization for ordered
	 sequences on exit.  Only the first ref creates a store flag;
	 the others share it (use_other_flag_var).  */
      bool first_p = true;
      EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
	{
	  ref = memory_accesses.refs_list[i];
	  execute_sm (loop, ref, aux_map, true, !first_p);
	  first_p = false;
	}

      /* Get at the single flag variable we eventually produced.  */
      im_mem_ref *ref
	= memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
      sm_aux *aux = *aux_map.get (ref);

      /* Materialize ordered store sequences on exits.  */
      edge e;
      FOR_EACH_VEC_ELT (exits, i, e)
	{
	  edge append_cond_position = NULL;
	  edge last_cond_fallthru = NULL;
	  edge insert_e = e;
	  /* Construct the single flag variable control flow and insert
	     the ordered seq of stores in the then block.  With
	     -fstore-data-races we can do the stores unconditionally.  */
	  if (aux->store_flag)
	    insert_e
	      = single_pred_edge
		  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
					  aux->store_flag,
					  loop_preheader_edge (loop),
					  &aux->flag_bbs, append_cond_position,
					  last_cond_fallthru));
	  execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
			   append_cond_position, last_cond_fallthru);
	  gsi_commit_one_edge_insert (insert_e, NULL);
	}

      for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
	   iter != aux_map.end (); ++iter)
	delete (*iter).second;

      return;
    }

  /* To address PR57359 before actually applying store-motion check
     the candidates found for validity with regards to reordering
     relative to other stores which we until here disambiguated using
     TBAA which isn't valid.
     What matters is the order of the last stores to the mem_refs
     with respect to the other stores of the loop at the point of the
     loop exits.  */

  /* For each exit compute the store order, pruning from mem_refs
     on the fly.  */
  /* The complexity of this is at least
     O(number of exits * number of SM refs) but more approaching
     O(number of exits * number of SM refs * number of stores).  */
  /* ???  Somehow do this in a single sweep over the loop body.  */
  auto_vec<std::pair<edge, vec<seq_entry> > > sms;
  auto_bitmap refs_not_supported (&lim_bitmap_obstack);
  edge e;
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      vec<seq_entry> seq;
      seq.create (4);
      auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
      bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
      if (bitmap_empty_p (refs_not_in_seq))
	{
	  seq.release ();
	  break;
	}
      auto_bitmap fully_visited;
      int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
				 seq, refs_not_in_seq,
				 refs_not_supported, false,
				 fully_visited);
      if (res != 1)
	{
	  bitmap_copy (refs_not_supported, mem_refs);
	  seq.release ();
	  break;
	}
      sms.safe_push (std::make_pair (e, seq));
    }

  /* Prune pruned mem_refs from earlier processed exits.  Iterate until
     no new refs become unsupported.  */
  bool changed = !bitmap_empty_p (refs_not_supported);
  while (changed)
    {
      changed = false;
      std::pair<edge, vec<seq_entry> > *seq;
      FOR_EACH_VEC_ELT (sms, i, seq)
	{
	  bool need_to_push = false;
	  for (unsigned i = 0; i < seq->second.length (); ++i)
	    {
	      sm_kind kind = seq->second[i].second;
	      if (kind == sm_other && seq->second[i].from == NULL_TREE)
		break;
	      unsigned id = seq->second[i].first;
	      unsigned new_idx;
	      if (kind == sm_ord
		  && bitmap_bit_p (refs_not_supported, id))
		{
		  seq->second[i].second = sm_other;
		  gcc_assert (seq->second[i].from == NULL_TREE);
		  need_to_push = true;
		}
	      else if (need_to_push
		       && !sm_seq_push_down (seq->second, i, &new_idx))
		{
		  /* We need to push down both sm_ord and sm_other
		     but for the latter we need to disqualify all
		     following refs.  */
		  if (kind == sm_ord)
		    {
		      if (bitmap_set_bit (refs_not_supported, id))
			changed = true;
		      seq->second[new_idx].second = sm_other;
		    }
		  else
		    {
		      for (unsigned j = seq->second.length () - 1;
			   j > new_idx; --j)
			if (seq->second[j].second == sm_ord
			    && bitmap_set_bit (refs_not_supported,
					       seq->second[j].first))
			  changed = true;
		      seq->second.truncate (new_idx);
		      break;
		    }
		}
	    }
	}
    }
  std::pair<edge, vec<seq_entry> > *seq;
  FOR_EACH_VEC_ELT (sms, i, seq)
    {
      /* Prune sm_other from the end.  */
      while (!seq->second.is_empty ()
	     && seq->second.last ().second == sm_other)
	seq->second.pop ();
      /* Prune duplicates from the start.  */
      auto_bitmap seen (&lim_bitmap_obstack);
      unsigned j, k;
      for (j = k = 0; j < seq->second.length (); ++j)
	if (bitmap_set_bit (seen, seq->second[j].first))
	  {
	    if (k != j)
	      seq->second[k] = seq->second[j];
	    ++k;
	  }
      seq->second.truncate (k);
      /* And verify.  */
      seq_entry *e;
      FOR_EACH_VEC_ELT (seq->second, j, e)
	gcc_assert (e->second == sm_ord
		    || (e->second == sm_other && e->from != NULL_TREE));
    }

  /* Verify dependence for refs we cannot handle with the order preserving
     code (refs_not_supported) or prune them from mem_refs.  */
  auto_vec<seq_entry> unord_refs;
  EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (!ref_indep_loop_p (loop, ref, sm_waw))
	bitmap_clear_bit (mem_refs, i);
      /* We've now verified store order for ref with respect to all other
	 stores in the loop does not matter.  */
      else
	unord_refs.safe_push (seq_entry (i, sm_unord));
    }

  hash_map<im_mem_ref *, sm_aux *> aux_map;

  /* Execute SM but delay the store materialization for ordered
     sequences on exit.  */
  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
		  false);
    }

  /* Materialize ordered store sequences on exits.  */
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      edge append_cond_position = NULL;
      edge last_cond_fallthru = NULL;
      if (i < sms.length ())
	{
	  gcc_assert (sms[i].first == e);
	  execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
			   append_cond_position, last_cond_fallthru);
	  sms[i].second.release ();
	}
      if (!unord_refs.is_empty ())
	execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
			 append_cond_position, last_cond_fallthru);
      /* Commit edge inserts here to preserve the order of stores
	 when an exit exits multiple loops.  */
      gsi_commit_one_edge_insert (e, NULL);
    }

  for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
       iter != aux_map.end (); ++iter)
    delete (*iter).second;
}
2867
/* Functor used with for_all_locs_in_loop to check whether a memory
   reference is always accessed (and, if STORED_P, always stored to)
   in LOOP.  */

class ref_always_accessed
{
public:
  ref_always_accessed (class loop *loop_, bool stored_p_)
    : loop (loop_), stored_p (stored_p_) {}
  bool operator () (mem_ref_loc *loc);
  /* The loop every access has to be executed in.  */
  class loop *loop;
  /* Whether only store locations qualify.  */
  bool stored_p;
};
2877
2878 bool
operator()2879 ref_always_accessed::operator () (mem_ref_loc *loc)
2880 {
2881 class loop *must_exec;
2882
2883 struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2884 if (!lim_data)
2885 return false;
2886
2887 /* If we require an always executed store make sure the statement
2888 is a store. */
2889 if (stored_p)
2890 {
2891 tree lhs = gimple_get_lhs (loc->stmt);
2892 if (!lhs
2893 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2894 return false;
2895 }
2896
2897 must_exec = lim_data->always_executed_in;
2898 if (!must_exec)
2899 return false;
2900
2901 if (must_exec == loop
2902 || flow_loop_nested_p (must_exec, loop))
2903 return true;
2904
2905 return false;
2906 }
2907
2908 /* Returns true if REF is always accessed in LOOP. If STORED_P is true
2909 make sure REF is always stored to in LOOP. */
2910
2911 static bool
ref_always_accessed_p(class loop * loop,im_mem_ref * ref,bool stored_p)2912 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
2913 {
2914 return for_all_locs_in_loop (loop, ref,
2915 ref_always_accessed (loop, stored_p));
2916 }
2917
2918 /* Returns true if REF1 and REF2 are independent. */
2919
2920 static bool
refs_independent_p(im_mem_ref * ref1,im_mem_ref * ref2,bool tbaa_p)2921 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
2922 {
2923 if (ref1 == ref2)
2924 return true;
2925
2926 if (dump_file && (dump_flags & TDF_DETAILS))
2927 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2928 ref1->id, ref2->id);
2929
2930 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
2931 {
2932 if (dump_file && (dump_flags & TDF_DETAILS))
2933 fprintf (dump_file, "dependent.\n");
2934 return false;
2935 }
2936 else
2937 {
2938 if (dump_file && (dump_flags & TDF_DETAILS))
2939 fprintf (dump_file, "independent.\n");
2940 return true;
2941 }
2942 }
2943
/* Returns true if REF is independent on all other accessess in LOOP.
   KIND specifies the kind of dependence to consider.
     lim_raw assumes REF is not stored in LOOP and disambiguates RAW
	     dependences so if true REF can be hoisted out of LOOP
     sm_war disambiguates a store REF against all other loads to see
	    whether the store can be sunk across loads out of LOOP
     sm_waw disambiguates a store REF against all other stores to see
	    whether the store can be sunk across stores out of LOOP.
   The result is cached per loop via record_loop_dependence and reused
   on subsequent queries.  */

static bool
ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
{
  bool indep_p = true;
  bitmap refs_to_check;

  /* WAR is disambiguated against the loads in LOOP, RAW and WAW
     against the stores.  */
  if (kind == sm_war)
    refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
  else
    refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];

  /* An unanalyzable access in LOOP conflicts with everything; REFs
     with error_mark_node as the ref tree are aggregate copies which
     are handled conservatively as well.  */
  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
      || ref->mem.ref == error_mark_node)
    indep_p = false;
  else
    {
      /* tri-state, { unknown, independent, dependent }  */
      dep_state state = query_loop_dependence (loop, ref, kind);
      if (state != dep_unknown)
	return state == dep_independent ? true : false;

      /* Recurse into the immediate subloops first; this also populates
	 their dependence caches.  */
      class loop *inner = loop->inner;
      while (inner)
	{
	  if (!ref_indep_loop_p (inner, ref, kind))
	    {
	      indep_p = false;
	      break;
	    }
	  inner = inner->next;
	}

      if (indep_p)
	{
	  unsigned i;
	  bitmap_iterator bi;
	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
	    {
	      im_mem_ref *aref = memory_accesses.refs_list[i];
	      if (aref->mem.ref == error_mark_node)
		{
		  /* AREF is an aggregate copy; query the alias oracle
		     on its (single recorded) statement directly.  For
		     sm_war also consider uses, for all kinds consider
		     clobbers; TBAA is used except for WAW.  */
		  gimple *stmt = aref->accesses_in_loop[0].stmt;
		  if ((kind == sm_war
		       && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
						    kind != sm_waw))
		      || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
						   kind != sm_waw))
		    {
		      indep_p = false;
		      break;
		    }
		}
	      else if (!refs_independent_p (ref, aref, kind != sm_waw))
		{
		  indep_p = false;
		  break;
		}
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
	     ref->id, loop->num, indep_p ? "independent" : "dependent");

  /* Record the computed result in the cache.  */
  record_loop_dependence (loop, ref, kind,
			  indep_p ? dep_independent : dep_dependent);

  return indep_p;
}
3025
3026
3027 /* Returns true if we can perform store motion of REF from LOOP. */
3028
3029 static bool
can_sm_ref_p(class loop * loop,im_mem_ref * ref)3030 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3031 {
3032 tree base;
3033
3034 /* Can't hoist unanalyzable refs. */
3035 if (!MEM_ANALYZABLE (ref))
3036 return false;
3037
3038 /* Can't hoist/sink aggregate copies. */
3039 if (ref->mem.ref == error_mark_node)
3040 return false;
3041
3042 /* It should be movable. */
3043 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3044 || TREE_THIS_VOLATILE (ref->mem.ref)
3045 || !for_each_index (&ref->mem.ref, may_move_till, loop))
3046 return false;
3047
3048 /* If it can throw fail, we do not properly update EH info. */
3049 if (tree_could_throw_p (ref->mem.ref))
3050 return false;
3051
3052 /* If it can trap, it must be always executed in LOOP.
3053 Readonly memory locations may trap when storing to them, but
3054 tree_could_trap_p is a predicate for rvalues, so check that
3055 explicitly. */
3056 base = get_base_address (ref->mem.ref);
3057 if ((tree_could_trap_p (ref->mem.ref)
3058 || (DECL_P (base) && TREE_READONLY (base)))
3059 /* ??? We can at least use false here, allowing loads? We
3060 are forcing conditional stores if the ref is not always
3061 stored to later anyway. So this would only guard
3062 the load we need to emit. Thus when the ref is not
3063 loaded we can elide this completely? */
3064 && !ref_always_accessed_p (loop, ref, true))
3065 return false;
3066
3067 /* Verify all loads of ref can be hoisted. */
3068 if (ref->loaded
3069 && bitmap_bit_p (ref->loaded, loop->num)
3070 && !ref_indep_loop_p (loop, ref, lim_raw))
3071 return false;
3072
3073 /* Verify the candidate can be disambiguated against all loads,
3074 that is, we can elide all in-loop stores. Disambiguation
3075 against stores is done later when we cannot guarantee preserving
3076 the order of stores. */
3077 if (!ref_indep_loop_p (loop, ref, sm_war))
3078 return false;
3079
3080 return true;
3081 }
3082
3083 /* Marks the references in LOOP for that store motion should be performed
3084 in REFS_TO_SM. SM_EXECUTED is the set of references for that store
3085 motion was performed in one of the outer loops. */
3086
3087 static void
find_refs_for_sm(class loop * loop,bitmap sm_executed,bitmap refs_to_sm)3088 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3089 {
3090 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3091 unsigned i;
3092 bitmap_iterator bi;
3093 im_mem_ref *ref;
3094
3095 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3096 {
3097 ref = memory_accesses.refs_list[i];
3098 if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3099 bitmap_set_bit (refs_to_sm, i);
3100 }
3101 }
3102
3103 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3104 for a store motion optimization (i.e. whether we can insert statement
3105 on its exits). */
3106
3107 static bool
loop_suitable_for_sm(class loop * loop ATTRIBUTE_UNUSED,const vec<edge> & exits)3108 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3109 const vec<edge> &exits)
3110 {
3111 unsigned i;
3112 edge ex;
3113
3114 FOR_EACH_VEC_ELT (exits, i, ex)
3115 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3116 return false;
3117
3118 return true;
3119 }
3120
3121 /* Try to perform store motion for all memory references modified inside
3122 LOOP. SM_EXECUTED is the bitmap of the memory references for that
3123 store motion was executed in one of the outer loops. */
3124
3125 static void
store_motion_loop(class loop * loop,bitmap sm_executed)3126 store_motion_loop (class loop *loop, bitmap sm_executed)
3127 {
3128 auto_vec<edge> exits = get_loop_exit_edges (loop);
3129 class loop *subloop;
3130 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3131
3132 if (loop_suitable_for_sm (loop, exits))
3133 {
3134 find_refs_for_sm (loop, sm_executed, sm_in_loop);
3135 if (!bitmap_empty_p (sm_in_loop))
3136 hoist_memory_references (loop, sm_in_loop, exits);
3137 }
3138
3139 bitmap_ior_into (sm_executed, sm_in_loop);
3140 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3141 store_motion_loop (subloop, sm_executed);
3142 bitmap_and_compl_into (sm_executed, sm_in_loop);
3143 BITMAP_FREE (sm_in_loop);
3144 }
3145
3146 /* Try to perform store motion for all memory references modified inside
3147 loops. */
3148
3149 static void
do_store_motion(void)3150 do_store_motion (void)
3151 {
3152 class loop *loop;
3153 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3154
3155 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3156 store_motion_loop (loop, sm_executed);
3157
3158 BITMAP_FREE (sm_executed);
3159 }
3160
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for that execution
   of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
   blocks that contain a nonpure call.  */

static void
fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, last = NULL;
  edge e;
  /* The innermost loop the dominance walk is currently inside of.  */
  class loop *inn_loop = loop;

  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
    {
      auto_vec<basic_block, 64> worklist;
      worklist.reserve_exact (loop->num_nodes);
      worklist.quick_push (loop->header);
      do
	{
	  edge_iterator ei;
	  bb = worklist.pop ();

	  if (!flow_bb_inside_loop_p (inn_loop, bb))
	    {
	      /* When we are leaving a possibly infinite inner loop
		 we have to stop processing.  */
	      if (!finite_loop_p (inn_loop))
		break;
	      /* If the loop was finite we can continue with processing
		 the loop we exited to.  */
	      inn_loop = bb->loop_father;
	    }

	  /* BB dominates the latch, so it (and its dominators) are
	     executed on every iteration; remember the deepest such
	     block seen so far.  */
	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	    last = bb;

	  /* A nonpure call may not return (exit, longjmp, infinite
	     loop inside the callee), so nothing after it is known to
	     execute.  */
	  if (bitmap_bit_p (contains_call, bb->index))
	    break;

	  /* If LOOP exits from this BB stop processing.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (!flow_bb_inside_loop_p (loop, e->dest))
	      break;
	  /* FOR_EACH_EDGE leaves E NULL when it ran to completion, so
	     a non-NULL E here means an exit edge was found.  */
	  if (e)
	    break;

	  /* A loop might be infinite (TODO use simple loop analysis
	     to disprove this if possible).  */
	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
	    break;

	  if (bb->loop_father->header == bb)
	    /* Record that we enter into a subloop since it might not
	       be finite.  */
	    /* ???  Entering into a not always executed subloop makes
	       fill_always_executed_in quadratic in loop depth since
	       we walk those loops N times.  This is not a problem
	       in practice though, see PR102253 for a worst-case testcase.  */
	    inn_loop = bb->loop_father;

	  /* Walk the body of LOOP sorted by dominance relation.  Additionally,
	     if a basic block S dominates the latch, then only blocks dominated
	     by S are after it.
	     This is get_loop_body_in_dom_order using a worklist algorithm and
	     stopping once we are no longer interested in visiting further
	     blocks.  */
	  unsigned old_len = worklist.length ();
	  unsigned postpone = 0;
	  for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
	       son;
	       son = next_dom_son (CDI_DOMINATORS, son))
	    {
	      if (!flow_bb_inside_loop_p (loop, son))
		continue;
	      if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
		postpone = worklist.length ();
	      worklist.quick_push (son);
	    }
	  if (postpone)
	    /* Postponing the block that dominates the latch means
	       processing it last and thus putting it earliest in the
	       worklist.  */
	    std::swap (worklist[old_len], worklist[postpone]);
	}
      while (!worklist.is_empty ());

      /* LAST and all blocks dominating it up to the header are always
	 executed in LOOP: mark them by walking the immediate-dominator
	 chain.  */
      while (1)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
			 last->index, loop->num);
	  SET_ALWAYS_EXECUTED_IN (last, loop);
	  if (last == loop->header)
	    break;
	  last = get_immediate_dominator (CDI_DOMINATORS, last);
	}
    }

  /* Recurse into subloops.  */
  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}
3262
3263 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3264 for each such basic block bb records the outermost loop for that execution
3265 of its header implies execution of bb. */
3266
3267 static void
fill_always_executed_in(void)3268 fill_always_executed_in (void)
3269 {
3270 basic_block bb;
3271 class loop *loop;
3272
3273 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3274 bitmap_clear (contains_call);
3275 FOR_EACH_BB_FN (bb, cfun)
3276 {
3277 gimple_stmt_iterator gsi;
3278 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3279 {
3280 if (nonpure_call_p (gsi_stmt (gsi)))
3281 break;
3282 }
3283
3284 if (!gsi_end_p (gsi))
3285 bitmap_set_bit (contains_call, bb->index);
3286 }
3287
3288 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3289 fill_always_executed_in_1 (loop, contains_call);
3290 }
3291
3292
3293 /* Compute the global information needed by the loop invariant motion pass. */
3294
3295 static void
tree_ssa_lim_initialize(bool store_motion)3296 tree_ssa_lim_initialize (bool store_motion)
3297 {
3298 unsigned i;
3299
3300 bitmap_obstack_initialize (&lim_bitmap_obstack);
3301 gcc_obstack_init (&mem_ref_obstack);
3302 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3303
3304 if (flag_tm)
3305 compute_transaction_bits ();
3306
3307 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3308 memory_accesses.refs_list.create (100);
3309 /* Allocate a special, unanalyzable mem-ref with ID zero. */
3310 memory_accesses.refs_list.quick_push
3311 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3312
3313 memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3314 memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3315 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3316 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3317 if (store_motion)
3318 {
3319 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3320 memory_accesses.all_refs_stored_in_loop.quick_grow
3321 (number_of_loops (cfun));
3322 }
3323
3324 for (i = 0; i < number_of_loops (cfun); i++)
3325 {
3326 bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3327 &lim_bitmap_obstack);
3328 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3329 &lim_bitmap_obstack);
3330 if (store_motion)
3331 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3332 &lim_bitmap_obstack);
3333 }
3334
3335 memory_accesses.ttae_cache = NULL;
3336
3337 /* Initialize bb_loop_postorder with a mapping from loop->num to
3338 its postorder index. */
3339 i = 0;
3340 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3341 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3342 bb_loop_postorder[loop->num] = i++;
3343 }
3344
3345 /* Cleans up after the invariant motion pass. */
3346
3347 static void
tree_ssa_lim_finalize(void)3348 tree_ssa_lim_finalize (void)
3349 {
3350 basic_block bb;
3351 unsigned i;
3352 im_mem_ref *ref;
3353
3354 FOR_EACH_BB_FN (bb, cfun)
3355 SET_ALWAYS_EXECUTED_IN (bb, NULL);
3356
3357 bitmap_obstack_release (&lim_bitmap_obstack);
3358 delete lim_aux_data_map;
3359
3360 delete memory_accesses.refs;
3361 memory_accesses.refs = NULL;
3362
3363 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3364 memref_free (ref);
3365 memory_accesses.refs_list.release ();
3366 obstack_free (&mem_ref_obstack, NULL);
3367
3368 memory_accesses.refs_loaded_in_loop.release ();
3369 memory_accesses.refs_stored_in_loop.release ();
3370 memory_accesses.all_refs_stored_in_loop.release ();
3371
3372 if (memory_accesses.ttae_cache)
3373 free_affine_expand_cache (&memory_accesses.ttae_cache);
3374
3375 free (bb_loop_postorder);
3376 }
3377
3378 /* Moves invariants from loops. Only "expensive" invariants are moved out --
3379 i.e. those that are likely to be win regardless of the register pressure.
3380 Only perform store motion if STORE_MOTION is true. */
3381
3382 unsigned int
loop_invariant_motion_in_fun(function * fun,bool store_motion)3383 loop_invariant_motion_in_fun (function *fun, bool store_motion)
3384 {
3385 unsigned int todo = 0;
3386
3387 tree_ssa_lim_initialize (store_motion);
3388
3389 /* Gathers information about memory accesses in the loops. */
3390 analyze_memory_references (store_motion);
3391
3392 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
3393 fill_always_executed_in ();
3394
3395 int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3396 int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3397
3398 /* For each statement determine the outermost loop in that it is
3399 invariant and cost for computing the invariant. */
3400 for (int i = 0; i < n; ++i)
3401 compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3402
3403 /* Execute store motion. Force the necessary invariants to be moved
3404 out of the loops as well. */
3405 if (store_motion)
3406 do_store_motion ();
3407
3408 free (rpo);
3409 rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3410 n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3411
3412 /* Move the expressions that are expensive enough. */
3413 for (int i = 0; i < n; ++i)
3414 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3415
3416 free (rpo);
3417
3418 gsi_commit_edge_inserts ();
3419 if (need_ssa_update_p (fun))
3420 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3421
3422 tree_ssa_lim_finalize ();
3423
3424 return todo;
3425 }
3426
3427 /* Loop invariant motion pass. */
3428
3429 namespace {
3430
/* Static metadata for the "lim" pass as registered with the pass
   manager: a GIMPLE pass in the loop optimization group, timed under
   TV_LIM, requiring only a CFG.  */

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3443
/* Pass wrapper for loop invariant motion.  */

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is cloned so it can appear multiple times in the
     pass pipeline.  */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  /* Run only when -ftree-loop-im is enabled.  */
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim
3457
3458 unsigned int
execute(function * fun)3459 pass_lim::execute (function *fun)
3460 {
3461 bool in_loop_pipeline = scev_initialized_p ();
3462 if (!in_loop_pipeline)
3463 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3464
3465 if (number_of_loops (fun) <= 1)
3466 return 0;
3467 unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3468
3469 if (!in_loop_pipeline)
3470 loop_optimizer_finalize ();
3471 else
3472 scev_reset ();
3473 return todo;
3474 }
3475
3476 } // anon namespace
3477
3478 gimple_opt_pass *
make_pass_lim(gcc::context * ctxt)3479 make_pass_lim (gcc::context *ctxt)
3480 {
3481 return new pass_lim (ctxt);
3482 }
3483
3484
3485