1 /* Loop invariant motion.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "dbgcnt.h"
50 
51 /* TODO:  Support for predicated code motion.  I.e.
52 
53    while (1)
54      {
55        if (cond)
56 	 {
57 	   a = inv;
58 	   something;
59 	 }
60      }
61 
62    Where COND and INV are invariants, but evaluating INV may trap or be
63    invalid for some other reason if !COND.  This may be transformed to
64 
65    if (cond)
66      a = inv;
67    while (1)
68      {
69        if (cond)
70 	 something;
71      }  */
72 
73 /* The auxiliary data kept for each statement.  */
74 
75 struct lim_aux_data
76 {
77   class loop *max_loop;	/* The outermost loop in which the statement
78 				   is invariant.  */
79 
80   class loop *tgt_loop;	/* The loop out of which we want to move the
81 				   invariant.  */
82 
83   class loop *always_executed_in;
84 				/* The outermost loop for which we are sure
85 				   the statement is executed if the loop
86 				   is entered.  */
87 
88   unsigned cost;		/* Cost of the computation performed by the
89 				   statement.  */
90 
91   unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */
92 
93   vec<gimple *> depends;	/* Vector of statements that must also be
94 				   hoisted out of the loop when this statement
95 				   is hoisted; i.e. those that define the
96 				   operands of the statement and are inside of
97 				   the MAX_LOOP loop.  */
98 };
99 
100 /* Maps statements to their lim_aux_data.  */
101 
102 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
103 
104 /* Description of a memory reference location.  */
105 
106 struct mem_ref_loc
107 {
108   tree *ref;			/* The reference itself.  */
109   gimple *stmt;			/* The statement in which it occurs.  */
110 };
111 
112 
113 /* Description of a memory reference.  */
114 
115 class im_mem_ref
116 {
117 public:
118   unsigned id : 30;		/* ID assigned to the memory reference
119 				   (its index in memory_accesses.refs_list)  */
120   unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
121   unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
122   hashval_t hash;		/* Its hash value.  */
123 
124   /* The memory access itself and associated caching of alias-oracle
125      query meta-data.  We are using mem.ref == error_mark_node for the
126      case the reference is represented by its single access stmt
127      in accesses_in_loop[0].  */
128   ao_ref mem;
129 
130   bitmap stored;		/* The set of loops in which this memory location
131 				   is stored to.  */
132   bitmap loaded;		/* The set of loops in which this memory location
133 				   is loaded from.  */
134   vec<mem_ref_loc>		accesses_in_loop;
135 				/* The locations of the accesses.  */
136 
137   /* The following set is computed on demand.  */
138   bitmap_head dep_loop;		/* The set of loops in which the memory
139 				   reference is {in,}dependent in
140 				   different modes.  */
141 };
142 
143 /* We use six bits per loop in the ref->dep_loop bitmap to record
144    the dep_kind x dep_state combinations.  */
145 
146 enum dep_kind { lim_raw, sm_war, sm_waw };
147 enum dep_state { dep_unknown, dep_independent, dep_dependent };
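/* For instance (illustrative only): for the loop with number N and dependence
   kind K the cache uses bit 6 * N + 2 * K to record dep_independent and
   bit 6 * N + 2 * K + 1 to record dep_dependent; with neither bit set the
   state is still dep_unknown (see record_loop_dependence and
   query_loop_dependence below).  */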
148 
149 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */
150 
151 static void
152 record_loop_dependence (class loop *loop, im_mem_ref *ref,
153 			dep_kind kind, dep_state state)
154 {
155   gcc_assert (state != dep_unknown);
156   unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
157   bitmap_set_bit (&ref->dep_loop, bit);
158 }
159 
160 /* Query the loop dependence cache of REF for LOOP, KIND.  */
161 
162 static dep_state
163 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
164 {
165   unsigned first_bit = 6 * loop->num + kind * 2;
166   if (bitmap_bit_p (&ref->dep_loop, first_bit))
167     return dep_independent;
168   else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
169     return dep_dependent;
170   return dep_unknown;
171 }
172 
173 /* Mem_ref hashtable helpers.  */
174 
175 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
176 {
177   typedef ao_ref *compare_type;
178   static inline hashval_t hash (const im_mem_ref *);
179   static inline bool equal (const im_mem_ref *, const ao_ref *);
180 };
181 
182 /* A hash function for class im_mem_ref object MEM.  */
183 
184 inline hashval_t
185 mem_ref_hasher::hash (const im_mem_ref *mem)
186 {
187   return mem->hash;
188 }
189 
190 /* An equality function for class im_mem_ref object MEM1 with
191    memory reference OBJ2.  */
192 
193 inline bool
194 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
195 {
196   if (obj2->max_size_known_p ())
197     return (mem1->ref_decomposed
198 	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
199 		 && TREE_CODE (obj2->base) == MEM_REF
200 		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
201 				     TREE_OPERAND (obj2->base, 0), 0)
202 		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
203 			      mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
204 		|| (operand_equal_p (mem1->mem.base, obj2->base, 0)
205 		    && known_eq (mem1->mem.offset, obj2->offset)))
206 	    && known_eq (mem1->mem.size, obj2->size)
207 	    && known_eq (mem1->mem.max_size, obj2->max_size)
208 	    && mem1->mem.volatile_p == obj2->volatile_p
209 	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
210 		/* We are not canonicalizing alias-sets but for the
211 		   special case where we didn't canonicalize yet and the
212 		   incoming ref is an alias-set zero MEM we pick
213 		   the correct one already.  */
214 		|| (!mem1->ref_canonical
215 		    && (TREE_CODE (obj2->ref) == MEM_REF
216 			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
217 		    && obj2->ref_alias_set == 0)
218 		/* Likewise if there's a canonical ref with alias-set zero.  */
219 		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
220 	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
221 				   TREE_TYPE (obj2->ref)));
222   else
223     return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
224 }
225 
226 
227 /* Description of memory accesses in loops.  */
228 
229 static struct
230 {
231   /* The hash table of memory references accessed in loops.  */
232   hash_table<mem_ref_hasher> *refs;
233 
234   /* The list of memory references.  */
235   vec<im_mem_ref *> refs_list;
236 
237   /* The set of memory references accessed in each loop.  */
238   vec<bitmap_head> refs_loaded_in_loop;
239 
240   /* The set of memory references stored in each loop.  */
241   vec<bitmap_head> refs_stored_in_loop;
242 
243   /* The set of memory references stored in each loop, including subloops.  */
244   vec<bitmap_head> all_refs_stored_in_loop;
245 
246   /* Cache for expanding memory addresses.  */
247   hash_map<tree, name_expansion *> *ttae_cache;
248 } memory_accesses;
249 
250 /* Obstack for the bitmaps in the above data structures.  */
251 static bitmap_obstack lim_bitmap_obstack;
252 static obstack mem_ref_obstack;
253 
254 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
255 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
256 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
257 
258 /* Minimum cost of an expensive expression.  */
259 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
260 
261 /* The outermost loop for which execution of the header guarantees that the
262    block will be executed.  */
263 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
264 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
265 
266 /* ID of the shared unanalyzable mem.  */
267 #define UNANALYZABLE_MEM_ID 0
268 
269 /* Whether the reference was analyzable.  */
270 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
271 
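/* Allocate a new, zero-initialized lim_aux_data for STMT, record it in
   the lim_aux_data_map and return it.  */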
272 static struct lim_aux_data *
273 init_lim_data (gimple *stmt)
274 {
275   lim_aux_data *p = XCNEW (struct lim_aux_data);
276   lim_aux_data_map->put (stmt, p);
277 
278   return p;
279 }
280 
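/* Return the lim_aux_data recorded for STMT, or NULL if none has been
   recorded.  */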
281 static struct lim_aux_data *
282 get_lim_data (gimple *stmt)
283 {
284   lim_aux_data **p = lim_aux_data_map->get (stmt);
285   if (!p)
286     return NULL;
287 
288   return *p;
289 }
290 
291 /* Releases the memory occupied by DATA.  */
292 
293 static void
294 free_lim_aux_data (struct lim_aux_data *data)
295 {
296   data->depends.release ();
297   free (data);
298 }
299 
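/* Free and forget the lim_aux_data recorded for STMT, if any.  */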
300 static void
301 clear_lim_data (gimple *stmt)
302 {
303   lim_aux_data **p = lim_aux_data_map->get (stmt);
304   if (!p)
305     return;
306 
307   free_lim_aux_data (*p);
308   *p = NULL;
309 }
310 
311 
312 /* The possibilities of statement movement.  */
313 enum move_pos
314   {
315     MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
316     MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
317 				   to become executed -- memory accesses, ... */
318     MOVE_POSSIBLE		/* Unlimited movement.  */
319   };
320 
321 
322 /* If it is possible to hoist the statement STMT unconditionally,
323    returns MOVE_POSSIBLE.
324    If it is possible to hoist the statement STMT, but we must avoid making
325    it executed if it would not be executed in the original program (e.g.
326    because it may trap), return MOVE_PRESERVE_EXECUTION.
327    Otherwise return MOVE_IMPOSSIBLE.  */
328 
329 enum move_pos
330 movement_possibility (gimple *stmt)
331 {
332   tree lhs;
333   enum move_pos ret = MOVE_POSSIBLE;
334 
335   if (flag_unswitch_loops
336       && gimple_code (stmt) == GIMPLE_COND)
337     {
338       /* If we perform unswitching, force the operands of the invariant
339 	 condition to be moved out of the loop.  */
340       return MOVE_POSSIBLE;
341     }
342 
343   if (gimple_code (stmt) == GIMPLE_PHI
344       && gimple_phi_num_args (stmt) <= 2
345       && !virtual_operand_p (gimple_phi_result (stmt))
346       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
347     return MOVE_POSSIBLE;
348 
349   if (gimple_get_lhs (stmt) == NULL_TREE)
350     return MOVE_IMPOSSIBLE;
351 
352   if (gimple_vdef (stmt))
353     return MOVE_IMPOSSIBLE;
354 
355   if (stmt_ends_bb_p (stmt)
356       || gimple_has_volatile_ops (stmt)
357       || gimple_has_side_effects (stmt)
358       || stmt_could_throw_p (cfun, stmt))
359     return MOVE_IMPOSSIBLE;
360 
361   if (is_gimple_call (stmt))
362     {
363       /* While a pure or const call is guaranteed to have no side effects, we
364 	 cannot move it arbitrarily.  Consider code like
365 
366 	 char *s = something ();
367 
368 	 while (1)
369 	   {
370 	     if (s)
371 	       t = strlen (s);
372 	     else
373 	       t = 0;
374 	   }
375 
376 	 Here the strlen call cannot be moved out of the loop, even though
377 	 s is invariant.  In addition to possibly creating a call with
378 	 invalid arguments, moving out a function call that is not executed
379 	 may cause performance regressions in case the call is costly and
380 	 not executed at all.  */
381       ret = MOVE_PRESERVE_EXECUTION;
382       lhs = gimple_call_lhs (stmt);
383     }
384   else if (is_gimple_assign (stmt))
385     lhs = gimple_assign_lhs (stmt);
386   else
387     return MOVE_IMPOSSIBLE;
388 
389   if (TREE_CODE (lhs) == SSA_NAME
390       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
391     return MOVE_IMPOSSIBLE;
392 
393   if (TREE_CODE (lhs) != SSA_NAME
394       || gimple_could_trap_p (stmt))
395     return MOVE_PRESERVE_EXECUTION;
396 
397   /* Non local loads in a transaction cannot be hoisted out.  Well,
398      unless the load happens on every path out of the loop, but we
399      don't take this into account yet.  */
400   if (flag_tm
401       && gimple_in_transaction (stmt)
402       && gimple_assign_single_p (stmt))
403     {
404       tree rhs = gimple_assign_rhs1 (stmt);
405       if (DECL_P (rhs) && is_global_var (rhs))
406 	{
407 	  if (dump_file)
408 	    {
409 	      fprintf (dump_file, "Cannot hoist conditional load of ");
410 	      print_generic_expr (dump_file, rhs, TDF_SLIM);
411 	      fprintf (dump_file, " because it is in a transaction.\n");
412 	    }
413 	  return MOVE_IMPOSSIBLE;
414 	}
415     }
416 
417   return ret;
418 }
419 
420 /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
421    loop to which we could move the expression using DEF if it did not have
422    other operands, i.e. the outermost loop enclosing LOOP in which the value
423    of DEF is invariant.  */
424 
425 static class loop *
426 outermost_invariant_loop (tree def, class loop *loop)
427 {
428   gimple *def_stmt;
429   basic_block def_bb;
430   class loop *max_loop;
431   struct lim_aux_data *lim_data;
432 
433   if (!def)
434     return superloop_at_depth (loop, 1);
435 
436   if (TREE_CODE (def) != SSA_NAME)
437     {
438       gcc_assert (is_gimple_min_invariant (def));
439       return superloop_at_depth (loop, 1);
440     }
441 
442   def_stmt = SSA_NAME_DEF_STMT (def);
443   def_bb = gimple_bb (def_stmt);
444   if (!def_bb)
445     return superloop_at_depth (loop, 1);
446 
447   max_loop = find_common_loop (loop, def_bb->loop_father);
448 
449   lim_data = get_lim_data (def_stmt);
450   if (lim_data != NULL && lim_data->max_loop != NULL)
451     max_loop = find_common_loop (max_loop,
452 				 loop_outer (lim_data->max_loop));
453   if (max_loop == loop)
454     return NULL;
455   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
456 
457   return max_loop;
458 }
459 
460 /* DATA is a structure containing information associated with a statement
461    inside LOOP.  DEF is one of the operands of this statement.
462 
463    Find the outermost loop enclosing LOOP in which the value of DEF is invariant
464    and record this in the DATA->max_loop field.  If DEF itself is defined inside
465    this loop as well (i.e. we need to hoist it out of the loop if we want
466    to hoist the statement represented by DATA), record the statement in which
467    DEF is defined in the DATA->depends list.  Additionally, if ADD_COST is true,
468    add the cost of the computation of DEF to the DATA->cost.
469 
470    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
471 
472 static bool
473 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
474 		bool add_cost)
475 {
476   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
477   basic_block def_bb = gimple_bb (def_stmt);
478   class loop *max_loop;
479   struct lim_aux_data *def_data;
480 
481   if (!def_bb)
482     return true;
483 
484   max_loop = outermost_invariant_loop (def, loop);
485   if (!max_loop)
486     return false;
487 
488   if (flow_loop_nested_p (data->max_loop, max_loop))
489     data->max_loop = max_loop;
490 
491   def_data = get_lim_data (def_stmt);
492   if (!def_data)
493     return true;
494 
495   if (add_cost
496       /* Only add the cost if the statement defining DEF is inside LOOP,
497 	 i.e. if it is likely that by moving the invariants dependent
498 	 on it, we will be able to avoid creating a new register for
499 	 it (since it will be only used in these dependent invariants).  */
500       && def_bb->loop_father == loop)
501     data->cost += def_data->cost;
502 
503   data->depends.safe_push (def_stmt);
504 
505   return true;
506 }
507 
508 /* Returns an estimate of the cost of statement STMT.  The values here
509    are just ad-hoc constants, similar to costs for inlining.  */
510 
511 static unsigned
512 stmt_cost (gimple *stmt)
513 {
514   /* Always try to create possibilities for unswitching.  */
515   if (gimple_code (stmt) == GIMPLE_COND
516       || gimple_code (stmt) == GIMPLE_PHI)
517     return LIM_EXPENSIVE;
518 
519   /* We should be hoisting calls if possible.  */
520   if (is_gimple_call (stmt))
521     {
522       tree fndecl;
523 
524       /* Unless the call is a builtin_constant_p; this always folds to a
525 	 constant, so moving it is useless.  */
526       fndecl = gimple_call_fndecl (stmt);
527       if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
528 	return 0;
529 
530       return LIM_EXPENSIVE;
531     }
532 
533   /* Hoisting memory references out should almost surely be a win.  */
534   if (gimple_references_memory_p (stmt))
535     return LIM_EXPENSIVE;
536 
537   if (gimple_code (stmt) != GIMPLE_ASSIGN)
538     return 1;
539 
540   switch (gimple_assign_rhs_code (stmt))
541     {
542     case MULT_EXPR:
543     case WIDEN_MULT_EXPR:
544     case WIDEN_MULT_PLUS_EXPR:
545     case WIDEN_MULT_MINUS_EXPR:
546     case DOT_PROD_EXPR:
547     case TRUNC_DIV_EXPR:
548     case CEIL_DIV_EXPR:
549     case FLOOR_DIV_EXPR:
550     case ROUND_DIV_EXPR:
551     case EXACT_DIV_EXPR:
552     case CEIL_MOD_EXPR:
553     case FLOOR_MOD_EXPR:
554     case ROUND_MOD_EXPR:
555     case TRUNC_MOD_EXPR:
556     case RDIV_EXPR:
557       /* Division and multiplication are usually expensive.  */
558       return LIM_EXPENSIVE;
559 
560     case LSHIFT_EXPR:
561     case RSHIFT_EXPR:
562     case WIDEN_LSHIFT_EXPR:
563     case LROTATE_EXPR:
564     case RROTATE_EXPR:
565       /* Shifts and rotates are usually expensive.  */
566       return LIM_EXPENSIVE;
567 
568     case CONSTRUCTOR:
569       /* Make vector construction cost proportional to the number
570          of elements.  */
571       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
572 
573     case SSA_NAME:
574     case PAREN_EXPR:
575       /* Whether or not something is wrapped inside a PAREN_EXPR
576          should not change move cost.  Nor should an intermediate
577 	 unpropagated SSA name copy.  */
578       return 0;
579 
580     default:
581       return 1;
582     }
583 }
584 
585 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
586    REF is independent.  If REF is not independent in LOOP, NULL is returned
587    instead.  */
588 
589 static class loop *
590 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
591 {
592   class loop *aloop;
593 
594   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
595     return NULL;
596 
597   for (aloop = outer;
598        aloop != loop;
599        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
600     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
601 	&& ref_indep_loop_p (aloop, ref, lim_raw))
602       return aloop;
603 
604   if (ref_indep_loop_p (loop, ref, lim_raw))
605     return loop;
606   else
607     return NULL;
608 }
609 
610 /* If there is a simple load or store to a memory reference in STMT, returns
611    the location of the memory reference, and sets IS_STORE according to whether
612    it is a store or load.  Otherwise, returns NULL.  */
613 
614 static tree *
615 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
616 {
617   tree *lhs, *rhs;
618 
619   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
620   if (!gimple_assign_single_p (stmt))
621     return NULL;
622 
623   lhs = gimple_assign_lhs_ptr (stmt);
624   rhs = gimple_assign_rhs1_ptr (stmt);
625 
626   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
627     {
628       *is_store = false;
629       return rhs;
630     }
631   else if (gimple_vdef (stmt)
632 	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
633     {
634       *is_store = true;
635       return lhs;
636     }
637   else
638     return NULL;
639 }
640 
641 /* From a controlling predicate in DOM determine the arguments from
642    the PHI node PHI that are chosen if the predicate evaluates to
643    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
644    they are non-NULL.  Returns true if the arguments can be determined,
645    else return false.  */
646 
647 static bool
648 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
649 				  tree *true_arg_p, tree *false_arg_p)
650 {
651   edge te, fe;
652   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
653 					     &te, &fe))
654     return false;
655 
656   if (true_arg_p)
657     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
658   if (false_arg_p)
659     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
660 
661   return true;
662 }
663 
664 /* Determine the outermost loop to which it is possible to hoist a statement
665    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
666    the outermost loop in which the value computed by STMT is invariant.
667    If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
668    we preserve the fact whether STMT is executed.  It also fills other related
669    information to LIM_DATA (STMT).
670 
671    The function returns false if STMT cannot be hoisted outside of the loop it
672    is defined in, and true otherwise.  */
673 
674 static bool
675 determine_max_movement (gimple *stmt, bool must_preserve_exec)
676 {
677   basic_block bb = gimple_bb (stmt);
678   class loop *loop = bb->loop_father;
679   class loop *level;
680   struct lim_aux_data *lim_data = get_lim_data (stmt);
681   tree val;
682   ssa_op_iter iter;
683 
684   if (must_preserve_exec)
685     level = ALWAYS_EXECUTED_IN (bb);
686   else
687     level = superloop_at_depth (loop, 1);
688   lim_data->max_loop = level;
689 
690   if (gphi *phi = dyn_cast <gphi *> (stmt))
691     {
692       use_operand_p use_p;
693       unsigned min_cost = UINT_MAX;
694       unsigned total_cost = 0;
695       struct lim_aux_data *def_data;
696 
697       /* We will end up promoting dependencies to be unconditionally
698 	 evaluated.  For this reason the PHI cost (and thus the
699 	 cost we remove from the loop by doing the invariant motion)
700 	 is that of the cheapest PHI argument dependency chain.  */
701       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
702 	{
703 	  val = USE_FROM_PTR (use_p);
704 
705 	  if (TREE_CODE (val) != SSA_NAME)
706 	    {
707 	      /* Assign cost 1 to constants.  */
708 	      min_cost = MIN (min_cost, 1);
709 	      total_cost += 1;
710 	      continue;
711 	    }
712 	  if (!add_dependency (val, lim_data, loop, false))
713 	    return false;
714 
715 	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
716 	  if (gimple_bb (def_stmt)
717 	      && gimple_bb (def_stmt)->loop_father == loop)
718 	    {
719 	      def_data = get_lim_data (def_stmt);
720 	      if (def_data)
721 		{
722 		  min_cost = MIN (min_cost, def_data->cost);
723 		  total_cost += def_data->cost;
724 		}
725 	    }
726 	}
727 
728       min_cost = MIN (min_cost, total_cost);
729       lim_data->cost += min_cost;
730 
731       if (gimple_phi_num_args (phi) > 1)
732 	{
733 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
734 	  gimple *cond;
735 	  if (gsi_end_p (gsi_last_bb (dom)))
736 	    return false;
737 	  cond = gsi_stmt (gsi_last_bb (dom));
738 	  if (gimple_code (cond) != GIMPLE_COND)
739 	    return false;
740 	  /* Verify that this is an extended form of a diamond and
741 	     the PHI arguments are completely controlled by the
742 	     predicate in DOM.  */
743 	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
744 	    return false;
745 
746 	  /* Fold in dependencies and cost of the condition.  */
747 	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
748 	    {
749 	      if (!add_dependency (val, lim_data, loop, false))
750 		return false;
751 	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
752 	      if (def_data)
753 		lim_data->cost += def_data->cost;
754 	    }
755 
756 	  /* We want to avoid unconditionally executing very expensive
757 	     operations.  As costs for our dependencies cannot be
758 	     negative, just claim we are not invariant for this case.
759 	     We are also not sure whether the control-flow inside the
760 	     loop will vanish.  */
761 	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
762 	      && !(min_cost != 0
763 		   && total_cost / min_cost <= 2))
764 	    return false;
765 
766 	  /* Assume that the control-flow in the loop will vanish.
767 	     ???  We should verify this and not artificially increase
768 	     the cost if that is not the case.  */
769 	  lim_data->cost += stmt_cost (stmt);
770 	}
771 
772       return true;
773     }
774   else
775     FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
776       if (!add_dependency (val, lim_data, loop, true))
777 	return false;
778 
779   if (gimple_vuse (stmt))
780     {
781       im_mem_ref *ref
782 	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
783       if (ref
784 	  && MEM_ANALYZABLE (ref))
785 	{
786 	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
787 						     loop, ref);
788 	  if (!lim_data->max_loop)
789 	    return false;
790 	}
791       else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
792 	return false;
793     }
794 
795   lim_data->cost += stmt_cost (stmt);
796 
797   return true;
798 }
799 
800 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
801    and that one of the operands of this statement is computed by STMT.
802    Ensure that STMT (together with all the statements that define its
803    operands) is hoisted at least out of the loop LEVEL.  */
804 
805 static void
806 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
807 {
808   class loop *stmt_loop = gimple_bb (stmt)->loop_father;
809   struct lim_aux_data *lim_data;
810   gimple *dep_stmt;
811   unsigned i;
812 
813   stmt_loop = find_common_loop (orig_loop, stmt_loop);
814   lim_data = get_lim_data (stmt);
815   if (lim_data != NULL && lim_data->tgt_loop != NULL)
816     stmt_loop = find_common_loop (stmt_loop,
817 				  loop_outer (lim_data->tgt_loop));
818   if (flow_loop_nested_p (stmt_loop, level))
819     return;
820 
821   gcc_assert (level == lim_data->max_loop
822 	      || flow_loop_nested_p (lim_data->max_loop, level));
823 
824   lim_data->tgt_loop = level;
825   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
826     set_level (dep_stmt, orig_loop, level);
827 }
828 
829 /* Determines an outermost loop from which we want to hoist the statement STMT.
830    For now we choose the outermost possible loop.  TODO -- use profiling
831    information to set it more sanely.  */
832 
833 static void
834 set_profitable_level (gimple *stmt)
835 {
836   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
837 }
838 
839 /* Returns true if STMT is a call that has side effects.  */
840 
841 static bool
842 nonpure_call_p (gimple *stmt)
843 {
844   if (gimple_code (stmt) != GIMPLE_CALL)
845     return false;
846 
847   return gimple_has_side_effects (stmt);
848 }
849 
850 /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
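/* For instance (illustrative only), with B invariant in the loop,

       x = A / B;

   becomes

       reciptmp = 1.0 / B;
       x = reciptmp * A;

   so that the reciprocal computation can be hoisted.  */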
851 
852 static gimple *
853 rewrite_reciprocal (gimple_stmt_iterator *bsi)
854 {
855   gassign *stmt, *stmt1, *stmt2;
856   tree name, lhs, type;
857   tree real_one;
858   gimple_stmt_iterator gsi;
859 
860   stmt = as_a <gassign *> (gsi_stmt (*bsi));
861   lhs = gimple_assign_lhs (stmt);
862   type = TREE_TYPE (lhs);
863 
864   real_one = build_one_cst (type);
865 
866   name = make_temp_ssa_name (type, NULL, "reciptmp");
867   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
868 			       gimple_assign_rhs2 (stmt));
869   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
870 			       gimple_assign_rhs1 (stmt));
871 
872   /* Replace division stmt with reciprocal and multiply stmts.
873      The multiply stmt is not invariant, so update iterator
874      and avoid rescanning.  */
875   gsi = *bsi;
876   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
877   gsi_replace (&gsi, stmt2, true);
878 
879   /* Continue processing with invariant reciprocal statement.  */
880   return stmt1;
881 }
882 
883 /* Check if the pattern at *BSI is a bittest of the form
884    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
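/* For instance (illustrative only), with B invariant but A not,

       tmp = A >> B;  x = tmp & 1;  if (x != 0) ...

   becomes

       mask = 1 << B;  y = A & mask;  if (y != 0) ...

   so that the mask computation can be hoisted out of the loop.  */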
885 
886 static gimple *
887 rewrite_bittest (gimple_stmt_iterator *bsi)
888 {
889   gassign *stmt;
890   gimple *stmt1;
891   gassign *stmt2;
892   gimple *use_stmt;
893   gcond *cond_stmt;
894   tree lhs, name, t, a, b;
895   use_operand_p use;
896 
897   stmt = as_a <gassign *> (gsi_stmt (*bsi));
898   lhs = gimple_assign_lhs (stmt);
899 
900   /* Verify that the single use of lhs is a comparison against zero.  */
901   if (TREE_CODE (lhs) != SSA_NAME
902       || !single_imm_use (lhs, &use, &use_stmt))
903     return stmt;
904   cond_stmt = dyn_cast <gcond *> (use_stmt);
905   if (!cond_stmt)
906     return stmt;
907   if (gimple_cond_lhs (cond_stmt) != lhs
908       || (gimple_cond_code (cond_stmt) != NE_EXPR
909 	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
910       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
911     return stmt;
912 
913   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
914   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
915   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
916     return stmt;
917 
918   /* There is a conversion in between possibly inserted by fold.  */
919   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
920     {
921       t = gimple_assign_rhs1 (stmt1);
922       if (TREE_CODE (t) != SSA_NAME
923 	  || !has_single_use (t))
924 	return stmt;
925       stmt1 = SSA_NAME_DEF_STMT (t);
926       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
927 	return stmt;
928     }
929 
930   /* Verify that B is loop invariant but A is not.  Verify that with
931      all the stmt walking we are still in the same loop.  */
932   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
933       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
934     return stmt;
935 
936   a = gimple_assign_rhs1 (stmt1);
937   b = gimple_assign_rhs2 (stmt1);
938 
939   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
940       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
941     {
942       gimple_stmt_iterator rsi;
943 
944       /* 1 << B */
945       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
946 		       build_int_cst (TREE_TYPE (a), 1), b);
947       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
948       stmt1 = gimple_build_assign (name, t);
949 
950       /* A & (1 << B) */
951       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
952       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
953       stmt2 = gimple_build_assign (name, t);
954 
955       /* Replace the SSA_NAME we compare against zero.  Adjust
956 	 the type of zero accordingly.  */
957       SET_USE (use, name);
958       gimple_cond_set_rhs (cond_stmt,
959 			   build_int_cst_type (TREE_TYPE (name),
960 					       0));
961 
962       /* Don't use gsi_replace here, none of the new assignments sets
963 	 the variable originally set in stmt.  Move bsi to stmt1, and
964 	 then remove the original stmt, so that we get a chance to
965 	 retain debug info for it.  */
966       rsi = *bsi;
967       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
968       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
969       gimple *to_release = gsi_stmt (rsi);
970       gsi_remove (&rsi, true);
971       release_defs (to_release);
972 
973       return stmt1;
974     }
975 
976   return stmt;
977 }
978 
979 /* Determine the outermost loops in which statements in basic block BB are
980    invariant, and record them in the LIM_DATA associated with the
981    statements.  */
982 
983 static void
984 compute_invariantness (basic_block bb)
985 {
986   enum move_pos pos;
987   gimple_stmt_iterator bsi;
988   gimple *stmt;
989   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
990   class loop *outermost = ALWAYS_EXECUTED_IN (bb);
991   struct lim_aux_data *lim_data;
992 
993   if (!loop_outer (bb->loop_father))
994     return;
995 
996   if (dump_file && (dump_flags & TDF_DETAILS))
997     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
998 	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
999 
1000   /* Look at PHI nodes, but only if there are at most two.
1001      ???  We could relax this further by post-processing the inserted
1002      code and transforming adjacent cond-exprs with the same predicate
1003      to control flow again.  */
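  /* The condition below advances the PHI iterator once or twice and is
     true exactly when the block contains one or two PHI nodes.  */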
1004   bsi = gsi_start_phis (bb);
1005   if (!gsi_end_p (bsi)
1006       && ((gsi_next (&bsi), gsi_end_p (bsi))
1007 	  || (gsi_next (&bsi), gsi_end_p (bsi))))
1008     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1009       {
1010 	stmt = gsi_stmt (bsi);
1011 
1012 	pos = movement_possibility (stmt);
1013 	if (pos == MOVE_IMPOSSIBLE)
1014 	  continue;
1015 
1016 	lim_data = get_lim_data (stmt);
1017 	if (! lim_data)
1018 	  lim_data = init_lim_data (stmt);
1019 	lim_data->always_executed_in = outermost;
1020 
1021 	if (!determine_max_movement (stmt, false))
1022 	  {
1023 	    lim_data->max_loop = NULL;
1024 	    continue;
1025 	  }
1026 
1027 	if (dump_file && (dump_flags & TDF_DETAILS))
1028 	  {
1029 	    print_gimple_stmt (dump_file, stmt, 2);
1030 	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1031 		     loop_depth (lim_data->max_loop),
1032 		     lim_data->cost);
1033 	  }
1034 
1035 	if (lim_data->cost >= LIM_EXPENSIVE)
1036 	  set_profitable_level (stmt);
1037       }
1038 
1039   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1040     {
1041       stmt = gsi_stmt (bsi);
1042 
1043       pos = movement_possibility (stmt);
1044       if (pos == MOVE_IMPOSSIBLE)
1045 	{
1046 	  if (nonpure_call_p (stmt))
1047 	    {
1048 	      maybe_never = true;
1049 	      outermost = NULL;
1050 	    }
1051 	  /* Make sure to note always_executed_in for stores to make
1052 	     store-motion work.  */
1053 	  else if (stmt_makes_single_store (stmt))
1054 	    {
1055 	      struct lim_aux_data *lim_data = get_lim_data (stmt);
1056 	      if (! lim_data)
1057 		lim_data = init_lim_data (stmt);
1058 	      lim_data->always_executed_in = outermost;
1059 	    }
1060 	  continue;
1061 	}
1062 
1063       if (is_gimple_assign (stmt)
1064 	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1065 	      == GIMPLE_BINARY_RHS))
1066 	{
1067 	  tree op0 = gimple_assign_rhs1 (stmt);
1068 	  tree op1 = gimple_assign_rhs2 (stmt);
1069 	  class loop *ol1 = outermost_invariant_loop (op1,
1070 					loop_containing_stmt (stmt));
1071 
1072 	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1073 	     to be hoisted out of loop, saving expensive divide.  */
1074 	  if (pos == MOVE_POSSIBLE
1075 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1076 	      && flag_unsafe_math_optimizations
1077 	      && !flag_trapping_math
1078 	      && ol1 != NULL
1079 	      && outermost_invariant_loop (op0, ol1) == NULL)
1080 	    stmt = rewrite_reciprocal (&bsi);
1081 
1082 	  /* If the shift count is invariant, convert (A >> B) & 1 to
1083 	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
1084 	     saving an expensive shift.  */
1085 	  if (pos == MOVE_POSSIBLE
1086 	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1087 	      && integer_onep (op1)
1088 	      && TREE_CODE (op0) == SSA_NAME
1089 	      && has_single_use (op0))
1090 	    stmt = rewrite_bittest (&bsi);
1091 	}
1092 
1093       lim_data = get_lim_data (stmt);
1094       if (! lim_data)
1095 	lim_data = init_lim_data (stmt);
1096       lim_data->always_executed_in = outermost;
1097 
1098       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1099 	continue;
1100 
1101       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1102 	{
1103 	  lim_data->max_loop = NULL;
1104 	  continue;
1105 	}
1106 
1107       if (dump_file && (dump_flags & TDF_DETAILS))
1108 	{
1109 	  print_gimple_stmt (dump_file, stmt, 2);
1110 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1111 		   loop_depth (lim_data->max_loop),
1112 		   lim_data->cost);
1113 	}
1114 
1115       if (lim_data->cost >= LIM_EXPENSIVE)
1116 	set_profitable_level (stmt);
1117     }
1118 }
1119 
1120 /* Hoist the statements in basic block BB out of the loops prescribed by
1121    data stored in LIM_DATA structures associated with each statement.  Callback
1122    for walk_dominator_tree.  */
1123 
1124 unsigned int
1125 move_computations_worker (basic_block bb)
1126 {
1127   class loop *level;
1128   unsigned cost = 0;
1129   struct lim_aux_data *lim_data;
1130   unsigned int todo = 0;
1131 
1132   if (!loop_outer (bb->loop_father))
1133     return todo;
1134 
1135   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1136     {
1137       gassign *new_stmt;
1138       gphi *stmt = bsi.phi ();
1139 
1140       lim_data = get_lim_data (stmt);
1141       if (lim_data == NULL)
1142 	{
1143 	  gsi_next (&bsi);
1144 	  continue;
1145 	}
1146 
1147       cost = lim_data->cost;
1148       level = lim_data->tgt_loop;
1149       clear_lim_data (stmt);
1150 
1151       if (!level)
1152 	{
1153 	  gsi_next (&bsi);
1154 	  continue;
1155 	}
1156 
1157       if (dump_file && (dump_flags & TDF_DETAILS))
1158 	{
1159 	  fprintf (dump_file, "Moving PHI node\n");
1160 	  print_gimple_stmt (dump_file, stmt, 0);
1161 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1162 		   cost, level->num);
1163 	}
1164 
1165       if (gimple_phi_num_args (stmt) == 1)
1166 	{
1167 	  tree arg = PHI_ARG_DEF (stmt, 0);
1168 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1169 					  TREE_CODE (arg), arg);
1170 	}
1171       else
1172 	{
1173 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1174 	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
1175 	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1176 	  /* Get the PHI arguments corresponding to the true and false
1177 	     edges of COND.  */
1178 	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1179 	  gcc_assert (arg0 && arg1);
1180 	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1181 		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1182 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1183 					  COND_EXPR, t, arg0, arg1);
1184 	  todo |= TODO_cleanup_cfg;
1185 	}
1186       if (!ALWAYS_EXECUTED_IN (bb)
1187 	  || (ALWAYS_EXECUTED_IN (bb) != level
1188 	      && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1189 	reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1190       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1191       remove_phi_node (&bsi, false);
1192     }
1193 
1194   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1195     {
1196       edge e;
1197 
1198       gimple *stmt = gsi_stmt (bsi);
1199 
1200       lim_data = get_lim_data (stmt);
1201       if (lim_data == NULL)
1202 	{
1203 	  gsi_next (&bsi);
1204 	  continue;
1205 	}
1206 
1207       cost = lim_data->cost;
1208       level = lim_data->tgt_loop;
1209       clear_lim_data (stmt);
1210 
1211       if (!level)
1212 	{
1213 	  gsi_next (&bsi);
1214 	  continue;
1215 	}
1216 
1217       /* We do not really want to move the conditional out of the loop; we just
1218 	 placed it here to force its operands to be moved if necessary.  */
1219       if (gimple_code (stmt) == GIMPLE_COND)
1220 	continue;
1221 
1222       if (dump_file && (dump_flags & TDF_DETAILS))
1223 	{
1224 	  fprintf (dump_file, "Moving statement\n");
1225 	  print_gimple_stmt (dump_file, stmt, 0);
1226 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1227 		   cost, level->num);
1228 	}
1229 
1230       e = loop_preheader_edge (level);
1231       gcc_assert (!gimple_vdef (stmt));
1232       if (gimple_vuse (stmt))
1233 	{
1234 	  /* The new VUSE is the one from the virtual PHI in the loop
1235 	     header or the one already present.  */
1236 	  gphi_iterator gsi2;
1237 	  for (gsi2 = gsi_start_phis (e->dest);
1238 	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1239 	    {
1240 	      gphi *phi = gsi2.phi ();
1241 	      if (virtual_operand_p (gimple_phi_result (phi)))
1242 		{
1243 		  SET_USE (gimple_vuse_op (stmt),
1244 			   PHI_ARG_DEF_FROM_EDGE (phi, e));
1245 		  break;
1246 		}
1247 	    }
1248 	}
1249       gsi_remove (&bsi, false);
1250       if (gimple_has_lhs (stmt)
1251 	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1252 	  && (!ALWAYS_EXECUTED_IN (bb)
1253 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1254 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1255 	reset_flow_sensitive_info (gimple_get_lhs (stmt));
1256       /* In case this is a stmt that is not unconditionally executed
1257          when the target loop header is executed and the stmt may
1258 	 invoke undefined integer or pointer overflow, rewrite it to
1259 	 unsigned arithmetic.  */
1260       if (is_gimple_assign (stmt)
1261 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1262 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1263 	  && arith_code_with_undefined_signed_overflow
1264 	       (gimple_assign_rhs_code (stmt))
1265 	  && (!ALWAYS_EXECUTED_IN (bb)
1266 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1267 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1268 	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1269       else
1270 	gsi_insert_on_edge (e, stmt);
1271     }
1272 
1273   return todo;
1274 }
1275 
1276 /* Checks whether the statement defining variable *INDEX can be hoisted
1277    out of the loop passed in DATA.  Callback for for_each_index.  */
1278 
1279 static bool
1280 may_move_till (tree ref, tree *index, void *data)
1281 {
1282   class loop *loop = (class loop *) data, *max_loop;
1283 
1284   /* If REF is an array reference, check also that the step and the lower
1285      bound are invariant in LOOP.  */
1286   if (TREE_CODE (ref) == ARRAY_REF)
1287     {
1288       tree step = TREE_OPERAND (ref, 3);
1289       tree lbound = TREE_OPERAND (ref, 2);
1290 
1291       max_loop = outermost_invariant_loop (step, loop);
1292       if (!max_loop)
1293 	return false;
1294 
1295       max_loop = outermost_invariant_loop (lbound, loop);
1296       if (!max_loop)
1297 	return false;
1298     }
1299 
1300   max_loop = outermost_invariant_loop (*index, loop);
1301   if (!max_loop)
1302     return false;
1303 
1304   return true;
1305 }
1306 
1307 /* If OP is an SSA_NAME, force the statement that defines it to be
1308    moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1309 
1310 static void
1311 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1312 {
1313   gimple *stmt;
1314 
1315   if (!op
1316       || is_gimple_min_invariant (op))
1317     return;
1318 
1319   gcc_assert (TREE_CODE (op) == SSA_NAME);
1320 
1321   stmt = SSA_NAME_DEF_STMT (op);
1322   if (gimple_nop_p (stmt))
1323     return;
1324 
1325   set_level (stmt, orig_loop, loop);
1326 }
1327 
1328 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1329    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
1330    for_each_index.  */
1331 
1332 struct fmt_data
1333 {
1334   class loop *loop;
1335   class loop *orig_loop;
1336 };
1337 
1338 static bool
1339 force_move_till (tree ref, tree *index, void *data)
1340 {
1341   struct fmt_data *fmt_data = (struct fmt_data *) data;
1342 
1343   if (TREE_CODE (ref) == ARRAY_REF)
1344     {
1345       tree step = TREE_OPERAND (ref, 3);
1346       tree lbound = TREE_OPERAND (ref, 2);
1347 
1348       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1349       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1350     }
1351 
1352   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1353 
1354   return true;
1355 }
1356 
1357 /* A function to free the mem_ref object MEM.  */
1358 
1359 static void
1360 memref_free (class im_mem_ref *mem)
1361 {
1362   mem->accesses_in_loop.release ();
1363 }
1364 
1365 /* Allocates and returns a memory reference description for MEM whose hash
1366    value is HASH and id is ID.  */
1367 
1368 static im_mem_ref *
1369 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1370 {
1371   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1372   if (mem)
1373     ref->mem = *mem;
1374   else
1375     ao_ref_init (&ref->mem, error_mark_node);
1376   ref->id = id;
1377   ref->ref_canonical = false;
1378   ref->ref_decomposed = false;
1379   ref->hash = hash;
1380   ref->stored = NULL;
1381   ref->loaded = NULL;
1382   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1383   ref->accesses_in_loop.create (1);
1384 
1385   return ref;
1386 }
1387 
1388 /* Records memory reference location *LOC in LOOP to the memory reference
1389    description REF.  The reference occurs in statement STMT.  */
1390 
1391 static void
1392 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1393 {
1394   mem_ref_loc aref;
1395   aref.stmt = stmt;
1396   aref.ref = loc;
1397   ref->accesses_in_loop.safe_push (aref);
1398 }
1399 
1400 /* Set the LOOP bit in REF stored bitmap and allocate that if
1401    necessary.  Return whether a bit was changed.  */
1402 
1403 static bool
1404 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1405 {
1406   if (!ref->stored)
1407     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1408   return bitmap_set_bit (ref->stored, loop->num);
1409 }
1410 
1411 /* Marks reference REF as stored in LOOP.  */
1412 
1413 static void
1414 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1415 {
1416   while (loop != current_loops->tree_root
1417 	 && set_ref_stored_in_loop (ref, loop))
1418     loop = loop_outer (loop);
1419 }
1420 
1421 /* Set the LOOP bit in REF loaded bitmap and allocate that if
1422    necessary.  Return whether a bit was changed.  */
1423 
1424 static bool
1425 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1426 {
1427   if (!ref->loaded)
1428     ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1429   return bitmap_set_bit (ref->loaded, loop->num);
1430 }
1431 
1432 /* Marks reference REF as loaded in LOOP.  */
1433 
1434 static void
1435 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1436 {
1437   while (loop != current_loops->tree_root
1438 	 && set_ref_loaded_in_loop (ref, loop))
1439     loop = loop_outer (loop);
1440 }
1441 
1442 /* Gathers memory references in statement STMT in LOOP, storing the
1443    information about them in the memory_accesses structure.  Marks
1444    the vops accessed through unrecognized statements there as
1445    well.  */
1446 
1447 static void
1448 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1449 {
1450   tree *mem = NULL;
1451   hashval_t hash;
1452   im_mem_ref **slot;
1453   im_mem_ref *ref;
1454   bool is_stored;
1455   unsigned id;
1456 
1457   if (!gimple_vuse (stmt))
1458     return;
1459 
1460   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1461   if (!mem && is_gimple_assign (stmt))
1462     {
1463       /* For aggregate copies record distinct references but use them
1464 	 only for disambiguation purposes.  */
1465       id = memory_accesses.refs_list.length ();
1466       ref = mem_ref_alloc (NULL, 0, id);
1467       memory_accesses.refs_list.safe_push (ref);
1468       if (dump_file && (dump_flags & TDF_DETAILS))
1469 	{
1470 	  fprintf (dump_file, "Unhandled memory reference %u: ", id);
1471 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1472 	}
1473       record_mem_ref_loc (ref, stmt, mem);
1474       is_stored = gimple_vdef (stmt);
1475     }
1476   else if (!mem)
1477     {
1478       /* We use the shared mem_ref for all unanalyzable refs.  */
1479       id = UNANALYZABLE_MEM_ID;
1480       ref = memory_accesses.refs_list[id];
1481       if (dump_file && (dump_flags & TDF_DETAILS))
1482 	{
1483 	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1484 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1485 	}
1486       is_stored = gimple_vdef (stmt);
1487     }
1488   else
1489     {
1490       /* We are looking for equal refs that might differ in structure
1491          such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
1492 	 make sure we can canonicalize the ref in the hashtable if
1493 	 non-operand_equal_p refs are found.  For the lookup we mark
1494 	 the case we want strict equality with aor.max_size == -1.  */
1495       ao_ref aor;
1496       ao_ref_init (&aor, *mem);
1497       ao_ref_base (&aor);
1498       ao_ref_alias_set (&aor);
1499       HOST_WIDE_INT offset, size, max_size;
1500       poly_int64 saved_maxsize = aor.max_size, mem_off;
1501       tree mem_base;
1502       bool ref_decomposed;
1503       if (aor.max_size_known_p ()
1504 	  && aor.offset.is_constant (&offset)
1505 	  && aor.size.is_constant (&size)
1506 	  && aor.max_size.is_constant (&max_size)
1507 	  && size == max_size
1508 	  && (size % BITS_PER_UNIT) == 0
1509 	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1510 	     size.  Make sure this is consistent with the extraction.  */
1511 	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1512 	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1513 		       aor.size)
1514 	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1515 	{
1516 	  ref_decomposed = true;
1517 	  tree base = ao_ref_base (&aor);
1518 	  poly_int64 moffset;
1519 	  HOST_WIDE_INT mcoffset;
1520 	  if (TREE_CODE (base) == MEM_REF
1521 	      && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1522 	      && moffset.is_constant (&mcoffset))
1523 	    {
1524 	      hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1525 	      hash = iterative_hash_host_wide_int (mcoffset, hash);
1526 	    }
1527 	  else
1528 	    {
1529 	      hash = iterative_hash_expr (base, 0);
1530 	      hash = iterative_hash_host_wide_int (offset, hash);
1531 	    }
1532 	  hash = iterative_hash_host_wide_int (size, hash);
1533 	}
1534       else
1535 	{
1536 	  ref_decomposed = false;
1537 	  hash = iterative_hash_expr (aor.ref, 0);
1538 	  aor.max_size = -1;
1539 	}
1540       slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1541       aor.max_size = saved_maxsize;
1542       if (*slot)
1543 	{
1544 	  if (!(*slot)->ref_canonical
1545 	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1546 	    {
1547 	      /* If we didn't yet canonicalize the hashtable ref (which
1548 	         we'll end up using for code insertion) and hit a second
1549 		 equal ref that is not structurally equivalent create
1550 		 a canonical ref which is a bare MEM_REF.  */
1551 	      if (TREE_CODE (*mem) == MEM_REF
1552 		  || TREE_CODE (*mem) == TARGET_MEM_REF)
1553 		{
1554 		  (*slot)->mem.ref = *mem;
1555 		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1556 		}
1557 	      else
1558 		{
1559 		  tree ref_alias_type = reference_alias_ptr_type (*mem);
1560 		  unsigned int ref_align = get_object_alignment (*mem);
1561 		  tree ref_type = TREE_TYPE (*mem);
1562 		  tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1563 				     unshare_expr (mem_base));
1564 		  if (TYPE_ALIGN (ref_type) != ref_align)
1565 		    ref_type = build_aligned_type (ref_type, ref_align);
1566 		  (*slot)->mem.ref
1567 		    = fold_build2 (MEM_REF, ref_type, tmp,
1568 				   build_int_cst (ref_alias_type, mem_off));
1569 		  if ((*slot)->mem.volatile_p)
1570 		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1571 		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1572 				       && is_gimple_mem_ref_addr
1573 				            (TREE_OPERAND ((*slot)->mem.ref,
1574 							   0)));
1575 		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1576 		}
1577 	      (*slot)->ref_canonical = true;
1578 	    }
1579 	  ref = *slot;
1580 	  id = ref->id;
1581 	}
1582       else
1583 	{
1584 	  id = memory_accesses.refs_list.length ();
1585 	  ref = mem_ref_alloc (&aor, hash, id);
1586 	  ref->ref_decomposed = ref_decomposed;
1587 	  memory_accesses.refs_list.safe_push (ref);
1588 	  *slot = ref;
1589 
1590 	  if (dump_file && (dump_flags & TDF_DETAILS))
1591 	    {
1592 	      fprintf (dump_file, "Memory reference %u: ", id);
1593 	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1594 	      fprintf (dump_file, "\n");
1595 	    }
1596 	}
1597 
1598       record_mem_ref_loc (ref, stmt, mem);
1599     }
1600   if (is_stored)
1601     {
1602       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1603       mark_ref_stored (ref, loop);
1604     }
1605   /* A non-simple memory op that is a write is also treated as a read.  */
1606   if (!is_stored || id == UNANALYZABLE_MEM_ID
1607       || ref->mem.ref == error_mark_node)
1608     {
1609       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1610       mark_ref_loaded (ref, loop);
1611     }
1612   init_lim_data (stmt)->ref = ref->id;
1613   return;
1614 }
1615 
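/* Postorder numbers of the loops in the loop tree, indexed by loop number;
   used by the qsort comparison functions below.  */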
1616 static unsigned *bb_loop_postorder;
1617 
1618 /* qsort sort function to sort blocks after their loop fathers postorder.  */
1619 
1620 static int
1621 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1622 				void *bb_loop_postorder_)
1623 {
1624   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1625   basic_block bb1 = *(const basic_block *)bb1_;
1626   basic_block bb2 = *(const basic_block *)bb2_;
1627   class loop *loop1 = bb1->loop_father;
1628   class loop *loop2 = bb2->loop_father;
1629   if (loop1->num == loop2->num)
1630     return bb1->index - bb2->index;
1631   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1632 }
1633 
1634 /* qsort sort function to sort ref locs after their loop fathers postorder.  */
1635 
1636 static int
1637 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1638 				 void *bb_loop_postorder_)
1639 {
1640   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1641   const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1642   const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1643   class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1644   class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1645   if (loop1->num == loop2->num)
1646     return 0;
1647   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1648 }
1649 
1650 /* Gathers memory references in loops.  */
1651 
1652 static void
1653 analyze_memory_references (bool store_motion)
1654 {
1655   gimple_stmt_iterator bsi;
1656   basic_block bb, *bbs;
1657   class loop *outer;
1658   unsigned i, n;
1659 
1660   /* Collect all basic-blocks in loops and sort them by their
1661      loop's postorder.  */
1662   i = 0;
1663   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1664   FOR_EACH_BB_FN (bb, cfun)
1665     if (bb->loop_father != current_loops->tree_root)
1666       bbs[i++] = bb;
1667   n = i;
1668   gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1669 	      bb_loop_postorder);
1670 
1671   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1672      That results in better locality for all the bitmaps.  It also
1673      automatically sorts the location list of gathered memory references
1674      by their loop postorder number, allowing it to be binary-searched.  */
1675   for (i = 0; i < n; ++i)
1676     {
1677       basic_block bb = bbs[i];
1678       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1679         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1680     }
1681 
1682   /* Verify the list of gathered memory references is sorted by their
1683      loop postorder number.  */
1684   if (flag_checking)
1685     {
1686       im_mem_ref *ref;
1687       FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1688 	for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1689 	  gcc_assert (sort_locs_in_loop_postorder_cmp
1690 			(&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1691 			 bb_loop_postorder) <= 0);
1692     }
1693 
1694   free (bbs);
1695 
1696   if (!store_motion)
1697     return;
1698 
1699   /* Propagate the information about accessed memory references up
1700      the loop hierarchy.  */
1701   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1702     {
1703       /* Finalize the overall touched references (including subloops).  */
1704       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1705 		       &memory_accesses.refs_stored_in_loop[loop->num]);
1706 
1707       /* Propagate the information about accessed memory references up
1708 	 the loop hierarchy.  */
1709       outer = loop_outer (loop);
1710       if (outer == current_loops->tree_root)
1711 	continue;
1712 
1713       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1714 		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
1715     }
1716 }
1717 
1718 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1719    tree_to_aff_combination_expand.  */
1720 
1721 static bool
1722 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1723 		      hash_map<tree, name_expansion *> **ttae_cache,
1724 		      bool tbaa_p)
1725 {
1726   gcc_checking_assert (mem1->mem.ref != error_mark_node
1727 		       && mem2->mem.ref != error_mark_node);
1728 
1729   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1730      object and their offset differ in such a way that the locations cannot
1731      overlap, then they cannot alias.  */
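  /* A worked example with made-up values: for a 4-byte int array A[],
     the refs A[2] and A[4] expand to offsets 8 and 16 from the same
     base.  The difference of 8 is at least as large as either access
     size, so the two locations cannot overlap and thus cannot alias.  */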
1732   poly_widest_int size1, size2;
1733   aff_tree off1, off2;
1734 
1735   /* Perform basic offset and type-based disambiguation.  */
1736   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1737     return false;
1738 
1739   /* The expansion of addresses may be a bit expensive, thus we only do
1740      the check at -O2 and higher optimization levels.  */
1741   if (optimize < 2)
1742     return true;
1743 
1744   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1745   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1746   aff_combination_expand (&off1, ttae_cache);
1747   aff_combination_expand (&off2, ttae_cache);
1748   aff_combination_scale (&off1, -1);
1749   aff_combination_add (&off2, &off1);
1750 
1751   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1752     return false;
1753 
1754   return true;
1755 }
1756 
1757 /* Compare function for bsearch searching for reference locations
1758    in a loop.  */
1759 
1760 static int
1761 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1762 			  void *bb_loop_postorder_)
1763 {
1764   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1765   class loop *loop = (class loop *)const_cast<void *>(loop_);
1766   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1767   class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1768   if (loop->num  == loc_loop->num
1769       || flow_loop_nested_p (loop, loc_loop))
1770     return 0;
1771   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1772 	  ? -1 : 1);
1773 }
1774 
1775 /* Iterates over all locations of REF in LOOP and its subloops calling
1776    fn.operator() with the location as argument.  When that operator
1777    returns true the iteration is stopped and true is returned.
1778    Otherwise false is returned.  */
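/* A typical use is rewrite_mem_refs below, which passes a small functor:

     for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));

   visiting every location of REF in LOOP until the functor returns
   true.  */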
1779 
1780 template <typename FN>
1781 static bool
1782 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1783 {
1784   unsigned i;
1785   mem_ref_loc *loc;
1786 
1787   /* Search for the cluster of locs in the accesses_in_loop vector
1788      which is sorted by the postorder index of the loop father.  */
1789   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1790 				       bb_loop_postorder);
1791   if (!loc)
1792     return false;
1793 
1794   /* We have found one location inside loop or its sub-loops.  Iterate
1795      both forward and backward to cover the whole cluster.  */
1796   i = loc - ref->accesses_in_loop.address ();
1797   while (i > 0)
1798     {
1799       --i;
1800       mem_ref_loc *l = &ref->accesses_in_loop[i];
1801       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1802 	break;
1803       if (fn (l))
1804 	return true;
1805     }
1806   for (i = loc - ref->accesses_in_loop.address ();
1807        i < ref->accesses_in_loop.length (); ++i)
1808     {
1809       mem_ref_loc *l = &ref->accesses_in_loop[i];
1810       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1811 	break;
1812       if (fn (l))
1813 	return true;
1814     }
1815 
1816   return false;
1817 }
1818 
1819 /* Rewrites location LOC by TMP_VAR.  */
1820 
1821 class rewrite_mem_ref_loc
1822 {
1823 public:
1824   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1825   bool operator () (mem_ref_loc *loc);
1826   tree tmp_var;
1827 };
1828 
1829 bool
1830 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1831 {
1832   *loc->ref = tmp_var;
1833   update_stmt (loc->stmt);
1834   return false;
1835 }
1836 
1837 /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1838 
1839 static void
1840 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1841 {
1842   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1843 }
1844 
1845 /* Stores the first reference location in LOCP.  */
1846 
1847 class first_mem_ref_loc_1
1848 {
1849 public:
1850   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1851   bool operator () (mem_ref_loc *loc);
1852   mem_ref_loc **locp;
1853 };
1854 
1855 bool
1856 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1857 {
1858   *locp = loc;
1859   return true;
1860 }
1861 
1862 /* Returns the first reference location to REF in LOOP.  */
1863 
1864 static mem_ref_loc *
1865 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1866 {
1867   mem_ref_loc *locp = NULL;
1868   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1869   return locp;
1870 }
1871 
1872 /* Helper function for execute_sm.  Emit code to store TMP_VAR into
1873    MEM along edge EX.
1874 
1875    The store is only done if MEM has changed.  We do this so no
1876    changes to MEM occur on code paths that did not originally store
1877    into it.
1878 
1879    The common case for execute_sm will transform:
1880 
1881      for (...) {
1882        if (foo)
1883          stuff;
1884        else
1885          MEM = TMP_VAR;
1886      }
1887 
1888    into:
1889 
1890      lsm = MEM;
1891      for (...) {
1892        if (foo)
1893          stuff;
1894        else
1895          lsm = TMP_VAR;
1896      }
1897      MEM = lsm;
1898 
1899   This function will generate:
1900 
1901      lsm = MEM;
1902 
1903      lsm_flag = false;
1904      ...
1905      for (...) {
1906        if (foo)
1907          stuff;
1908        else {
1909          lsm = TMP_VAR;
1910          lsm_flag = true;
1911        }
1912      }
1913      if (lsm_flag)	<--
1914        MEM = lsm;	<-- (X)
1915 
1916   In case MEM and TMP_VAR are NULL the function will return the then
1917   block so the caller can insert (X) and other related stmts.
1918 */
1919 
1920 static basic_block
1921 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1922 		       edge preheader, hash_set <basic_block> *flag_bbs,
1923 		       edge &append_cond_position, edge &last_cond_fallthru)
1924 {
1925   basic_block new_bb, then_bb, old_dest;
1926   bool loop_has_only_one_exit;
1927   edge then_old_edge;
1928   gimple_stmt_iterator gsi;
1929   gimple *stmt;
1930   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1931 
1932   profile_count count_sum = profile_count::zero ();
1933   int nbbs = 0, ncount = 0;
1934   profile_probability flag_probability = profile_probability::uninitialized ();
1935 
1936   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1937      at loop exit.
1938 
1939      This code may look fancy, but it cannot update profile very realistically
1940      because we do not know the probability that flag will be true at a given
1941      loop exit.
1942 
1943      We look for two interesting extremes
1944        - when exit is dominated by block setting the flag, we know it will
1945          always be true.  This is a common case.
1946        - when all blocks setting the flag have very low frequency we know
1947          it will likely be false.
1948      In all other cases we default to 2/3 for flag being true.  */
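  /* As a made-up example: if the blocks setting the flag all have
     initialized counts summing to 30 and the preheader count is 100,
     the flag probability becomes 30%; a sum of 90 would instead be
     capped at the 2/3 default below.  */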
1949 
1950   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1951        it != flag_bbs->end (); ++it)
1952     {
1953        if ((*it)->count.initialized_p ())
1954          count_sum += (*it)->count, ncount ++;
1955        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1956 	 flag_probability = profile_probability::always ();
1957        nbbs++;
1958     }
1959 
1960   profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1961 
1962   if (flag_probability.initialized_p ())
1963     ;
1964   else if (ncount == nbbs
1965 	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1966     {
1967       flag_probability = count_sum.probability_in (preheader->count ());
1968       if (flag_probability > cap)
1969 	flag_probability = cap;
1970     }
1971 
1972   if (!flag_probability.initialized_p ())
1973     flag_probability = cap;
1974 
1975   /* ?? Insert store after previous store if applicable.  See note
1976      below.  */
1977   if (append_cond_position)
1978     ex = append_cond_position;
1979 
1980   loop_has_only_one_exit = single_pred_p (ex->dest);
1981 
1982   if (loop_has_only_one_exit)
1983     ex = split_block_after_labels (ex->dest);
1984   else
1985     {
1986       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1987 	   !gsi_end_p (gpi); gsi_next (&gpi))
1988 	{
1989 	  gphi *phi = gpi.phi ();
1990 	  if (virtual_operand_p (gimple_phi_result (phi)))
1991 	    continue;
1992 
1993 	  /* When the destination has a non-virtual PHI node with multiple
1994 	     predecessors make sure we preserve the PHI structure by
1995 	     forcing a forwarder block so that hoisting of that PHI will
1996 	     still work.  */
1997 	  split_edge (ex);
1998 	  break;
1999 	}
2000     }
2001 
2002   old_dest = ex->dest;
2003   new_bb = split_edge (ex);
2004   then_bb = create_empty_bb (new_bb);
2005   then_bb->count = new_bb->count.apply_probability (flag_probability);
2006   if (irr)
2007     then_bb->flags = BB_IRREDUCIBLE_LOOP;
2008   add_bb_to_loop (then_bb, new_bb->loop_father);
2009 
2010   gsi = gsi_start_bb (new_bb);
2011   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2012 			    NULL_TREE, NULL_TREE);
2013   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2014 
2015   /* Insert actual store.  */
2016   if (mem)
2017     {
2018       gsi = gsi_start_bb (then_bb);
2019       stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2020       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2021     }
2022 
2023   edge e1 = single_succ_edge (new_bb);
2024   edge e2 = make_edge (new_bb, then_bb,
2025 	               EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2026   e2->probability = flag_probability;
2027 
2028   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2029   e1->flags &= ~EDGE_FALLTHRU;
2030 
2031   e1->probability = flag_probability.invert ();
2032 
2033   then_old_edge = make_single_succ_edge (then_bb, old_dest,
2034 			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2035 
2036   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2037 
2038   if (append_cond_position)
2039     {
2040       basic_block prevbb = last_cond_fallthru->src;
2041       redirect_edge_succ (last_cond_fallthru, new_bb);
2042       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2043       set_immediate_dominator (CDI_DOMINATORS, old_dest,
2044 			       recompute_dominator (CDI_DOMINATORS, old_dest));
2045     }
2046 
2047   /* ?? Because stores may alias, they must happen in the exact
2048      sequence they originally happened.  Save the position right after
2049      the (_lsm) store we just created so we can continue appending after
2050      it and maintain the original order.  */
2051   append_cond_position = then_old_edge;
2052   last_cond_fallthru = find_edge (new_bb, old_dest);
2053 
2054   if (!loop_has_only_one_exit)
2055     for (gphi_iterator gpi = gsi_start_phis (old_dest);
2056 	 !gsi_end_p (gpi); gsi_next (&gpi))
2057       {
2058 	gphi *phi = gpi.phi ();
2059 	unsigned i;
2060 
2061 	for (i = 0; i < gimple_phi_num_args (phi); i++)
2062 	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2063 	    {
2064 	      tree arg = gimple_phi_arg_def (phi, i);
2065 	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2066 	      update_stmt (phi);
2067 	    }
2068       }
2069 
2070   return then_bb;
2071 }
2072 
2073 /* When REF is set on the location, set flag indicating the store.  */
2074 
2075 class sm_set_flag_if_changed
2076 {
2077 public:
2078   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2079 	 : flag (flag_), bbs (bbs_) {}
2080   bool operator () (mem_ref_loc *loc);
2081   tree flag;
2082   hash_set <basic_block> *bbs;
2083 };
2084 
2085 bool
2086 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2087 {
2088   /* Only set the flag for writes.  */
2089   if (is_gimple_assign (loc->stmt)
2090       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2091     {
2092       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2093       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2094       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2095       bbs->add (gimple_bb (stmt));
2096     }
2097   return false;
2098 }
2099 
2100 /* Helper function for execute_sm.  On every location where REF is
2101    set, set an appropriate flag indicating the store.  */
2102 
2103 static tree
2104 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2105 				hash_set <basic_block> *bbs)
2106 {
2107   tree flag;
2108   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2109   flag = create_tmp_reg (boolean_type_node, str);
2110   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2111   return flag;
2112 }
2113 
2114 struct sm_aux
2115 {
2116   tree tmp_var;
2117   tree store_flag;
2118   hash_set <basic_block> flag_bbs;
2119 };
2120 
2121 /* Executes store motion of memory reference REF from LOOP.
2122    The initialization of the temporary variable ends up in the preheader
2123    of the loop, assignments to the reference from the temporary variable
2124    are emitted on the exits later, and the set-up is recorded in AUX_MAP.  */
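/* In the simplest case (an illustrative sketch; the conditional-store
   variant is shown before execute_sm_if_changed above):

     for (...)
       MEM = expr;

   is transformed into

     lsm = MEM;
     for (...)
       lsm = expr;
     MEM = lsm;	<-- emitted on the loop exits.  */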
2125 
2126 static void
2127 execute_sm (class loop *loop, im_mem_ref *ref,
2128 	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2129 	    bool use_other_flag_var)
2130 {
2131   gassign *load;
2132   struct fmt_data fmt_data;
2133   struct lim_aux_data *lim_data;
2134   bool multi_threaded_model_p = false;
2135   gimple_stmt_iterator gsi;
2136   sm_aux *aux = new sm_aux;
2137 
2138   if (dump_file && (dump_flags & TDF_DETAILS))
2139     {
2140       fprintf (dump_file, "Executing store motion of ");
2141       print_generic_expr (dump_file, ref->mem.ref);
2142       fprintf (dump_file, " from loop %d\n", loop->num);
2143     }
2144 
2145   aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2146 				 get_lsm_tmp_name (ref->mem.ref, ~0));
2147 
2148   fmt_data.loop = loop;
2149   fmt_data.orig_loop = loop;
2150   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2151 
2152   bool always_stored = ref_always_accessed_p (loop, ref, true);
2153   if (maybe_mt
2154       && (bb_in_transaction (loop_preheader_edge (loop)->src)
2155 	  || (! flag_store_data_races && ! always_stored)))
2156     multi_threaded_model_p = true;
2157 
2158   if (multi_threaded_model_p && !use_other_flag_var)
2159     aux->store_flag
2160       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2161   else
2162     aux->store_flag = NULL_TREE;
2163 
2164   /* Remember variable setup.  */
2165   aux_map.put (ref, aux);
2166 
2167   rewrite_mem_refs (loop, ref, aux->tmp_var);
2168 
2169   /* Emit the load code at the first location of the ref in the loop;
2170      the lim_data set up below makes sure it will be moved out of the
2171      loop by move_computations after all dependencies.  */
2172   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2173 
2174   /* Avoid doing a load if there was no load of the ref in the loop.
2175      Esp. when the ref is not always stored we cannot optimize it
2176      away later.  But when it is not always stored we must use a conditional
2177      store then.  */
2178   if ((!always_stored && !multi_threaded_model_p)
2179       || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2180     load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2181   else
2182     {
2183       /* If not emitting a load mark the uninitialized state on the
2184 	 loop entry as not to be warned for.  */
2185       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2186       suppress_warning (uninit, OPT_Wuninitialized);
2187       load = gimple_build_assign (aux->tmp_var, uninit);
2188     }
2189   lim_data = init_lim_data (load);
2190   lim_data->max_loop = loop;
2191   lim_data->tgt_loop = loop;
2192   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2193 
2194   if (aux->store_flag)
2195     {
2196       load = gimple_build_assign (aux->store_flag, boolean_false_node);
2197       lim_data = init_lim_data (load);
2198       lim_data->max_loop = loop;
2199       lim_data->tgt_loop = loop;
2200       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2201     }
2202 }
2203 
2204 /* sm_ord is used for ordinary stores for which we can retain the order
2205        with respect to other stores,
2206    sm_unord is used for conditionally executed stores which need to be
2207        able to execute in arbitrary order with respect to other stores,
2208    sm_other is used for stores we do not try to apply store motion to.  */
2209 enum sm_kind { sm_ord, sm_unord, sm_other };
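/* An illustrative (made-up) loop body:

     for (...)
       {
	 a = ...;	   unconditional store, a typical sm_ord candidate
	 if (cond)
	   b = ...;	   conditional store, may have to be treated as sm_unord
	 *p = ...;	   store we do not move, left as sm_other
       }

   The real classification is computed by sm_seq_valid_bb and
   hoist_memory_references below.  */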
2210 struct seq_entry
2211 {
2212   seq_entry () {}
2213   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2214     : first (f), second (k), from (fr) {}
2215   unsigned first;
2216   sm_kind second;
2217   tree from;
2218 };
2219 
2220 static void
2221 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2222 		 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2223 		 edge &append_cond_position, edge &last_cond_fallthru)
2224 {
2225   /* Sink the stores to exit from the loop.  */
2226   for (unsigned i = seq.length (); i > 0; --i)
2227     {
2228       im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2229       if (seq[i-1].second == sm_other)
2230 	{
2231 	  gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2232 	  if (dump_file && (dump_flags & TDF_DETAILS))
2233 	    {
2234 	      fprintf (dump_file, "Re-issuing dependent store of ");
2235 	      print_generic_expr (dump_file, ref->mem.ref);
2236 	      fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2237 		       loop->num, ex->src->index, ex->dest->index);
2238 	    }
2239 	  gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2240 						seq[i-1].from);
2241 	  gsi_insert_on_edge (ex, store);
2242 	}
2243       else
2244 	{
2245 	  sm_aux *aux = *aux_map.get (ref);
2246 	  if (!aux->store_flag || kind == sm_ord)
2247 	    {
2248 	      gassign *store;
2249 	      store = gimple_build_assign (unshare_expr (ref->mem.ref),
2250 					   aux->tmp_var);
2251 	      gsi_insert_on_edge (ex, store);
2252 	    }
2253 	  else
2254 	    execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2255 				   aux->store_flag,
2256 				   loop_preheader_edge (loop), &aux->flag_bbs,
2257 				   append_cond_position, last_cond_fallthru);
2258 	}
2259     }
2260 }
2261 
2262 /* Push the SM candidate at index PTR in the sequence SEQ down until
2263    we hit the next SM candidate.  Return true if that went OK and
2264    false if we could not disambiguate against another unrelated ref.
2265    Update *AT to the index where the candidate now resides.  */
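/* For example: with SEQ = { A (sm_ord), B (sm_other with no recorded
   value), C } and PTR at C, C is swapped with B provided the two refs
   can be disambiguated and then stops when it reaches A, so *AT ends up
   at the old index of B.  */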
2266 
2267 static bool
2268 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2269 {
2270   *at = ptr;
2271   for (; ptr > 0; --ptr)
2272     {
2273       seq_entry &new_cand = seq[ptr];
2274       seq_entry &against = seq[ptr-1];
2275       if (against.second == sm_ord
2276 	  || (against.second == sm_other && against.from != NULL_TREE))
2277 	/* Found the tail of the sequence.  */
2278 	break;
2279       /* We may not ignore self-dependences here.  */
2280       if (new_cand.first == against.first
2281 	  || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2282 				  memory_accesses.refs_list[against.first],
2283 				  false))
2284 	/* ???  Prune new_cand from the list of refs to apply SM to.  */
2285 	return false;
2286       std::swap (new_cand, against);
2287       *at = ptr - 1;
2288     }
2289   return true;
2290 }
2291 
2292 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2293    walking backwards from VDEF (or the end of BB if VDEF is NULL).  */
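/* A sketch of the walk on illustrative GIMPLE:

     # .MEM_3 = VDEF <.MEM_2>
     *p_5 = a_1;
     # .MEM_4 = VDEF <.MEM_3>
     *q_6 = b_2;

   starting from .MEM_4 we record the store to *q_6, follow its VUSE to
   .MEM_3 and the store to *p_5, and continue into predecessor blocks
   or across virtual PHIs.  */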
2294 
2295 static int
2296 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2297 		 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2298 		 bitmap refs_not_supported, bool forked,
2299 		 bitmap fully_visited)
2300 {
2301   if (!vdef)
2302     for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2303 	 gsi_prev (&gsi))
2304       {
2305 	vdef = gimple_vdef (gsi_stmt (gsi));
2306 	if (vdef)
2307 	  break;
2308       }
2309   if (!vdef)
2310     {
2311       gphi *vphi = get_virtual_phi (bb);
2312       if (vphi)
2313 	vdef = gimple_phi_result (vphi);
2314     }
2315   if (!vdef)
2316     {
2317       if (single_pred_p (bb))
2318 	/* This handles the perfect nest case.  */
2319 	return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2320 				seq, refs_not_in_seq, refs_not_supported,
2321 				forked, fully_visited);
2322       return 0;
2323     }
2324   do
2325     {
2326       gimple *def = SSA_NAME_DEF_STMT (vdef);
2327       if (gimple_bb (def) != bb)
2328 	{
2329 	  /* If we forked by processing a PHI do not allow our walk to
2330 	     merge again until we handle that robustly.  */
2331 	  if (forked)
2332 	    {
2333 	      /* Mark refs_not_in_seq as unsupported.  */
2334 	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2335 	      return 1;
2336 	    }
2337 	  /* Otherwise it doesn't really matter if we end up in different
2338 	     BBs.  */
2339 	  bb = gimple_bb (def);
2340 	}
2341       if (gphi *phi = dyn_cast <gphi *> (def))
2342 	{
2343 	  /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
2344 	     this is still linear.
2345 	     Eventually we want to cache intermediate results per BB
2346 	     (but we can't easily cache for different exits?).  */
2347 	  /* Stop at PHIs with possible backedges.  */
2348 	  if (bb == bb->loop_father->header
2349 	      || bb->flags & BB_IRREDUCIBLE_LOOP)
2350 	    {
2351 	      /* Mark refs_not_in_seq as unsupported.  */
2352 	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2353 	      return 1;
2354 	    }
2355 	  if (gimple_phi_num_args (phi) == 1)
2356 	    return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2357 				    gimple_phi_arg_def (phi, 0), seq,
2358 				    refs_not_in_seq, refs_not_supported,
2359 				    false, fully_visited);
2360 	  if (bitmap_bit_p (fully_visited,
2361 			    SSA_NAME_VERSION (gimple_phi_result (phi))))
2362 	    return 1;
2363 	  auto_vec<seq_entry> first_edge_seq;
2364 	  auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2365 	  int eret;
2366 	  bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2367 	  eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2368 				  gimple_phi_arg_def (phi, 0),
2369 				  first_edge_seq,
2370 				  tem_refs_not_in_seq, refs_not_supported,
2371 				  true, fully_visited);
2372 	  if (eret != 1)
2373 	    return -1;
2374 	  /* Simplify our lives by pruning the sequence of !sm_ord.  */
2375 	  while (!first_edge_seq.is_empty ()
2376 		 && first_edge_seq.last ().second != sm_ord)
2377 	    first_edge_seq.pop ();
2378 	  for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2379 	    {
2380 	      tree vuse = gimple_phi_arg_def (phi, i);
2381 	      edge e = gimple_phi_arg_edge (phi, i);
2382 	      auto_vec<seq_entry> edge_seq;
2383 	      bitmap_and_compl (tem_refs_not_in_seq,
2384 				refs_not_in_seq, refs_not_supported);
2385 	      /* If we've marked all refs we search for as unsupported
2386 		 we can stop processing and use the sequence as before
2387 		 the PHI.  */
2388 	      if (bitmap_empty_p (tem_refs_not_in_seq))
2389 		return 1;
2390 	      eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2391 				      tem_refs_not_in_seq, refs_not_supported,
2392 				      true, fully_visited);
2393 	      if (eret != 1)
2394 		return -1;
2395 	      /* Simplify our lives by pruning the sequence of !sm_ord.  */
2396 	      while (!edge_seq.is_empty ()
2397 		     && edge_seq.last ().second != sm_ord)
2398 		edge_seq.pop ();
2399 	      unsigned min_len = MIN(first_edge_seq.length (),
2400 				     edge_seq.length ());
2401 	      /* Incrementally merge seqs into first_edge_seq.  */
2402 	      int first_uneq = -1;
2403 	      auto_vec<seq_entry, 2> extra_refs;
2404 	      for (unsigned int i = 0; i < min_len; ++i)
2405 		{
2406 		  /* ???  We can more intelligently merge when we face different
2407 		     order by additional sinking operations in one sequence.
2408 		     For now we simply mark them as to be processed by the
2409 		     not order-preserving SM code.  */
2410 		  if (first_edge_seq[i].first != edge_seq[i].first)
2411 		    {
2412 		      if (first_edge_seq[i].second == sm_ord)
2413 			bitmap_set_bit (refs_not_supported,
2414 					first_edge_seq[i].first);
2415 		      if (edge_seq[i].second == sm_ord)
2416 			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2417 		      first_edge_seq[i].second = sm_other;
2418 		      first_edge_seq[i].from = NULL_TREE;
2419 		      /* Record the dropped refs for later processing.  */
2420 		      if (first_uneq == -1)
2421 			first_uneq = i;
2422 		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
2423 						       sm_other, NULL_TREE));
2424 		    }
2425 		  /* sm_other prevails.  */
2426 		  else if (first_edge_seq[i].second != edge_seq[i].second)
2427 		    {
2428 		      /* Make sure the ref is marked as not supported.  */
2429 		      bitmap_set_bit (refs_not_supported,
2430 				      first_edge_seq[i].first);
2431 		      first_edge_seq[i].second = sm_other;
2432 		      first_edge_seq[i].from = NULL_TREE;
2433 		    }
2434 		  else if (first_edge_seq[i].second == sm_other
2435 			   && first_edge_seq[i].from != NULL_TREE
2436 			   && (edge_seq[i].from == NULL_TREE
2437 			       || !operand_equal_p (first_edge_seq[i].from,
2438 						    edge_seq[i].from, 0)))
2439 		    first_edge_seq[i].from = NULL_TREE;
2440 		}
2441 	      /* Any excess elements become sm_other since they are now
2442 		 conditionally executed.  */
2443 	      if (first_edge_seq.length () > edge_seq.length ())
2444 		{
2445 		  for (unsigned i = edge_seq.length ();
2446 		       i < first_edge_seq.length (); ++i)
2447 		    {
2448 		      if (first_edge_seq[i].second == sm_ord)
2449 			bitmap_set_bit (refs_not_supported,
2450 					first_edge_seq[i].first);
2451 		      first_edge_seq[i].second = sm_other;
2452 		    }
2453 		}
2454 	      else if (edge_seq.length () > first_edge_seq.length ())
2455 		{
2456 		  if (first_uneq == -1)
2457 		    first_uneq = first_edge_seq.length ();
2458 		  for (unsigned i = first_edge_seq.length ();
2459 		       i < edge_seq.length (); ++i)
2460 		    {
2461 		      if (edge_seq[i].second == sm_ord)
2462 			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2463 		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
2464 						       sm_other, NULL_TREE));
2465 		    }
2466 		}
2467 	      /* Put unmerged refs at first_uneq to force dependence checking
2468 		 on them.  */
2469 	      if (first_uneq != -1)
2470 		{
2471 		  /* Missing ordered_splice_at.  */
2472 		  if ((unsigned)first_uneq == first_edge_seq.length ())
2473 		    first_edge_seq.safe_splice (extra_refs);
2474 		  else
2475 		    {
2476 		      unsigned fes_length = first_edge_seq.length ();
2477 		      first_edge_seq.safe_grow (fes_length
2478 						+ extra_refs.length ());
2479 		      memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2480 			       &first_edge_seq[first_uneq],
2481 			       (fes_length - first_uneq) * sizeof (seq_entry));
2482 		      memcpy (&first_edge_seq[first_uneq],
2483 			      extra_refs.address (),
2484 			      extra_refs.length () * sizeof (seq_entry));
2485 		    }
2486 		}
2487 	    }
2488 	  /* Use the sequence from the first edge and push SMs down.  */
2489 	  for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2490 	    {
2491 	      unsigned id = first_edge_seq[i].first;
2492 	      seq.safe_push (first_edge_seq[i]);
2493 	      unsigned new_idx;
2494 	      if ((first_edge_seq[i].second == sm_ord
2495 		   || (first_edge_seq[i].second == sm_other
2496 		       && first_edge_seq[i].from != NULL_TREE))
2497 		  && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2498 		{
2499 		  if (first_edge_seq[i].second == sm_ord)
2500 		    bitmap_set_bit (refs_not_supported, id);
2501 		  /* Mark it sm_other.  */
2502 		  seq[new_idx].second = sm_other;
2503 		  seq[new_idx].from = NULL_TREE;
2504 		}
2505 	    }
2506 	  bitmap_set_bit (fully_visited,
2507 			  SSA_NAME_VERSION (gimple_phi_result (phi)));
2508 	  return 1;
2509 	}
2510       lim_aux_data *data = get_lim_data (def);
2511       gcc_assert (data);
2512       if (data->ref == UNANALYZABLE_MEM_ID)
2513 	return -1;
2514       /* Stop at memory references which we can't move.  */
2515       else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
2516 	{
2517 	  /* Mark refs_not_in_seq as unsupported.  */
2518 	  bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2519 	  return 1;
2520 	}
2521       /* One of the stores we want to apply SM to and we've not yet seen.  */
2522       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2523 	{
2524 	  seq.safe_push (seq_entry (data->ref, sm_ord));
2525 
2526 	  /* 1) push it down the queue until an SMed
2527 	     and not ignored ref is reached, skipping all not SMed refs
2528 	     and ignored refs via non-TBAA disambiguation.  */
2529 	  unsigned new_idx;
2530 	  if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2531 	      /* If that fails but we did not fork yet continue, we'll see
2532 		 to re-materialize all of the stores in the sequence then.
2533 		 Further stores will only be pushed up to this one.  */
2534 	      && forked)
2535 	    {
2536 	      bitmap_set_bit (refs_not_supported, data->ref);
2537 	      /* Mark it sm_other.  */
2538 	      seq[new_idx].second = sm_other;
2539 	    }
2540 
2541 	  /* 2) check whether we've seen all refs we want to SM and if so
2542 	     declare success for the active exit.  */
2543 	  if (bitmap_empty_p (refs_not_in_seq))
2544 	    return 1;
2545 	}
2546       else
2547 	/* Another store not part of the final sequence.  Simply push it.  */
2548 	seq.safe_push (seq_entry (data->ref, sm_other,
2549 				  gimple_assign_rhs1 (def)));
2550 
2551       vdef = gimple_vuse (def);
2552     }
2553   while (1);
2554 }
2555 
2556 /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2557    edges of the LOOP.  */
2558 
2559 static void
2560 hoist_memory_references (class loop *loop, bitmap mem_refs,
2561 			 const vec<edge> &exits)
2562 {
2563   im_mem_ref *ref;
2564   unsigned  i;
2565   bitmap_iterator bi;
2566 
2567   /* There's a special case where we can use ordered re-materialization
2568      for conditionally executed stores: when all stores in the loop
2569      happen in the same basic-block.  In that case we know we'll reach
2570      all stores and thus can simply process that BB and emit a single
2571      conditional block of ordered materializations.  See PR102436.  */
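  /* For example (cf. PR102436):

       for (...)
	 if (cond)
	   {
	     *p = ...;
	     *q = ...;
	   }

     Both stores happen in one conditional block, so a single flag can
     guard an ordered materialization of both stores on the exits.  */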
2572   basic_block single_store_bb = NULL;
2573   EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2574 			    0, i, bi)
2575     {
2576       bool fail = false;
2577       ref = memory_accesses.refs_list[i];
2578       for (auto loc : ref->accesses_in_loop)
2579 	if (!gimple_vdef (loc.stmt))
2580 	  ;
2581 	else if (!single_store_bb)
2582 	  {
2583 	    single_store_bb = gimple_bb (loc.stmt);
2584 	    bool conditional = false;
2585 	    for (edge e : exits)
2586 	      if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2587 		{
2588 		  /* Conditional as seen from e.  */
2589 		  conditional = true;
2590 		  break;
2591 		}
2592 	    if (!conditional)
2593 	      {
2594 		fail = true;
2595 		break;
2596 	      }
2597 	  }
2598 	else if (single_store_bb != gimple_bb (loc.stmt))
2599 	  {
2600 	    fail = true;
2601 	    break;
2602 	  }
2603       if (fail)
2604 	{
2605 	  single_store_bb = NULL;
2606 	  break;
2607 	}
2608     }
2609   if (single_store_bb)
2610     {
2611       /* Analyze the single block with stores.  */
2612       auto_bitmap fully_visited;
2613       auto_bitmap refs_not_supported;
2614       auto_bitmap refs_not_in_seq;
2615       auto_vec<seq_entry> seq;
2616       bitmap_copy (refs_not_in_seq, mem_refs);
2617       int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
2618 				 seq, refs_not_in_seq, refs_not_supported,
2619 				 false, fully_visited);
2620       if (res != 1)
2621 	{
2622 	  /* Unhandled refs can still fail this.  */
2623 	  bitmap_clear (mem_refs);
2624 	  return;
2625 	}
2626 
2627       /* We cannot handle sm_other since we neither remember the
2628 	 stored location nor the value at the point we execute them.  */
2629       for (unsigned i = 0; i < seq.length (); ++i)
2630 	{
2631 	  unsigned new_i;
2632 	  if (seq[i].second == sm_other
2633 	      && seq[i].from != NULL_TREE)
2634 	    seq[i].from = NULL_TREE;
2635 	  else if ((seq[i].second == sm_ord
2636 		    || (seq[i].second == sm_other
2637 			&& seq[i].from != NULL_TREE))
2638 		   && !sm_seq_push_down (seq, i, &new_i))
2639 	    {
2640 	      bitmap_set_bit (refs_not_supported, seq[new_i].first);
2641 	      seq[new_i].second = sm_other;
2642 	      seq[new_i].from = NULL_TREE;
2643 	    }
2644 	}
2645       bitmap_and_compl_into (mem_refs, refs_not_supported);
2646       if (bitmap_empty_p (mem_refs))
2647 	return;
2648 
2649       /* Prune seq.  */
2650       while (seq.last ().second == sm_other
2651 	     && seq.last ().from == NULL_TREE)
2652 	seq.pop ();
2653 
2654       hash_map<im_mem_ref *, sm_aux *> aux_map;
2655 
2656       /* Execute SM but delay the store materialization for ordered
2657 	 sequences on exit.  */
2658       bool first_p = true;
2659       EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2660 	{
2661 	  ref = memory_accesses.refs_list[i];
2662 	  execute_sm (loop, ref, aux_map, true, !first_p);
2663 	  first_p = false;
2664 	}
2665 
2666       /* Get at the single flag variable we eventually produced.  */
2667       im_mem_ref *ref
2668 	= memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
2669       sm_aux *aux = *aux_map.get (ref);
2670 
2671       /* Materialize ordered store sequences on exits.  */
2672       edge e;
2673       FOR_EACH_VEC_ELT (exits, i, e)
2674 	{
2675 	  edge append_cond_position = NULL;
2676 	  edge last_cond_fallthru = NULL;
2677 	  edge insert_e = e;
2678 	  /* Construct the single flag variable control flow and insert
2679 	     the ordered seq of stores in the then block.  With
2680 	     -fstore-data-races we can do the stores unconditionally.  */
2681 	  if (aux->store_flag)
2682 	    insert_e
2683 	      = single_pred_edge
2684 		  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
2685 					  aux->store_flag,
2686 					  loop_preheader_edge (loop),
2687 					  &aux->flag_bbs, append_cond_position,
2688 					  last_cond_fallthru));
2689 	  execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
2690 			   append_cond_position, last_cond_fallthru);
2691 	  gsi_commit_one_edge_insert (insert_e, NULL);
2692 	}
2693 
2694       for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2695 	   iter != aux_map.end (); ++iter)
2696 	delete (*iter).second;
2697 
2698       return;
2699     }
2700 
2701   /* To address PR57359, before actually applying store-motion check
2702      the candidates found for validity with regard to reordering
2703      relative to other stores, which we have so far disambiguated using
2704      TBAA, which isn't valid for that purpose.
2705      What matters is the order of the last stores to the mem_refs
2706      with respect to the other stores of the loop at the point of the
2707      loop exits.  */
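  /* As an illustration: if the loop contains

       *p = 1;	   <- store-motion candidate
       *q = 2;	   <- possibly aliasing store, not a candidate

     then materializing the store to *p only on the exits would move it
     after the store to *q.  That is only valid if the two do not alias
     or if the store to *q is re-issued on the exits in the original
     order (see execute_sm_exit).  */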
2708 
2709   /* For each exit compute the store order, pruning from mem_refs
2710      on the fly.  */
2711   /* The complexity of this is at least
2712      O(number of exits * number of SM refs) but in practice closer to
2713      O(number of exits * number of SM refs * number of stores).  */
2714   /* ???  Somehow do this in a single sweep over the loop body.  */
2715   auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2716   auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2717   edge e;
2718   FOR_EACH_VEC_ELT (exits, i, e)
2719     {
2720       vec<seq_entry> seq;
2721       seq.create (4);
2722       auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2723       bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2724       if (bitmap_empty_p (refs_not_in_seq))
2725 	{
2726 	  seq.release ();
2727 	  break;
2728 	}
2729       auto_bitmap fully_visited;
2730       int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2731 				 seq, refs_not_in_seq,
2732 				 refs_not_supported, false,
2733 				 fully_visited);
2734       if (res != 1)
2735 	{
2736 	  bitmap_copy (refs_not_supported, mem_refs);
2737 	  seq.release ();
2738 	  break;
2739 	}
2740       sms.safe_push (std::make_pair (e, seq));
2741     }
2742 
2743   /* Prune refs that became unsupported from the sequences of earlier processed exits.  */
2744   bool changed = !bitmap_empty_p (refs_not_supported);
2745   while (changed)
2746     {
2747       changed = false;
2748       std::pair<edge, vec<seq_entry> > *seq;
2749       FOR_EACH_VEC_ELT (sms, i, seq)
2750 	{
2751 	  bool need_to_push = false;
2752 	  for (unsigned i = 0; i < seq->second.length (); ++i)
2753 	    {
2754 	      sm_kind kind = seq->second[i].second;
2755 	      if (kind == sm_other && seq->second[i].from == NULL_TREE)
2756 		break;
2757 	      unsigned id = seq->second[i].first;
2758 	      unsigned new_idx;
2759 	      if (kind == sm_ord
2760 		  && bitmap_bit_p (refs_not_supported, id))
2761 		{
2762 		  seq->second[i].second = sm_other;
2763 		  gcc_assert (seq->second[i].from == NULL_TREE);
2764 		  need_to_push = true;
2765 		}
2766 	      else if (need_to_push
2767 		       && !sm_seq_push_down (seq->second, i, &new_idx))
2768 		{
2769 		  /* We need to push down both sm_ord and sm_other
2770 		     but for the latter we need to disqualify all
2771 		     following refs.  */
2772 		  if (kind == sm_ord)
2773 		    {
2774 		      if (bitmap_set_bit (refs_not_supported, id))
2775 			changed = true;
2776 		      seq->second[new_idx].second = sm_other;
2777 		    }
2778 		  else
2779 		    {
2780 		      for (unsigned j = seq->second.length () - 1;
2781 			   j > new_idx; --j)
2782 			if (seq->second[j].second == sm_ord
2783 			    && bitmap_set_bit (refs_not_supported,
2784 					       seq->second[j].first))
2785 			  changed = true;
2786 		      seq->second.truncate (new_idx);
2787 		      break;
2788 		    }
2789 		}
2790 	    }
2791 	}
2792     }
2793   std::pair<edge, vec<seq_entry> > *seq;
2794   FOR_EACH_VEC_ELT (sms, i, seq)
2795     {
2796       /* Prune sm_other from the end.  */
2797       while (!seq->second.is_empty ()
2798 	     && seq->second.last ().second == sm_other)
2799 	seq->second.pop ();
2800       /* Prune duplicates from the start.  */
2801       auto_bitmap seen (&lim_bitmap_obstack);
2802       unsigned j, k;
2803       for (j = k = 0; j < seq->second.length (); ++j)
2804 	if (bitmap_set_bit (seen, seq->second[j].first))
2805 	  {
2806 	    if (k != j)
2807 	      seq->second[k] = seq->second[j];
2808 	    ++k;
2809 	  }
2810       seq->second.truncate (k);
2811       /* And verify.  */
2812       seq_entry *e;
2813       FOR_EACH_VEC_ELT (seq->second, j, e)
2814 	gcc_assert (e->second == sm_ord
2815 		    || (e->second == sm_other && e->from != NULL_TREE));
2816     }
2817 
2818   /* Verify dependence for refs we cannot handle with the order preserving
2819      code (refs_not_supported) or prune them from mem_refs.  */
2820   auto_vec<seq_entry> unord_refs;
2821   EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2822     {
2823       ref = memory_accesses.refs_list[i];
2824       if (!ref_indep_loop_p (loop, ref, sm_waw))
2825 	bitmap_clear_bit (mem_refs, i);
2826       /* We've now verified that the store order for ref with respect to all
2827 	 other stores in the loop does not matter.  */
2828       else
2829 	unord_refs.safe_push (seq_entry (i, sm_unord));
2830     }
2831 
2832   hash_map<im_mem_ref *, sm_aux *> aux_map;
2833 
2834   /* Execute SM but delay the store materialization for ordered
2835      sequences on exit.  */
2836   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2837     {
2838       ref = memory_accesses.refs_list[i];
2839       execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
2840 		  false);
2841     }
2842 
2843   /* Materialize ordered store sequences on exits.  */
2844   FOR_EACH_VEC_ELT (exits, i, e)
2845     {
2846       edge append_cond_position = NULL;
2847       edge last_cond_fallthru = NULL;
2848       if (i < sms.length ())
2849 	{
2850 	  gcc_assert (sms[i].first == e);
2851 	  execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2852 			   append_cond_position, last_cond_fallthru);
2853 	  sms[i].second.release ();
2854 	}
2855       if (!unord_refs.is_empty ())
2856 	execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2857 			 append_cond_position, last_cond_fallthru);
2858       /* Commit edge inserts here to preserve the order of stores
2859 	 when an exit exits multiple loops.  */
2860       gsi_commit_one_edge_insert (e, NULL);
2861     }
2862 
2863   for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2864        iter != aux_map.end (); ++iter)
2865     delete (*iter).second;
2866 }
2867 
2868 class ref_always_accessed
2869 {
2870 public:
2871   ref_always_accessed (class loop *loop_, bool stored_p_)
2872       : loop (loop_), stored_p (stored_p_) {}
2873   bool operator () (mem_ref_loc *loc);
2874   class loop *loop;
2875   bool stored_p;
2876 };
2877 
2878 bool
2879 ref_always_accessed::operator () (mem_ref_loc *loc)
2880 {
2881   class loop *must_exec;
2882 
2883   struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2884   if (!lim_data)
2885     return false;
2886 
2887   /* If we require an always executed store make sure the statement
2888      is a store.  */
2889   if (stored_p)
2890     {
2891       tree lhs = gimple_get_lhs (loc->stmt);
2892       if (!lhs
2893 	  || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2894 	return false;
2895     }
2896 
2897   must_exec = lim_data->always_executed_in;
2898   if (!must_exec)
2899     return false;
2900 
2901   if (must_exec == loop
2902       || flow_loop_nested_p (must_exec, loop))
2903     return true;
2904 
2905   return false;
2906 }
2907 
2908 /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
2909    make sure REF is always stored to in LOOP.  */
2910 
2911 static bool
2912 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
2913 {
2914   return for_all_locs_in_loop (loop, ref,
2915 			       ref_always_accessed (loop, stored_p));
2916 }
2917 
2918 /* Returns true if REF1 and REF2 are independent.  */
2919 
2920 static bool
2921 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
2922 {
2923   if (ref1 == ref2)
2924     return true;
2925 
2926   if (dump_file && (dump_flags & TDF_DETAILS))
2927     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2928 	     ref1->id, ref2->id);
2929 
2930   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
2931     {
2932       if (dump_file && (dump_flags & TDF_DETAILS))
2933 	fprintf (dump_file, "dependent.\n");
2934       return false;
2935     }
2936   else
2937     {
2938       if (dump_file && (dump_flags & TDF_DETAILS))
2939 	fprintf (dump_file, "independent.\n");
2940       return true;
2941     }
2942 }
2943 
2944 /* Returns true if REF is independent of all other accesses in LOOP.
2945    KIND specifies the kind of dependence to consider.
2946      lim_raw assumes REF is not stored in LOOP and disambiguates RAW
2947 	     dependences so if true REF can be hoisted out of LOOP
2948      sm_war disambiguates a store REF against all other loads to see
2949 	    whether the store can be sunk across loads out of LOOP
2950      sm_waw disambiguates a store REF against all other stores to see
2951 	    whether the store can be sunk across stores out of LOOP.  */
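/* For example, in

     for (...)
       {
	 tmp = *q;
	 *p = tmp + 1;
       }

   hoisting the load of *q requires lim_raw independence from the store
   to *p, while sinking the store to *p out of the loop requires sm_war
   independence from the load of *q and, if store order cannot be
   preserved, sm_waw independence from the other stores.  */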
2952 
2953 static bool
2954 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
2955 {
2956   bool indep_p = true;
2957   bitmap refs_to_check;
2958 
2959   if (kind == sm_war)
2960     refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
2961   else
2962     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2963 
2964   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
2965       || ref->mem.ref == error_mark_node)
2966     indep_p = false;
2967   else
2968     {
2969       /* tri-state, { unknown, independent, dependent }  */
2970       dep_state state = query_loop_dependence (loop, ref, kind);
2971       if (state != dep_unknown)
2972 	return state == dep_independent ? true : false;
2973 
2974       class loop *inner = loop->inner;
2975       while (inner)
2976 	{
2977 	  if (!ref_indep_loop_p (inner, ref, kind))
2978 	    {
2979 	      indep_p = false;
2980 	      break;
2981 	    }
2982 	  inner = inner->next;
2983 	}
2984 
2985       if (indep_p)
2986 	{
2987 	  unsigned i;
2988 	  bitmap_iterator bi;
2989 	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2990 	    {
2991 	      im_mem_ref *aref = memory_accesses.refs_list[i];
2992 	      if (aref->mem.ref == error_mark_node)
2993 		{
2994 		  gimple *stmt = aref->accesses_in_loop[0].stmt;
2995 		  if ((kind == sm_war
2996 		       && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
2997 						    kind != sm_waw))
2998 		      || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
2999 						   kind != sm_waw))
3000 		    {
3001 		      indep_p = false;
3002 		      break;
3003 		    }
3004 		}
3005 	      else if (!refs_independent_p (ref, aref, kind != sm_waw))
3006 		{
3007 		  indep_p = false;
3008 		  break;
3009 		}
3010 	    }
3011 	}
3012     }
3013 
3014   if (dump_file && (dump_flags & TDF_DETAILS))
3015     fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
3016 	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3017 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
3018 
3019   /* Record the computed result in the cache.  */
3020   record_loop_dependence (loop, ref, kind,
3021 			  indep_p ? dep_independent : dep_dependent);
3022 
3023   return indep_p;
3024 }
3025 
3026 
3027 /* Returns true if we can perform store motion of REF from LOOP.  */
3028 
3029 static bool
3030 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3031 {
3032   tree base;
3033 
3034   /* Can't hoist unanalyzable refs.  */
3035   if (!MEM_ANALYZABLE (ref))
3036     return false;
3037 
3038   /* Can't hoist/sink aggregate copies.  */
3039   if (ref->mem.ref == error_mark_node)
3040     return false;
3041 
3042   /* It should be movable.  */
3043   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3044       || TREE_THIS_VOLATILE (ref->mem.ref)
3045       || !for_each_index (&ref->mem.ref, may_move_till, loop))
3046     return false;
3047 
3048   /* If it can throw, fail; we do not properly update EH info.  */
3049   if (tree_could_throw_p (ref->mem.ref))
3050     return false;
3051 
3052   /* If it can trap, it must always be executed in LOOP.
3053      Readonly memory locations may trap when storing to them, but
3054      tree_could_trap_p is a predicate for rvalues, so check that
3055      explicitly.  */
3056   base = get_base_address (ref->mem.ref);
3057   if ((tree_could_trap_p (ref->mem.ref)
3058        || (DECL_P (base) && TREE_READONLY (base)))
3059       /* ???  We can at least use false here, allowing loads?  We
3060 	 are forcing conditional stores if the ref is not always
3061 	 stored to later anyway.  So this would only guard
3062 	 the load we need to emit.  Thus when the ref is not
3063 	 loaded we can elide this completely?  */
3064       && !ref_always_accessed_p (loop, ref, true))
3065     return false;
3066 
3067   /* Verify all loads of ref can be hoisted.  */
3068   if (ref->loaded
3069       && bitmap_bit_p (ref->loaded, loop->num)
3070       && !ref_indep_loop_p (loop, ref, lim_raw))
3071     return false;
3072 
3073   /* Verify the candidate can be disambiguated against all loads,
3074      that is, we can elide all in-loop stores.  Disambiguation
3075      against stores is done later when we cannot guarantee preserving
3076      the order of stores.  */
3077   if (!ref_indep_loop_p (loop, ref, sm_war))
3078     return false;
3079 
3080   return true;
3081 }
3082 
3083 /* Marks in REFS_TO_SM the references in LOOP for which store motion should
3084    be performed.  SM_EXECUTED is the set of references for which store
3085    motion was already performed in one of the outer loops.  */
3086 
3087 static void
3088 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3089 {
3090   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3091   unsigned i;
3092   bitmap_iterator bi;
3093   im_mem_ref *ref;
3094 
3095   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3096     {
3097       ref = memory_accesses.refs_list[i];
3098       if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3099 	bitmap_set_bit (refs_to_sm, i);
3100     }
3101 }
3102 
3103 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3104    for a store motion optimization (i.e. whether we can insert statements
3105    on its exits).  */
3106 
3107 static bool
3108 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3109 		      const vec<edge> &exits)
3110 {
3111   unsigned i;
3112   edge ex;
3113 
3114   FOR_EACH_VEC_ELT (exits, i, ex)
3115     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3116       return false;
3117 
3118   return true;
3119 }
3120 
3121 /* Try to perform store motion for all memory references modified inside
3122    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
3123    store motion was already executed in one of the outer loops.  */
3124 
3125 static void
3126 store_motion_loop (class loop *loop, bitmap sm_executed)
3127 {
3128   auto_vec<edge> exits = get_loop_exit_edges (loop);
3129   class loop *subloop;
3130   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3131 
3132   if (loop_suitable_for_sm (loop, exits))
3133     {
3134       find_refs_for_sm (loop, sm_executed, sm_in_loop);
3135       if (!bitmap_empty_p (sm_in_loop))
3136 	hoist_memory_references (loop, sm_in_loop, exits);
3137     }
3138 
3139   bitmap_ior_into (sm_executed, sm_in_loop);
3140   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3141     store_motion_loop (subloop, sm_executed);
3142   bitmap_and_compl_into (sm_executed, sm_in_loop);
3143   BITMAP_FREE (sm_in_loop);
3144 }
3145 
3146 /* Try to perform store motion for all memory references modified inside
3147    loops.  */
3148 
3149 static void
3150 do_store_motion (void)
3151 {
3152   class loop *loop;
3153   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3154 
3155   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3156     store_motion_loop (loop, sm_executed);
3157 
3158   BITMAP_FREE (sm_executed);
3159 }
3160 
/* Fills ALWAYS_EXECUTED_IN information for the basic blocks of LOOP, i.e.
   for each such basic block BB records the outermost loop for which
   execution of that loop's header implies execution of BB.  CONTAINS_CALL
   is the bitmap of blocks that contain a nonpure call.  */
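/* A sketch (hypothetical loop, not from the sources):

     while (1)
       {
	 a = x + y;	   always executed when the header executes
	 if (c)
	   b = x * y;	   conditional, not always executed
	 d = a + 1;	   dominates the latch, so always executed as long
			   as no earlier block can exit the loop, trap, or
			   contain a call that might not return
       }

   Here the header block and the block computing D are recorded as always
   executed in the loop, while the block computing B is not.  */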

static void
fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, last = NULL;
  edge e;
  class loop *inn_loop = loop;

  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
    {
      auto_vec<basic_block, 64> worklist;
      worklist.reserve_exact (loop->num_nodes);
      worklist.quick_push (loop->header);
      do
	{
	  edge_iterator ei;
	  bb = worklist.pop ();

	  if (!flow_bb_inside_loop_p (inn_loop, bb))
	    {
	      /* When we are leaving a possibly infinite inner loop
		 we have to stop processing.  */
	      if (!finite_loop_p (inn_loop))
		break;
	      /* If the loop was finite we can continue with processing
		 the loop we exited to.  */
	      inn_loop = bb->loop_father;
	    }

	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	    last = bb;

	  if (bitmap_bit_p (contains_call, bb->index))
	    break;

	  /* If LOOP exits from this BB stop processing.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (!flow_bb_inside_loop_p (loop, e->dest))
	      break;
	  if (e)
	    break;

	  /* A loop might be infinite (TODO use simple loop analysis
	     to disprove this if possible).  */
	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
	    break;

	  if (bb->loop_father->header == bb)
	    /* Record that we enter into a subloop since it might not
	       be finite.  */
	    /* ???  Entering a subloop that is not always executed makes
	       fill_always_executed_in quadratic in the loop depth, since
	       we walk such loops multiple times.  In practice this has not
	       been a problem; see PR102253 for a worst-case testcase.  */
	    inn_loop = bb->loop_father;

	  /* Walk the body of LOOP sorted by dominance relation.  Additionally,
	     if a basic block S dominates the latch, then only blocks dominated
	     by S are after it.
	     This is get_loop_body_in_dom_order using a worklist algorithm and
	     stopping once we are no longer interested in visiting further
	     blocks.  */
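	  /* A sketch (hypothetical dominator sons): if BB has sons
	     { A, S, B } and S dominates the latch, S is swapped to the
	     front of the entries pushed below, so the LIFO pop order
	     visits A, B and the blocks they dominate before finally
	     visiting S.  */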
	  unsigned old_len = worklist.length ();
	  unsigned postpone = 0;
	  for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
	       son;
	       son = next_dom_son (CDI_DOMINATORS, son))
	    {
	      if (!flow_bb_inside_loop_p (loop, son))
		continue;
	      if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
		postpone = worklist.length ();
	      worklist.quick_push (son);
	    }
	  if (postpone)
	    /* Postponing the block that dominates the latch means
	       processing it last and thus putting it earliest in the
	       worklist.  */
	    std::swap (worklist[old_len], worklist[postpone]);
	}
      while (!worklist.is_empty ());

      while (1)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
			 last->index, loop->num);
	  SET_ALWAYS_EXECUTED_IN (last, loop);
	  if (last == loop->header)
	    break;
	  last = get_immediate_dominator (CDI_DOMINATORS, last);
	}
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}

/* Fills ALWAYS_EXECUTED_IN information for all basic blocks, i.e. for
   each basic block BB records the outermost loop for which execution of
   that loop's header implies execution of BB.  */

static void
fill_always_executed_in (void)
{
  basic_block bb;
  class loop *loop;

  auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
  bitmap_clear (contains_call);
  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  if (nonpure_call_p (gsi_stmt (gsi)))
	    break;
	}

      if (!gsi_end_p (gsi))
	bitmap_set_bit (contains_call, bb->index);
    }

  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}


/* Compute the global information needed by the loop invariant motion pass.  */

static void
tree_ssa_lim_initialize (bool store_motion)
{
  unsigned i;

  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;

  if (flag_tm)
    compute_transaction_bits ();

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));

  memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
  if (store_motion)
    {
      memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
      memory_accesses.all_refs_stored_in_loop.quick_grow
						      (number_of_loops (cfun));
    }

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
			 &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
			 &lim_bitmap_obstack);
      if (store_motion)
	bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
			   &lim_bitmap_obstack);
    }

  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
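  /* LI_FROM_INNERMOST walks inner loops before the loops containing
     them, so an inner loop ends up with a smaller postorder index than
     any of its enclosing loops.  */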
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    bb_loop_postorder[loop->num] = i++;
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_loaded_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);
}

/* Moves invariants out of loops.  Only "expensive" invariants are moved,
   i.e. those that are likely to be a win regardless of the register
   pressure.  Only perform store motion if STORE_MOTION is true.  */
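/* A sketch (hypothetical input, not from the testsuite):

     for (i = 0; i < n; i++)
       a[i] = x / y;

   becomes

     t = x / y;
     for (i = 0; i < n; i++)
       a[i] = t;

   since the division is invariant and expensive, whereas a cheap
   invariant like X + 1 may be left in place when moving it would only
   increase register pressure.  */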

unsigned int
loop_invariant_motion_in_fun (function *fun, bool store_motion)
{
  unsigned int todo = 0;

  tree_ssa_lim_initialize (store_motion);

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references (store_motion);

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  for (int i = 0; i < n; ++i)
    compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  if (store_motion)
    do_store_motion ();

  free (rpo);
  rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* Move the expressions that are expensive enough.  */
  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  free (rpo);

  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (fun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  tree_ssa_lim_finalize ();

  return todo;
}

/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;
  unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);

  if (!in_loop_pipeline)
    loop_optimizer_finalize ();
  else
    scev_reset ();
  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}