1 /* Loop invariant motion.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "dbgcnt.h"
50 
51 /* TODO:  Support for predicated code motion.  I.e.
52 
53    while (1)
54      {
55        if (cond)
56 	 {
57 	   a = inv;
58 	   something;
59 	 }
60      }
61 
62    Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to
64 
65    if (cond)
66      a = inv;
67    while (1)
68      {
69        if (cond)
70 	 something;
71      }  */
72 
73 /* The auxiliary data kept for each statement.  */
74 
75 struct lim_aux_data
76 {
  class loop *max_loop;	/* The outermost loop in which the statement
78 				   is invariant.  */
79 
  class loop *tgt_loop;	/* The loop out of which we want to move the
81 				   invariant.  */
82 
83   class loop *always_executed_in;
				/* The outermost loop for which we are sure
85 				   the statement is executed if the loop
86 				   is entered.  */
87 
88   unsigned cost;		/* Cost of the computation performed by the
89 				   statement.  */
90 
91   unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */
92 
  vec<gimple *> depends;	/* Vector of statements that must also be
94 				   hoisted out of the loop when this statement
95 				   is hoisted; i.e. those that define the
96 				   operands of the statement and are inside of
97 				   the MAX_LOOP loop.  */
98 };
99 
100 /* Maps statements to their lim_aux_data.  */
101 
102 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
103 
104 /* Description of a memory reference location.  */
105 
106 struct mem_ref_loc
107 {
108   tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in which it occurs.  */
110 };
111 
112 
113 /* Description of a memory reference.  */
114 
115 class im_mem_ref
116 {
117 public:
118   unsigned id : 30;		/* ID assigned to the memory reference
119 				   (its index in memory_accesses.refs_list)  */
120   unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
121   unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
122   hashval_t hash;		/* Its hash value.  */
123 
124   /* The memory access itself and associated caching of alias-oracle
125      query meta-data.  */
126   ao_ref mem;
127 
  bitmap stored;		/* The set of loops in which this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in which this memory location
				   is loaded from.  */
132   vec<mem_ref_loc>		accesses_in_loop;
133 				/* The locations of the accesses.  Vector
134 				   indexed by the loop number.  */
135 
136   /* The following set is computed on demand.  */
  bitmap_head dep_loop;		/* The set of loops in which the memory
138 				   reference is {in,}dependent in
139 				   different modes.  */
140 };
141 
142 /* We use six bits per loop in the ref->dep_loop bitmap to record
143    the dep_kind x dep_state combinations.  */
144 
145 enum dep_kind { lim_raw, sm_war, sm_waw };
146 enum dep_state { dep_unknown, dep_independent, dep_dependent };
147 
148 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */
149 
150 static void
record_loop_dependence (class loop *loop, im_mem_ref *ref,
152 			dep_kind kind, dep_state state)
153 {
154   gcc_assert (state != dep_unknown);
  unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
156   bitmap_set_bit (&ref->dep_loop, bit);
157 }
158 
159 /* Query the loop dependence cache of REF for LOOP, KIND.  */
160 
161 static dep_state
query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
163 {
164   unsigned first_bit = 6 * loop->num + kind * 2;
165   if (bitmap_bit_p (&ref->dep_loop, first_bit))
166     return dep_independent;
167   else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
168     return dep_dependent;
169   return dep_unknown;
170 }
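
/* Illustrative sketch (not part of the pass): with the encoding above,
   a loop with num == 2 and kind == sm_war (1) uses bits 6 * 2 + 1 * 2 == 14
   and 15 of REF->dep_loop; the even bit of the pair records that the
   reference is independent, the odd bit that it is dependent, and neither
   bit set means the state is still unknown.  */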
171 
172 /* Mem_ref hashtable helpers.  */
173 
174 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
175 {
176   typedef ao_ref *compare_type;
177   static inline hashval_t hash (const im_mem_ref *);
178   static inline bool equal (const im_mem_ref *, const ao_ref *);
179 };
180 
181 /* A hash function for class im_mem_ref object OBJ.  */
182 
183 inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
185 {
186   return mem->hash;
187 }
188 
189 /* An equality function for class im_mem_ref object MEM1 with
190    memory reference OBJ2.  */
191 
192 inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
194 {
195   if (obj2->max_size_known_p ())
196     return (mem1->ref_decomposed
197 	    && operand_equal_p (mem1->mem.base, obj2->base, 0)
198 	    && known_eq (mem1->mem.offset, obj2->offset)
199 	    && known_eq (mem1->mem.size, obj2->size)
200 	    && known_eq (mem1->mem.max_size, obj2->max_size)
201 	    && mem1->mem.volatile_p == obj2->volatile_p
202 	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special case where we didn't canonicalize yet and the
		   incoming ref is an alias-set zero MEM we pick
		   the correct one already.  */
207 		|| (!mem1->ref_canonical
208 		    && (TREE_CODE (obj2->ref) == MEM_REF
209 			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
210 		    && obj2->ref_alias_set == 0)
211 		/* Likewise if there's a canonical ref with alias-set zero.  */
212 		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
213 	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
214 				   TREE_TYPE (obj2->ref)));
215   else
216     return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
217 }
218 
219 
220 /* Description of memory accesses in loops.  */
221 
222 static struct
223 {
224   /* The hash table of memory references accessed in loops.  */
225   hash_table<mem_ref_hasher> *refs;
226 
227   /* The list of memory references.  */
228   vec<im_mem_ref *> refs_list;
229 
230   /* The set of memory references accessed in each loop.  */
231   vec<bitmap_head> refs_loaded_in_loop;
232 
233   /* The set of memory references stored in each loop.  */
234   vec<bitmap_head> refs_stored_in_loop;
235 
  /* The set of memory references stored in each loop, including subloops.  */
237   vec<bitmap_head> all_refs_stored_in_loop;
238 
239   /* Cache for expanding memory addresses.  */
240   hash_map<tree, name_expansion *> *ttae_cache;
241 } memory_accesses;
242 
243 /* Obstack for the bitmaps in the above data structures.  */
244 static bitmap_obstack lim_bitmap_obstack;
245 static obstack mem_ref_obstack;
246 
247 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
248 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
249 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
250 
251 /* Minimum cost of an expensive expression.  */
252 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
253 
254 /* The outermost loop for which execution of the header guarantees that the
255    block will be executed.  */
256 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
257 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
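
/* Illustrative example: in

     for (i = 0; i < n; i++)
       {
	 a[i] = x;        <- executed whenever the loop header is
	 if (a[i] > 0)
	   b[i] = y;      <- only conditionally executed
       }

   the basic block containing "a[i] = x" gets ALWAYS_EXECUTED_IN set to the
   loop (or to an outer loop that also guarantees its execution), while the
   block containing "b[i] = y" keeps NULL.  */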
258 
259 /* ID of the shared unanalyzable mem.  */
260 #define UNANALYZABLE_MEM_ID 0
261 
262 /* Whether the reference was analyzable.  */
263 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
264 
265 static struct lim_aux_data *
init_lim_data (gimple *stmt)
267 {
268   lim_aux_data *p = XCNEW (struct lim_aux_data);
269   lim_aux_data_map->put (stmt, p);
270 
271   return p;
272 }
273 
274 static struct lim_aux_data *
get_lim_data (gimple *stmt)
276 {
277   lim_aux_data **p = lim_aux_data_map->get (stmt);
278   if (!p)
279     return NULL;
280 
281   return *p;
282 }
283 
284 /* Releases the memory occupied by DATA.  */
285 
286 static void
free_lim_aux_data (struct lim_aux_data *data)
288 {
289   data->depends.release ();
290   free (data);
291 }
292 
293 static void
clear_lim_data (gimple *stmt)
295 {
296   lim_aux_data **p = lim_aux_data_map->get (stmt);
297   if (!p)
298     return;
299 
300   free_lim_aux_data (*p);
301   *p = NULL;
302 }
303 
304 
305 /* The possibilities of statement movement.  */
306 enum move_pos
307   {
308     MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause a non-executed statement
				   to become executed -- memory accesses, ... */
311     MOVE_POSSIBLE		/* Unlimited movement.  */
312   };
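
/* Illustrative examples (assuming default flags and no abnormal edges):

     x_1 = y_2 + z_3;	MOVE_POSSIBLE -- pure arithmetic on SSA names
     t_4 = *p_5;	MOVE_PRESERVE_EXECUTION -- the load may trap
     *p_5 = t_4;	MOVE_IMPOSSIBLE -- the store has a virtual definition  */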
313 
314 
315 /* If it is possible to hoist the statement STMT unconditionally,
316    returns MOVE_POSSIBLE.
317    If it is possible to hoist the statement STMT, but we must avoid making
318    it executed if it would not be executed in the original program (e.g.
319    because it may trap), return MOVE_PRESERVE_EXECUTION.
320    Otherwise return MOVE_IMPOSSIBLE.  */
321 
322 enum move_pos
movement_possibility (gimple *stmt)
324 {
325   tree lhs;
326   enum move_pos ret = MOVE_POSSIBLE;
327 
328   if (flag_unswitch_loops
329       && gimple_code (stmt) == GIMPLE_COND)
330     {
331       /* If we perform unswitching, force the operands of the invariant
332 	 condition to be moved out of the loop.  */
333       return MOVE_POSSIBLE;
334     }
335 
336   if (gimple_code (stmt) == GIMPLE_PHI
337       && gimple_phi_num_args (stmt) <= 2
338       && !virtual_operand_p (gimple_phi_result (stmt))
339       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
340     return MOVE_POSSIBLE;
341 
342   if (gimple_get_lhs (stmt) == NULL_TREE)
343     return MOVE_IMPOSSIBLE;
344 
345   if (gimple_vdef (stmt))
346     return MOVE_IMPOSSIBLE;
347 
348   if (stmt_ends_bb_p (stmt)
349       || gimple_has_volatile_ops (stmt)
350       || gimple_has_side_effects (stmt)
351       || stmt_could_throw_p (cfun, stmt))
352     return MOVE_IMPOSSIBLE;
353 
354   if (is_gimple_call (stmt))
355     {
      /* While a pure or const call is guaranteed to have no side effects, we
357 	 cannot move it arbitrarily.  Consider code like
358 
359 	 char *s = something ();
360 
361 	 while (1)
362 	   {
363 	     if (s)
364 	       t = strlen (s);
365 	     else
366 	       t = 0;
367 	   }
368 
369 	 Here the strlen call cannot be moved out of the loop, even though
370 	 s is invariant.  In addition to possibly creating a call with
371 	 invalid arguments, moving out a function call that is not executed
372 	 may cause performance regressions in case the call is costly and
373 	 not executed at all.  */
374       ret = MOVE_PRESERVE_EXECUTION;
375       lhs = gimple_call_lhs (stmt);
376     }
377   else if (is_gimple_assign (stmt))
378     lhs = gimple_assign_lhs (stmt);
379   else
380     return MOVE_IMPOSSIBLE;
381 
382   if (TREE_CODE (lhs) == SSA_NAME
383       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
384     return MOVE_IMPOSSIBLE;
385 
386   if (TREE_CODE (lhs) != SSA_NAME
387       || gimple_could_trap_p (stmt))
388     return MOVE_PRESERVE_EXECUTION;
389 
390   /* Non local loads in a transaction cannot be hoisted out.  Well,
391      unless the load happens on every path out of the loop, but we
392      don't take this into account yet.  */
393   if (flag_tm
394       && gimple_in_transaction (stmt)
395       && gimple_assign_single_p (stmt))
396     {
397       tree rhs = gimple_assign_rhs1 (stmt);
398       if (DECL_P (rhs) && is_global_var (rhs))
399 	{
400 	  if (dump_file)
401 	    {
402 	      fprintf (dump_file, "Cannot hoist conditional load of ");
403 	      print_generic_expr (dump_file, rhs, TDF_SLIM);
404 	      fprintf (dump_file, " because it is in a transaction.\n");
405 	    }
406 	  return MOVE_IMPOSSIBLE;
407 	}
408     }
409 
410   return ret;
411 }
412 
/* Suppose that operand DEF is used inside LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */
417 
418 static class loop *
outermost_invariant_loop (tree def, class loop *loop)
420 {
421   gimple *def_stmt;
422   basic_block def_bb;
423   class loop *max_loop;
424   struct lim_aux_data *lim_data;
425 
426   if (!def)
427     return superloop_at_depth (loop, 1);
428 
429   if (TREE_CODE (def) != SSA_NAME)
430     {
431       gcc_assert (is_gimple_min_invariant (def));
432       return superloop_at_depth (loop, 1);
433     }
434 
435   def_stmt = SSA_NAME_DEF_STMT (def);
436   def_bb = gimple_bb (def_stmt);
437   if (!def_bb)
438     return superloop_at_depth (loop, 1);
439 
440   max_loop = find_common_loop (loop, def_bb->loop_father);
441 
442   lim_data = get_lim_data (def_stmt);
443   if (lim_data != NULL && lim_data->max_loop != NULL)
444     max_loop = find_common_loop (max_loop,
445 				 loop_outer (lim_data->max_loop));
446   if (max_loop == loop)
447     return NULL;
448   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
449 
450   return max_loop;
451 }
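
/* Illustrative sketch: for

     n_1 = ...;			<- defined before loop 1
     for (...)			<- loop 1
       for (...)		<- loop 2
	 ... use (n_1) ...

   outermost_invariant_loop (n_1, loop 2) returns loop 1, the outermost loop
   enclosing loop 2 in which n_1 is invariant, while for a name defined
   inside loop 2 itself it returns NULL.  */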
452 
453 /* DATA is a structure containing information associated with a statement
454    inside LOOP.  DEF is one of the operands of this statement.
455 
   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined in the DATA->depends list.  Additionally,
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.
462 
463    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
464 
465 static bool
add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
467 		bool add_cost)
468 {
469   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
470   basic_block def_bb = gimple_bb (def_stmt);
471   class loop *max_loop;
472   struct lim_aux_data *def_data;
473 
474   if (!def_bb)
475     return true;
476 
477   max_loop = outermost_invariant_loop (def, loop);
478   if (!max_loop)
479     return false;
480 
481   if (flow_loop_nested_p (data->max_loop, max_loop))
482     data->max_loop = max_loop;
483 
484   def_data = get_lim_data (def_stmt);
485   if (!def_data)
486     return true;
487 
488   if (add_cost
489       /* Only add the cost if the statement defining DEF is inside LOOP,
490 	 i.e. if it is likely that by moving the invariants dependent
491 	 on it, we will be able to avoid creating a new register for
492 	 it (since it will be only used in these dependent invariants).  */
493       && def_bb->loop_father == loop)
494     data->cost += def_data->cost;
495 
496   data->depends.safe_push (def_stmt);
497 
498   return true;
499 }
500 
501 /* Returns an estimate for a cost of statement STMT.  The values here
502    are just ad-hoc constants, similar to costs for inlining.  */
503 
504 static unsigned
stmt_cost (gimple *stmt)
506 {
507   /* Always try to create possibilities for unswitching.  */
508   if (gimple_code (stmt) == GIMPLE_COND
509       || gimple_code (stmt) == GIMPLE_PHI)
510     return LIM_EXPENSIVE;
511 
512   /* We should be hoisting calls if possible.  */
513   if (is_gimple_call (stmt))
514     {
515       tree fndecl;
516 
517       /* Unless the call is a builtin_constant_p; this always folds to a
518 	 constant, so moving it is useless.  */
519       fndecl = gimple_call_fndecl (stmt);
520       if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
521 	return 0;
522 
523       return LIM_EXPENSIVE;
524     }
525 
526   /* Hoisting memory references out should almost surely be a win.  */
527   if (gimple_references_memory_p (stmt))
528     return LIM_EXPENSIVE;
529 
530   if (gimple_code (stmt) != GIMPLE_ASSIGN)
531     return 1;
532 
533   switch (gimple_assign_rhs_code (stmt))
534     {
535     case MULT_EXPR:
536     case WIDEN_MULT_EXPR:
537     case WIDEN_MULT_PLUS_EXPR:
538     case WIDEN_MULT_MINUS_EXPR:
539     case DOT_PROD_EXPR:
540     case TRUNC_DIV_EXPR:
541     case CEIL_DIV_EXPR:
542     case FLOOR_DIV_EXPR:
543     case ROUND_DIV_EXPR:
544     case EXACT_DIV_EXPR:
545     case CEIL_MOD_EXPR:
546     case FLOOR_MOD_EXPR:
547     case ROUND_MOD_EXPR:
548     case TRUNC_MOD_EXPR:
549     case RDIV_EXPR:
550       /* Division and multiplication are usually expensive.  */
551       return LIM_EXPENSIVE;
552 
553     case LSHIFT_EXPR:
554     case RSHIFT_EXPR:
555     case WIDEN_LSHIFT_EXPR:
556     case LROTATE_EXPR:
557     case RROTATE_EXPR:
558       /* Shifts and rotates are usually expensive.  */
559       return LIM_EXPENSIVE;
560 
561     case CONSTRUCTOR:
562       /* Make vector construction cost proportional to the number
563          of elements.  */
564       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
565 
566     case SSA_NAME:
567     case PAREN_EXPR:
568       /* Whether or not something is wrapped inside a PAREN_EXPR
569          should not change move cost.  Nor should an intermediate
570 	 unpropagated SSA name copy.  */
571       return 0;
572 
573     default:
574       return 1;
575     }
576 }
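
/* Illustrative cost examples for the above: a division or multiplication
   like x_1 = y_2 * z_3 and any statement referencing memory are considered
   expensive (LIM_EXPENSIVE), a plain addition gets cost 1, and an SSA name
   copy or a builtin_constant_p call gets cost 0.  */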
577 
/* Finds the outermost loop between OUTER and LOOP in which the memory reference
579    REF is independent.  If REF is not independent in LOOP, NULL is returned
580    instead.  */
581 
582 static class loop *
outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
584 {
585   class loop *aloop;
586 
587   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
588     return NULL;
589 
590   for (aloop = outer;
591        aloop != loop;
592        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
593     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
594 	&& ref_indep_loop_p (aloop, ref, lim_raw))
595       return aloop;
596 
597   if (ref_indep_loop_p (loop, ref, lim_raw))
598     return loop;
599   else
600     return NULL;
601 }
602 
603 /* If there is a simple load or store to a memory reference in STMT, returns
604    the location of the memory reference, and sets IS_STORE according to whether
605    it is a store or load.  Otherwise, returns NULL.  */
606 
607 static tree *
simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
609 {
610   tree *lhs, *rhs;
611 
612   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
613   if (!gimple_assign_single_p (stmt))
614     return NULL;
615 
616   lhs = gimple_assign_lhs_ptr (stmt);
617   rhs = gimple_assign_rhs1_ptr (stmt);
618 
619   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
620     {
621       *is_store = false;
622       return rhs;
623     }
624   else if (gimple_vdef (stmt)
625 	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
626     {
627       *is_store = true;
628       return lhs;
629     }
630   else
631     return NULL;
632 }
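
/* Illustrative examples of what the above recognizes:

     x_1 = a.b;		load  -- returns the RHS location, *IS_STORE = false
     a.b = x_1;		store -- returns the LHS location, *IS_STORE = true
     a.b = c.d;		aggregate copy -- not recognized, returns NULL  */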
633 
634 /* From a controlling predicate in DOM determine the arguments from
635    the PHI node PHI that are chosen if the predicate evaluates to
636    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
637    they are non-NULL.  Returns true if the arguments can be determined,
638    else return false.  */
639 
640 static bool
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
642 				  tree *true_arg_p, tree *false_arg_p)
643 {
644   edge te, fe;
645   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
646 					     &te, &fe))
647     return false;
648 
649   if (true_arg_p)
650     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
651   if (false_arg_p)
652     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
653 
654   return true;
655 }
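
/* Illustrative CFG sketch for the above:

     if (cond_1)		<- DOM ends in this GIMPLE_COND
       x_2 = a_3;
     else
       x_4 = b_5;
     # x_6 = PHI <x_2, x_4>

   here *TRUE_ARG_P is set to x_2 and *FALSE_ARG_P to x_4, provided the PHI
   arguments are completely controlled by the predicate in DOM.  */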
656 
/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose a loop such that
   we preserve whether STMT is executed.  It also fills in other related
   information in LIM_DATA (STMT).
663 
664    The function returns false if STMT cannot be hoisted outside of the loop it
665    is defined in, and true otherwise.  */
666 
667 static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
669 {
670   basic_block bb = gimple_bb (stmt);
671   class loop *loop = bb->loop_father;
672   class loop *level;
673   struct lim_aux_data *lim_data = get_lim_data (stmt);
674   tree val;
675   ssa_op_iter iter;
676 
677   if (must_preserve_exec)
678     level = ALWAYS_EXECUTED_IN (bb);
679   else
680     level = superloop_at_depth (loop, 1);
681   lim_data->max_loop = level;
682 
683   if (gphi *phi = dyn_cast <gphi *> (stmt))
684     {
685       use_operand_p use_p;
686       unsigned min_cost = UINT_MAX;
687       unsigned total_cost = 0;
688       struct lim_aux_data *def_data;
689 
690       /* We will end up promoting dependencies to be unconditionally
691 	 evaluated.  For this reason the PHI cost (and thus the
692 	 cost we remove from the loop by doing the invariant motion)
693 	 is that of the cheapest PHI argument dependency chain.  */
694       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
695 	{
696 	  val = USE_FROM_PTR (use_p);
697 
698 	  if (TREE_CODE (val) != SSA_NAME)
699 	    {
	      /* Assign cost 1 to constants.  */
701 	      min_cost = MIN (min_cost, 1);
702 	      total_cost += 1;
703 	      continue;
704 	    }
705 	  if (!add_dependency (val, lim_data, loop, false))
706 	    return false;
707 
708 	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
709 	  if (gimple_bb (def_stmt)
710 	      && gimple_bb (def_stmt)->loop_father == loop)
711 	    {
712 	      def_data = get_lim_data (def_stmt);
713 	      if (def_data)
714 		{
715 		  min_cost = MIN (min_cost, def_data->cost);
716 		  total_cost += def_data->cost;
717 		}
718 	    }
719 	}
720 
721       min_cost = MIN (min_cost, total_cost);
722       lim_data->cost += min_cost;
723 
724       if (gimple_phi_num_args (phi) > 1)
725 	{
726 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
727 	  gimple *cond;
728 	  if (gsi_end_p (gsi_last_bb (dom)))
729 	    return false;
730 	  cond = gsi_stmt (gsi_last_bb (dom));
731 	  if (gimple_code (cond) != GIMPLE_COND)
732 	    return false;
733 	  /* Verify that this is an extended form of a diamond and
734 	     the PHI arguments are completely controlled by the
735 	     predicate in DOM.  */
736 	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
737 	    return false;
738 
739 	  /* Fold in dependencies and cost of the condition.  */
740 	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
741 	    {
742 	      if (!add_dependency (val, lim_data, loop, false))
743 		return false;
744 	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
745 	      if (def_data)
746 		lim_data->cost += def_data->cost;
747 	    }
748 
749 	  /* We want to avoid unconditionally executing very expensive
750 	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
752 	     We also are not sure whether the control-flow inside the
753 	     loop will vanish.  */
754 	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
755 	      && !(min_cost != 0
756 		   && total_cost / min_cost <= 2))
757 	    return false;
758 
759 	  /* Assume that the control-flow in the loop will vanish.
760 	     ???  We should verify this and not artificially increase
761 	     the cost if that is not the case.  */
762 	  lim_data->cost += stmt_cost (stmt);
763 	}
764 
765       return true;
766     }
767   else
768     FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
769       if (!add_dependency (val, lim_data, loop, true))
770 	return false;
771 
772   if (gimple_vuse (stmt))
773     {
774       im_mem_ref *ref
775 	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
776       if (ref
777 	  && MEM_ANALYZABLE (ref))
778 	{
779 	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
780 						     loop, ref);
781 	  if (!lim_data->max_loop)
782 	    return false;
783 	}
784       else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
785 	return false;
786     }
787 
788   lim_data->cost += stmt_cost (stmt);
789 
790   return true;
791 }
792 
793 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
794    and that one of the operands of this statement is computed by STMT.
795    Ensure that STMT (together with all the statements that define its
796    operands) is hoisted at least out of the loop LEVEL.  */
797 
798 static void
set_level (gimple *stmt, class loop *orig_loop, class loop *level)
800 {
801   class loop *stmt_loop = gimple_bb (stmt)->loop_father;
802   struct lim_aux_data *lim_data;
803   gimple *dep_stmt;
804   unsigned i;
805 
806   stmt_loop = find_common_loop (orig_loop, stmt_loop);
807   lim_data = get_lim_data (stmt);
808   if (lim_data != NULL && lim_data->tgt_loop != NULL)
809     stmt_loop = find_common_loop (stmt_loop,
810 				  loop_outer (lim_data->tgt_loop));
811   if (flow_loop_nested_p (stmt_loop, level))
812     return;
813 
814   gcc_assert (level == lim_data->max_loop
815 	      || flow_loop_nested_p (lim_data->max_loop, level));
816 
817   lim_data->tgt_loop = level;
818   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
819     set_level (dep_stmt, orig_loop, level);
820 }
821 
/* Determines an outermost loop from which we want to hoist the statement STMT.
   For now we choose the outermost possible loop.  TODO -- use profiling
824    information to set it more sanely.  */
825 
826 static void
set_profitable_level (gimple *stmt)
828 {
829   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
830 }
831 
832 /* Returns true if STMT is a call that has side effects.  */
833 
834 static bool
nonpure_call_p (gimple *stmt)
836 {
837   if (gimple_code (stmt) != GIMPLE_CALL)
838     return false;
839 
840   return gimple_has_side_effects (stmt);
841 }
842 
843 /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
844 
845 static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
847 {
848   gassign *stmt, *stmt1, *stmt2;
849   tree name, lhs, type;
850   tree real_one;
851   gimple_stmt_iterator gsi;
852 
853   stmt = as_a <gassign *> (gsi_stmt (*bsi));
854   lhs = gimple_assign_lhs (stmt);
855   type = TREE_TYPE (lhs);
856 
857   real_one = build_one_cst (type);
858 
859   name = make_temp_ssa_name (type, NULL, "reciptmp");
860   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
861 			       gimple_assign_rhs2 (stmt));
862   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
863 			       gimple_assign_rhs1 (stmt));
864 
865   /* Replace division stmt with reciprocal and multiply stmts.
866      The multiply stmt is not invariant, so update iterator
867      and avoid rescanning.  */
868   gsi = *bsi;
869   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
870   gsi_replace (&gsi, stmt2, true);
871 
872   /* Continue processing with invariant reciprocal statement.  */
873   return stmt1;
874 }
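
/* Illustrative source-level view of the above, with loop-invariant b:

     x = a / b;

   becomes

     reciptmp = 1.0 / b;	<- invariant, candidate for hoisting
     x = a * reciptmp;

   and the reciprocal statement is what continues to be processed.  */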
875 
876 /* Check if the pattern at *BSI is a bittest of the form
877    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
878 
879 static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
881 {
882   gassign *stmt;
883   gimple *stmt1;
884   gassign *stmt2;
885   gimple *use_stmt;
886   gcond *cond_stmt;
887   tree lhs, name, t, a, b;
888   use_operand_p use;
889 
890   stmt = as_a <gassign *> (gsi_stmt (*bsi));
891   lhs = gimple_assign_lhs (stmt);
892 
893   /* Verify that the single use of lhs is a comparison against zero.  */
894   if (TREE_CODE (lhs) != SSA_NAME
895       || !single_imm_use (lhs, &use, &use_stmt))
896     return stmt;
897   cond_stmt = dyn_cast <gcond *> (use_stmt);
898   if (!cond_stmt)
899     return stmt;
900   if (gimple_cond_lhs (cond_stmt) != lhs
901       || (gimple_cond_code (cond_stmt) != NE_EXPR
902 	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
903       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
904     return stmt;
905 
906   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
907   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
908   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
909     return stmt;
910 
911   /* There is a conversion in between possibly inserted by fold.  */
912   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
913     {
914       t = gimple_assign_rhs1 (stmt1);
915       if (TREE_CODE (t) != SSA_NAME
916 	  || !has_single_use (t))
917 	return stmt;
918       stmt1 = SSA_NAME_DEF_STMT (t);
919       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
920 	return stmt;
921     }
922 
923   /* Verify that B is loop invariant but A is not.  Verify that with
924      all the stmt walking we are still in the same loop.  */
925   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
926       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
927     return stmt;
928 
929   a = gimple_assign_rhs1 (stmt1);
930   b = gimple_assign_rhs2 (stmt1);
931 
932   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
933       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
934     {
935       gimple_stmt_iterator rsi;
936 
937       /* 1 << B */
938       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
939 		       build_int_cst (TREE_TYPE (a), 1), b);
940       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
941       stmt1 = gimple_build_assign (name, t);
942 
943       /* A & (1 << B) */
944       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
945       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
946       stmt2 = gimple_build_assign (name, t);
947 
948       /* Replace the SSA_NAME we compare against zero.  Adjust
949 	 the type of zero accordingly.  */
950       SET_USE (use, name);
951       gimple_cond_set_rhs (cond_stmt,
952 			   build_int_cst_type (TREE_TYPE (name),
953 					       0));
954 
955       /* Don't use gsi_replace here, none of the new assignments sets
956 	 the variable originally set in stmt.  Move bsi to stmt1, and
957 	 then remove the original stmt, so that we get a chance to
958 	 retain debug info for it.  */
959       rsi = *bsi;
960       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
961       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
962       gimple *to_release = gsi_stmt (rsi);
963       gsi_remove (&rsi, true);
964       release_defs (to_release);
965 
966       return stmt1;
967     }
968 
969   return stmt;
970 }
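
/* Illustrative source-level view of the rewrite above, with loop-invariant B
   and loop-variant A:

     if (((A >> B) & 1) != 0) ...

   becomes

     shifttmp_1 = 1 << B;	<- invariant, candidate for hoisting
     shifttmp_2 = A & shifttmp_1;
     if (shifttmp_2 != 0) ...  */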
971 
/* Determine the outermost loops in which the statements in basic block BB
   are invariant, and record them in the LIM_DATA associated with the
   statements.  */
975 
976 static void
compute_invariantness (basic_block bb)
978 {
979   enum move_pos pos;
980   gimple_stmt_iterator bsi;
981   gimple *stmt;
982   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
983   class loop *outermost = ALWAYS_EXECUTED_IN (bb);
984   struct lim_aux_data *lim_data;
985 
986   if (!loop_outer (bb->loop_father))
987     return;
988 
989   if (dump_file && (dump_flags & TDF_DETAILS))
990     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
991 	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
992 
  /* Look at PHI nodes, but only if there are at most two.
994      ???  We could relax this further by post-processing the inserted
995      code and transforming adjacent cond-exprs with the same predicate
996      to control flow again.  */
997   bsi = gsi_start_phis (bb);
998   if (!gsi_end_p (bsi)
999       && ((gsi_next (&bsi), gsi_end_p (bsi))
1000 	  || (gsi_next (&bsi), gsi_end_p (bsi))))
1001     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1002       {
1003 	stmt = gsi_stmt (bsi);
1004 
1005 	pos = movement_possibility (stmt);
1006 	if (pos == MOVE_IMPOSSIBLE)
1007 	  continue;
1008 
1009 	lim_data = get_lim_data (stmt);
1010 	if (! lim_data)
1011 	  lim_data = init_lim_data (stmt);
1012 	lim_data->always_executed_in = outermost;
1013 
1014 	if (!determine_max_movement (stmt, false))
1015 	  {
1016 	    lim_data->max_loop = NULL;
1017 	    continue;
1018 	  }
1019 
1020 	if (dump_file && (dump_flags & TDF_DETAILS))
1021 	  {
1022 	    print_gimple_stmt (dump_file, stmt, 2);
1023 	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1024 		     loop_depth (lim_data->max_loop),
1025 		     lim_data->cost);
1026 	  }
1027 
1028 	if (lim_data->cost >= LIM_EXPENSIVE)
1029 	  set_profitable_level (stmt);
1030       }
1031 
1032   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1033     {
1034       stmt = gsi_stmt (bsi);
1035 
1036       pos = movement_possibility (stmt);
1037       if (pos == MOVE_IMPOSSIBLE)
1038 	{
1039 	  if (nonpure_call_p (stmt))
1040 	    {
1041 	      maybe_never = true;
1042 	      outermost = NULL;
1043 	    }
1044 	  /* Make sure to note always_executed_in for stores to make
1045 	     store-motion work.  */
1046 	  else if (stmt_makes_single_store (stmt))
1047 	    {
1048 	      struct lim_aux_data *lim_data = get_lim_data (stmt);
1049 	      if (! lim_data)
1050 		lim_data = init_lim_data (stmt);
1051 	      lim_data->always_executed_in = outermost;
1052 	    }
1053 	  continue;
1054 	}
1055 
1056       if (is_gimple_assign (stmt)
1057 	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1058 	      == GIMPLE_BINARY_RHS))
1059 	{
1060 	  tree op0 = gimple_assign_rhs1 (stmt);
1061 	  tree op1 = gimple_assign_rhs2 (stmt);
1062 	  class loop *ol1 = outermost_invariant_loop (op1,
1063 					loop_containing_stmt (stmt));
1064 
1065 	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1066 	     to be hoisted out of loop, saving expensive divide.  */
1067 	  if (pos == MOVE_POSSIBLE
1068 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1069 	      && flag_unsafe_math_optimizations
1070 	      && !flag_trapping_math
1071 	      && ol1 != NULL
1072 	      && outermost_invariant_loop (op0, ol1) == NULL)
1073 	    stmt = rewrite_reciprocal (&bsi);
1074 
1075 	  /* If the shift count is invariant, convert (A >> B) & 1 to
1076 	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
1077 	     saving an expensive shift.  */
1078 	  if (pos == MOVE_POSSIBLE
1079 	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1080 	      && integer_onep (op1)
1081 	      && TREE_CODE (op0) == SSA_NAME
1082 	      && has_single_use (op0))
1083 	    stmt = rewrite_bittest (&bsi);
1084 	}
1085 
1086       lim_data = get_lim_data (stmt);
1087       if (! lim_data)
1088 	lim_data = init_lim_data (stmt);
1089       lim_data->always_executed_in = outermost;
1090 
1091       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1092 	continue;
1093 
1094       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1095 	{
1096 	  lim_data->max_loop = NULL;
1097 	  continue;
1098 	}
1099 
1100       if (dump_file && (dump_flags & TDF_DETAILS))
1101 	{
1102 	  print_gimple_stmt (dump_file, stmt, 2);
1103 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1104 		   loop_depth (lim_data->max_loop),
1105 		   lim_data->cost);
1106 	}
1107 
1108       if (lim_data->cost >= LIM_EXPENSIVE)
1109 	set_profitable_level (stmt);
1110     }
1111 }
1112 
1113 /* Hoist the statements in basic block BB out of the loops prescribed by
1114    data stored in LIM_DATA structures associated with each statement.  Callback
1115    for walk_dominator_tree.  */
1116 
1117 unsigned int
move_computations_worker (basic_block bb)
1119 {
1120   class loop *level;
1121   unsigned cost = 0;
1122   struct lim_aux_data *lim_data;
1123   unsigned int todo = 0;
1124 
1125   if (!loop_outer (bb->loop_father))
1126     return todo;
1127 
1128   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1129     {
1130       gassign *new_stmt;
1131       gphi *stmt = bsi.phi ();
1132 
1133       lim_data = get_lim_data (stmt);
1134       if (lim_data == NULL)
1135 	{
1136 	  gsi_next (&bsi);
1137 	  continue;
1138 	}
1139 
1140       cost = lim_data->cost;
1141       level = lim_data->tgt_loop;
1142       clear_lim_data (stmt);
1143 
1144       if (!level)
1145 	{
1146 	  gsi_next (&bsi);
1147 	  continue;
1148 	}
1149 
1150       if (dump_file && (dump_flags & TDF_DETAILS))
1151 	{
1152 	  fprintf (dump_file, "Moving PHI node\n");
1153 	  print_gimple_stmt (dump_file, stmt, 0);
1154 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1155 		   cost, level->num);
1156 	}
1157 
1158       if (gimple_phi_num_args (stmt) == 1)
1159 	{
1160 	  tree arg = PHI_ARG_DEF (stmt, 0);
1161 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1162 					  TREE_CODE (arg), arg);
1163 	}
1164       else
1165 	{
1166 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1167 	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
1168 	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1169 	  /* Get the PHI arguments corresponding to the true and false
1170 	     edges of COND.  */
1171 	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1172 	  gcc_assert (arg0 && arg1);
1173 	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1174 		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1175 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1176 					  COND_EXPR, t, arg0, arg1);
1177 	  todo |= TODO_cleanup_cfg;
1178 	}
1179       if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1180 	  && (!ALWAYS_EXECUTED_IN (bb)
1181 	      || (ALWAYS_EXECUTED_IN (bb) != level
1182 		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1183 	{
1184 	  tree lhs = gimple_assign_lhs (new_stmt);
1185 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1186 	}
1187       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1188       remove_phi_node (&bsi, false);
1189     }
1190 
1191   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1192     {
1193       edge e;
1194 
1195       gimple *stmt = gsi_stmt (bsi);
1196 
1197       lim_data = get_lim_data (stmt);
1198       if (lim_data == NULL)
1199 	{
1200 	  gsi_next (&bsi);
1201 	  continue;
1202 	}
1203 
1204       cost = lim_data->cost;
1205       level = lim_data->tgt_loop;
1206       clear_lim_data (stmt);
1207 
1208       if (!level)
1209 	{
1210 	  gsi_next (&bsi);
1211 	  continue;
1212 	}
1213 
      /* We do not really want to move conditionals out of the loop; we just
	 placed them here to force their operands to be moved if necessary.  */
1216       if (gimple_code (stmt) == GIMPLE_COND)
1217 	continue;
1218 
1219       if (dump_file && (dump_flags & TDF_DETAILS))
1220 	{
1221 	  fprintf (dump_file, "Moving statement\n");
1222 	  print_gimple_stmt (dump_file, stmt, 0);
1223 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1224 		   cost, level->num);
1225 	}
1226 
1227       e = loop_preheader_edge (level);
1228       gcc_assert (!gimple_vdef (stmt));
1229       if (gimple_vuse (stmt))
1230 	{
1231 	  /* The new VUSE is the one from the virtual PHI in the loop
1232 	     header or the one already present.  */
1233 	  gphi_iterator gsi2;
1234 	  for (gsi2 = gsi_start_phis (e->dest);
1235 	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1236 	    {
1237 	      gphi *phi = gsi2.phi ();
1238 	      if (virtual_operand_p (gimple_phi_result (phi)))
1239 		{
1240 		  SET_USE (gimple_vuse_op (stmt),
1241 			   PHI_ARG_DEF_FROM_EDGE (phi, e));
1242 		  break;
1243 		}
1244 	    }
1245 	}
1246       gsi_remove (&bsi, false);
1247       if (gimple_has_lhs (stmt)
1248 	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1249 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1250 	  && (!ALWAYS_EXECUTED_IN (bb)
1251 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1252 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1253 	{
1254 	  tree lhs = gimple_get_lhs (stmt);
1255 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1256 	}
1257       /* In case this is a stmt that is not unconditionally executed
1258          when the target loop header is executed and the stmt may
1259 	 invoke undefined integer or pointer overflow rewrite it to
1260 	 unsigned arithmetic.  */
1261       if (is_gimple_assign (stmt)
1262 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1263 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1264 	  && arith_code_with_undefined_signed_overflow
1265 	       (gimple_assign_rhs_code (stmt))
1266 	  && (!ALWAYS_EXECUTED_IN (bb)
1267 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1268 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1269 	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1270       else
1271 	gsi_insert_on_edge (e, stmt);
1272     }
1273 
1274   return todo;
1275 }
1276 
1277 /* Checks whether the statement defining variable *INDEX can be hoisted
1278    out of the loop passed in DATA.  Callback for for_each_index.  */
1279 
1280 static bool
may_move_till (tree ref, tree *index, void *data)
1282 {
1283   class loop *loop = (class loop *) data, *max_loop;
1284 
1285   /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
1287   if (TREE_CODE (ref) == ARRAY_REF)
1288     {
1289       tree step = TREE_OPERAND (ref, 3);
1290       tree lbound = TREE_OPERAND (ref, 2);
1291 
1292       max_loop = outermost_invariant_loop (step, loop);
1293       if (!max_loop)
1294 	return false;
1295 
1296       max_loop = outermost_invariant_loop (lbound, loop);
1297       if (!max_loop)
1298 	return false;
1299     }
1300 
1301   max_loop = outermost_invariant_loop (*index, loop);
1302   if (!max_loop)
1303     return false;
1304 
1305   return true;
1306 }
1307 
/* If OP is an SSA_NAME, force the statement that defines it to be
   moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1310 
1311 static void
force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1313 {
1314   gimple *stmt;
1315 
1316   if (!op
1317       || is_gimple_min_invariant (op))
1318     return;
1319 
1320   gcc_assert (TREE_CODE (op) == SSA_NAME);
1321 
1322   stmt = SSA_NAME_DEF_STMT (op);
1323   if (gimple_nop_p (stmt))
1324     return;
1325 
1326   set_level (stmt, orig_loop, loop);
1327 }
1328 
/* Forces statements defining invariants in REF (and *INDEX) to be moved out
   of the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback
   for for_each_index.  */
1332 
1333 struct fmt_data
1334 {
1335   class loop *loop;
1336   class loop *orig_loop;
1337 };
1338 
1339 static bool
force_move_till (tree ref, tree *index, void *data)
1341 {
1342   struct fmt_data *fmt_data = (struct fmt_data *) data;
1343 
1344   if (TREE_CODE (ref) == ARRAY_REF)
1345     {
1346       tree step = TREE_OPERAND (ref, 3);
1347       tree lbound = TREE_OPERAND (ref, 2);
1348 
1349       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1350       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1351     }
1352 
1353   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1354 
1355   return true;
1356 }
1357 
/* A function to free the mem_ref object MEM.  */
1359 
1360 static void
memref_free (class im_mem_ref *mem)
1362 {
1363   mem->accesses_in_loop.release ();
1364 }
1365 
1366 /* Allocates and returns a memory reference description for MEM whose hash
1367    value is HASH and id is ID.  */
1368 
1369 static im_mem_ref *
mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1371 {
1372   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1373   if (mem)
1374     ref->mem = *mem;
1375   else
1376     ao_ref_init (&ref->mem, error_mark_node);
1377   ref->id = id;
1378   ref->ref_canonical = false;
1379   ref->ref_decomposed = false;
1380   ref->hash = hash;
1381   ref->stored = NULL;
1382   ref->loaded = NULL;
1383   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1384   ref->accesses_in_loop.create (1);
1385 
1386   return ref;
1387 }
1388 
/* Records memory reference location *LOC to the memory reference description
   REF.  The reference occurs in statement STMT.  */
1391 
1392 static void
record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1394 {
1395   mem_ref_loc aref;
1396   aref.stmt = stmt;
1397   aref.ref = loc;
1398   ref->accesses_in_loop.safe_push (aref);
1399 }
1400 
/* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
   necessary.  Return whether the bit changed.  */
1403 
1404 static bool
set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1406 {
1407   if (!ref->stored)
1408     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1409   return bitmap_set_bit (ref->stored, loop->num);
1410 }
1411 
1412 /* Marks reference REF as stored in LOOP.  */
1413 
1414 static void
mark_ref_stored (im_mem_ref *ref, class loop *loop)
1416 {
1417   while (loop != current_loops->tree_root
1418 	 && set_ref_stored_in_loop (ref, loop))
1419     loop = loop_outer (loop);
1420 }
1421 
/* Set the LOOP bit in REF's loaded bitmap, allocating the bitmap if
   necessary.  Return whether the bit changed.  */
1424 
1425 static bool
set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1427 {
1428   if (!ref->loaded)
1429     ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1430   return bitmap_set_bit (ref->loaded, loop->num);
1431 }
1432 
1433 /* Marks reference REF as loaded in LOOP.  */
1434 
1435 static void
mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1437 {
1438   while (loop != current_loops->tree_root
1439 	 && set_ref_loaded_in_loop (ref, loop))
1440     loop = loop_outer (loop);
1441 }
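
/* Illustrative sketch: for a store in the innermost loop of

     loop 1
       loop 2
	 loop 3		<- *p_1 = ...;

   mark_ref_stored sets the bits for loop 3, loop 2 and loop 1 in
   REF->stored, stopping early once it reaches an outer loop whose bit is
   already set (or the loop tree root).  */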
1442 
1443 /* Gathers memory references in statement STMT in LOOP, storing the
1444    information about them in the memory_accesses structure.  Marks
1445    the vops accessed through unrecognized statements there as
1446    well.  */
1447 
1448 static void
gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1450 {
1451   tree *mem = NULL;
1452   hashval_t hash;
1453   im_mem_ref **slot;
1454   im_mem_ref *ref;
1455   bool is_stored;
1456   unsigned id;
1457 
1458   if (!gimple_vuse (stmt))
1459     return;
1460 
1461   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1462   if (!mem)
1463     {
1464       /* We use the shared mem_ref for all unanalyzable refs.  */
1465       id = UNANALYZABLE_MEM_ID;
1466       ref = memory_accesses.refs_list[id];
1467       if (dump_file && (dump_flags & TDF_DETAILS))
1468 	{
1469 	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1470 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1471 	}
1472       is_stored = gimple_vdef (stmt);
1473     }
1474   else
1475     {
1476       /* We are looking for equal refs that might differ in structure
1477          such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
1478 	 make sure we can canonicalize the ref in the hashtable if
1479 	 non-operand_equal_p refs are found.  For the lookup we mark
1480 	 the case we want strict equality with aor.max_size == -1.  */
1481       ao_ref aor;
1482       ao_ref_init (&aor, *mem);
1483       ao_ref_base (&aor);
1484       ao_ref_alias_set (&aor);
1485       HOST_WIDE_INT offset, size, max_size;
1486       poly_int64 saved_maxsize = aor.max_size, mem_off;
1487       tree mem_base;
1488       bool ref_decomposed;
1489       if (aor.max_size_known_p ()
1490 	  && aor.offset.is_constant (&offset)
1491 	  && aor.size.is_constant (&size)
1492 	  && aor.max_size.is_constant (&max_size)
1493 	  && size == max_size
1494 	  && (size % BITS_PER_UNIT) == 0
1495 	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1496 	     size.  Make sure this is consistent with the extraction.  */
1497 	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1498 	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1499 		       aor.size)
1500 	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1501 	{
1502 	  ref_decomposed = true;
1503 	  hash = iterative_hash_expr (ao_ref_base (&aor), 0);
1504 	  hash = iterative_hash_host_wide_int (offset, hash);
1505 	  hash = iterative_hash_host_wide_int (size, hash);
1506 	}
1507       else
1508 	{
1509 	  ref_decomposed = false;
1510 	  hash = iterative_hash_expr (aor.ref, 0);
1511 	  aor.max_size = -1;
1512 	}
1513       slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1514       aor.max_size = saved_maxsize;
1515       if (*slot)
1516 	{
1517 	  if (!(*slot)->ref_canonical
1518 	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1519 	    {
1520 	      /* If we didn't yet canonicalize the hashtable ref (which
1521 	         we'll end up using for code insertion) and hit a second
1522 		 equal ref that is not structurally equivalent create
1523 		 a canonical ref which is a bare MEM_REF.  */
1524 	      if (TREE_CODE (*mem) == MEM_REF
1525 		  || TREE_CODE (*mem) == TARGET_MEM_REF)
1526 		{
1527 		  (*slot)->mem.ref = *mem;
1528 		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1529 		}
1530 	      else
1531 		{
1532 		  tree ref_alias_type = reference_alias_ptr_type (*mem);
1533 		  unsigned int ref_align = get_object_alignment (*mem);
1534 		  tree ref_type = TREE_TYPE (*mem);
1535 		  tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1536 				     unshare_expr (mem_base));
1537 		  if (TYPE_ALIGN (ref_type) != ref_align)
1538 		    ref_type = build_aligned_type (ref_type, ref_align);
1539 		  (*slot)->mem.ref
1540 		    = fold_build2 (MEM_REF, ref_type, tmp,
1541 				   build_int_cst (ref_alias_type, mem_off));
1542 		  if ((*slot)->mem.volatile_p)
1543 		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1544 		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1545 				       && is_gimple_mem_ref_addr
1546 				            (TREE_OPERAND ((*slot)->mem.ref,
1547 							   0)));
1548 		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1549 		}
1550 	      (*slot)->ref_canonical = true;
1551 	    }
1552 	  ref = *slot;
1553 	  id = ref->id;
1554 	}
1555       else
1556 	{
1557 	  id = memory_accesses.refs_list.length ();
1558 	  ref = mem_ref_alloc (&aor, hash, id);
1559 	  ref->ref_decomposed = ref_decomposed;
1560 	  memory_accesses.refs_list.safe_push (ref);
1561 	  *slot = ref;
1562 
1563 	  if (dump_file && (dump_flags & TDF_DETAILS))
1564 	    {
1565 	      fprintf (dump_file, "Memory reference %u: ", id);
1566 	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1567 	      fprintf (dump_file, "\n");
1568 	    }
1569 	}
1570 
1571       record_mem_ref_loc (ref, stmt, mem);
1572     }
1573   if (is_stored)
1574     {
1575       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1576       mark_ref_stored (ref, loop);
1577     }
  /* A non-simple memory op is also treated as a read when it is a write.  */
1579   if (!is_stored || id == UNANALYZABLE_MEM_ID)
1580     {
1581       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1582       mark_ref_loaded (ref, loop);
1583     }
1584   init_lim_data (stmt)->ref = ref->id;
1585   return;
1586 }
1587 
1588 static unsigned *bb_loop_postorder;
1589 
/* qsort sort function to sort blocks according to their loop father's
   postorder number.  */
1591 
1592 static int
sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1594 				void *bb_loop_postorder_)
1595 {
1596   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1597   basic_block bb1 = *(const basic_block *)bb1_;
1598   basic_block bb2 = *(const basic_block *)bb2_;
1599   class loop *loop1 = bb1->loop_father;
1600   class loop *loop2 = bb2->loop_father;
1601   if (loop1->num == loop2->num)
1602     return bb1->index - bb2->index;
1603   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1604 }
1605 
/* qsort sort function to sort ref locs according to their loop father's
   postorder number.  */
1607 
1608 static int
sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1610 				 void *bb_loop_postorder_)
1611 {
1612   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1613   const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1614   const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1615   class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1616   class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1617   if (loop1->num == loop2->num)
1618     return 0;
1619   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1620 }
1621 
1622 /* Gathers memory references in loops.  */
1623 
1624 static void
analyze_memory_references (bool store_motion)
1626 {
1627   gimple_stmt_iterator bsi;
1628   basic_block bb, *bbs;
1629   class loop *loop, *outer;
1630   unsigned i, n;
1631 
  /* Collect all basic-blocks in loops and sort them according to their
     loop's postorder number.  */
1634   i = 0;
1635   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1636   FOR_EACH_BB_FN (bb, cfun)
1637     if (bb->loop_father != current_loops->tree_root)
1638       bbs[i++] = bb;
1639   n = i;
1640   gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1641 	      bb_loop_postorder);
1642 
1643   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1644      That results in better locality for all the bitmaps.  It also
1645      automatically sorts the location list of gathered memory references
1646      after their loop postorder number, allowing it to be binary-searched.  */
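  /* For illustration (hypothetical loop numbers, for exposition only): with
     loop 2 nested in loop 1, LI_FROM_INNERMOST assigns
     bb_loop_postorder[2] < bb_loop_postorder[1], so the blocks of the inner
     loop sort first and the recorded locations of each memory reference end
     up clustered per loop nest, which is what the bsearch done by
     for_all_locs_in_loop relies on.  */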
1647   for (i = 0; i < n; ++i)
1648     {
1649       basic_block bb = bbs[i];
1650       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1651         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1652     }
1653 
1654   /* Verify the list of gathered memory references is sorted after their
1655      loop postorder number.  */
1656   if (flag_checking)
1657     {
1658       im_mem_ref *ref;
1659       FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1660 	for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1661 	  gcc_assert (sort_locs_in_loop_postorder_cmp
1662 			(&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1663 			 bb_loop_postorder) <= 0);
1664     }
1665 
1666   free (bbs);
1667 
1668   if (!store_motion)
1669     return;
1670 
1671   /* Propagate the information about accessed memory references up
1672      the loop hierarchy.  */
1673   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1674     {
1675       /* Finalize the overall touched references (including subloops).  */
1676       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1677 		       &memory_accesses.refs_stored_in_loop[loop->num]);
1678 
1679       /* Propagate the information about accessed memory references up
1680 	 the loop hierarchy.  */
1681       outer = loop_outer (loop);
1682       if (outer == current_loops->tree_root)
1683 	continue;
1684 
1685       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1686 		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
1687     }
1688 }
1689 
1690 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1691    tree_to_aff_combination_expand.  */
1692 
1693 static bool
1694 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1695 		      hash_map<tree, name_expansion *> **ttae_cache,
1696 		      bool tbaa_p)
1697 {
1698   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1699      object and their offset differ in such a way that the locations cannot
1700      overlap, then they cannot alias.  */
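  /* For example (made-up references, not computed here): with int a[16],
     the accesses a[2] and a[3] share a base, have size 4 and offsets 8 and
     12, so the affine difference of their offsets proves the byte ranges
     [8, 12) and [12, 16) cannot overlap and the refs do not alias, even
     though TBAA alone could not tell the two same-typed accesses apart.  */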
1701   poly_widest_int size1, size2;
1702   aff_tree off1, off2;
1703 
1704   /* Perform basic offset and type-based disambiguation.  */
1705   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1706     return false;
1707 
1708   /* The expansion of addresses may be a bit expensive, thus we only do
1709      the check at -O2 and higher optimization levels.  */
1710   if (optimize < 2)
1711     return true;
1712 
1713   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1714   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1715   aff_combination_expand (&off1, ttae_cache);
1716   aff_combination_expand (&off2, ttae_cache);
1717   aff_combination_scale (&off1, -1);
1718   aff_combination_add (&off2, &off1);
1719 
1720   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1721     return false;
1722 
1723   return true;
1724 }
1725 
1726 /* Compare function for bsearch searching for reference locations
1727    in a loop.  */
1728 
1729 static int
1730 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1731 			  void *bb_loop_postorder_)
1732 {
1733   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1734   class loop *loop = (class loop *)const_cast<void *>(loop_);
1735   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1736   class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1737   if (loop->num  == loc_loop->num
1738       || flow_loop_nested_p (loop, loc_loop))
1739     return 0;
1740   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1741 	  ? -1 : 1);
1742 }
1743 
1744 /* Iterates over all locations of REF in LOOP and its subloops calling
1745    fn.operator() with the location as argument.  When that operator
1746    returns true the iteration is stopped and true is returned.
1747    Otherwise false is returned.  */
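/* Example use (mirroring the visitor classes below; the names here are
   invented for exposition only):

     class count_locs
     {
     public:
       count_locs (unsigned *n_) : n (n_) {}
       bool operator () (mem_ref_loc *) { ++*n; return false; }
       unsigned *n;
     };

     unsigned n = 0;
     for_all_locs_in_loop (loop, ref, count_locs (&n));

   counts the locations of REF in LOOP; returning true from the operator
   would stop the walk early.  */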
1748 
1749 template <typename FN>
1750 static bool
1751 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1752 {
1753   unsigned i;
1754   mem_ref_loc *loc;
1755 
1756   /* Search for the cluster of locs in the accesses_in_loop vector
1757      which is sorted after postorder index of the loop father.  */
1758   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1759 				       bb_loop_postorder);
1760   if (!loc)
1761     return false;
1762 
1763   /* We have found one location inside loop or its sub-loops.  Iterate
1764      both forward and backward to cover the whole cluster.  */
1765   i = loc - ref->accesses_in_loop.address ();
1766   while (i > 0)
1767     {
1768       --i;
1769       mem_ref_loc *l = &ref->accesses_in_loop[i];
1770       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1771 	break;
1772       if (fn (l))
1773 	return true;
1774     }
1775   for (i = loc - ref->accesses_in_loop.address ();
1776        i < ref->accesses_in_loop.length (); ++i)
1777     {
1778       mem_ref_loc *l = &ref->accesses_in_loop[i];
1779       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1780 	break;
1781       if (fn (l))
1782 	return true;
1783     }
1784 
1785   return false;
1786 }
1787 
1788 /* Rewrites location LOC by TMP_VAR.  */
1789 
1790 class rewrite_mem_ref_loc
1791 {
1792 public:
1793   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1794   bool operator () (mem_ref_loc *loc);
1795   tree tmp_var;
1796 };
1797 
1798 bool
1799 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1800 {
1801   *loc->ref = tmp_var;
1802   update_stmt (loc->stmt);
1803   return false;
1804 }
1805 
1806 /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1807 
1808 static void
1809 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1810 {
1811   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1812 }
1813 
1814 /* Stores the first reference location in LOCP.  */
1815 
1816 class first_mem_ref_loc_1
1817 {
1818 public:
1819   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1820   bool operator () (mem_ref_loc *loc);
1821   mem_ref_loc **locp;
1822 };
1823 
1824 bool
1825 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1826 {
1827   *locp = loc;
1828   return true;
1829 }
1830 
1831 /* Returns the first reference location to REF in LOOP.  */
1832 
1833 static mem_ref_loc *
1834 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1835 {
1836   mem_ref_loc *locp = NULL;
1837   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1838   return locp;
1839 }
1840 
1841 /* Helper function for execute_sm.  Emit code to store TMP_VAR into
1842    MEM along edge EX.
1843 
1844    The store is only done if MEM has changed.  We do this so no
1845    changes to MEM occur on code paths that did not originally store
1846    into it.
1847 
1848    The common case for execute_sm will transform:
1849 
1850      for (...) {
1851        if (foo)
1852          stuff;
1853        else
1854          MEM = TMP_VAR;
1855      }
1856 
1857    into:
1858 
1859      lsm = MEM;
1860      for (...) {
1861        if (foo)
1862          stuff;
1863        else
1864          lsm = TMP_VAR;
1865      }
1866      MEM = lsm;
1867 
1868   This function will generate:
1869 
1870      lsm = MEM;
1871 
1872      lsm_flag = false;
1873      ...
1874      for (...) {
1875        if (foo)
1876          stuff;
1877        else {
1878          lsm = TMP_VAR;
1879          lsm_flag = true;
1880        }
1881      }
1882      if (lsm_flag)	<--
1883        MEM = lsm;	<--
1884 */
1885 
1886 static void
1887 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1888 		       edge preheader, hash_set <basic_block> *flag_bbs,
1889 		       edge &append_cond_position, edge &last_cond_fallthru)
1890 {
1891   basic_block new_bb, then_bb, old_dest;
1892   bool loop_has_only_one_exit;
1893   edge then_old_edge;
1894   gimple_stmt_iterator gsi;
1895   gimple *stmt;
1896   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1897 
1898   profile_count count_sum = profile_count::zero ();
1899   int nbbs = 0, ncount = 0;
1900   profile_probability flag_probability = profile_probability::uninitialized ();
1901 
1902   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1903      at loop exit.
1904 
1905      This code may look fancy, but it cannot update profile very realistically
1906      because we do not know the probability that flag will be true at a given
1907      loop exit.
1908 
1909      We look for two interesting extremes
1910        - when exit is dominated by block setting the flag, we know it will
1911          always be true.  This is a common case.
1912        - when all blocks setting the flag have very low frequency we know
1913          it will likely be false.
1914      In all other cases we default to 2/3 for flag being true.  */
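  /* Worked example (invented counts, for exposition only): if the flag is
     set in two blocks executed 30 and 10 times and the preheader executes
     100 times, count_sum / preheader count gives a probability of 0.4; a
     ratio of 0.9 would be capped at 2/3, and if one of the flag-setting
     blocks dominated the exit the probability would be 1.  */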
1915 
1916   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1917        it != flag_bbs->end (); ++it)
1918     {
1919        if ((*it)->count.initialized_p ())
1920          count_sum += (*it)->count, ncount ++;
1921        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1922 	 flag_probability = profile_probability::always ();
1923        nbbs++;
1924     }
1925 
1926   profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1927 
1928   if (flag_probability.initialized_p ())
1929     ;
1930   else if (ncount == nbbs
1931 	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1932     {
1933       flag_probability = count_sum.probability_in (preheader->count ());
1934       if (flag_probability > cap)
1935 	flag_probability = cap;
1936     }
1937 
1938   if (!flag_probability.initialized_p ())
1939     flag_probability = cap;
1940 
1941   /* ?? Insert store after previous store if applicable.  See note
1942      below.  */
1943   if (append_cond_position)
1944     ex = append_cond_position;
1945 
1946   loop_has_only_one_exit = single_pred_p (ex->dest);
1947 
1948   if (loop_has_only_one_exit)
1949     ex = split_block_after_labels (ex->dest);
1950   else
1951     {
1952       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1953 	   !gsi_end_p (gpi); gsi_next (&gpi))
1954 	{
1955 	  gphi *phi = gpi.phi ();
1956 	  if (virtual_operand_p (gimple_phi_result (phi)))
1957 	    continue;
1958 
1959 	  /* When the destination has a non-virtual PHI node with multiple
1960 	     predecessors make sure we preserve the PHI structure by
1961 	     forcing a forwarder block so that hoisting of that PHI will
1962 	     still work.  */
1963 	  split_edge (ex);
1964 	  break;
1965 	}
1966     }
1967 
1968   old_dest = ex->dest;
1969   new_bb = split_edge (ex);
1970   then_bb = create_empty_bb (new_bb);
1971   then_bb->count = new_bb->count.apply_probability (flag_probability);
1972   if (irr)
1973     then_bb->flags = BB_IRREDUCIBLE_LOOP;
1974   add_bb_to_loop (then_bb, new_bb->loop_father);
1975 
1976   gsi = gsi_start_bb (new_bb);
1977   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1978 			    NULL_TREE, NULL_TREE);
1979   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1980 
1981   gsi = gsi_start_bb (then_bb);
1982   /* Insert actual store.  */
1983   stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1984   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1985 
1986   edge e1 = single_succ_edge (new_bb);
1987   edge e2 = make_edge (new_bb, then_bb,
1988 	               EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1989   e2->probability = flag_probability;
1990 
1991   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
1992   e1->flags &= ~EDGE_FALLTHRU;
1993 
1994   e1->probability = flag_probability.invert ();
1995 
1996   then_old_edge = make_single_succ_edge (then_bb, old_dest,
1997 			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1998 
1999   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2000 
2001   if (append_cond_position)
2002     {
2003       basic_block prevbb = last_cond_fallthru->src;
2004       redirect_edge_succ (last_cond_fallthru, new_bb);
2005       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2006       set_immediate_dominator (CDI_DOMINATORS, old_dest,
2007 			       recompute_dominator (CDI_DOMINATORS, old_dest));
2008     }
2009 
2010   /* ?? Because stores may alias, they must happen in the exact
2011      sequence they originally happened.  Save the position right after
2012      the (_lsm) store we just created so we can continue appending after
2013      it and maintain the original order.  */
2014   append_cond_position = then_old_edge;
2015   last_cond_fallthru = find_edge (new_bb, old_dest);
2016 
2017   if (!loop_has_only_one_exit)
2018     for (gphi_iterator gpi = gsi_start_phis (old_dest);
2019 	 !gsi_end_p (gpi); gsi_next (&gpi))
2020       {
2021 	gphi *phi = gpi.phi ();
2022 	unsigned i;
2023 
2024 	for (i = 0; i < gimple_phi_num_args (phi); i++)
2025 	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2026 	    {
2027 	      tree arg = gimple_phi_arg_def (phi, i);
2028 	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2029 	      update_stmt (phi);
2030 	    }
2031       }
2032 }
2033 
2034 /* When REF is written at the location, set the flag indicating the store.  */
2035 
2036 class sm_set_flag_if_changed
2037 {
2038 public:
2039   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2040 	 : flag (flag_), bbs (bbs_) {}
2041   bool operator () (mem_ref_loc *loc);
2042   tree flag;
2043   hash_set <basic_block> *bbs;
2044 };
2045 
2046 bool
2047 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2048 {
2049   /* Only set the flag for writes.  */
2050   if (is_gimple_assign (loc->stmt)
2051       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2052     {
2053       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2054       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2055       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2056       bbs->add (gimple_bb (stmt));
2057     }
2058   return false;
2059 }
2060 
2061 /* Helper function for execute_sm.  On every location where REF is
2062    set, set an appropriate flag indicating the store.  */
2063 
2064 static tree
2065 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2066 				hash_set <basic_block> *bbs)
2067 {
2068   tree flag;
2069   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2070   flag = create_tmp_reg (boolean_type_node, str);
2071   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2072   return flag;
2073 }
2074 
2075 struct sm_aux
2076 {
2077   tree tmp_var;
2078   tree store_flag;
2079   hash_set <basic_block> flag_bbs;
2080 };
2081 
2082 /* Executes store motion of memory reference REF from LOOP.  The auxiliary
2083    data created for REF is recorded in AUX_MAP.  The initialization of the
2084    temporary variable is put in the preheader of the loop; the stores to the
2085    reference on the loop exits are emitted later by execute_sm_exit.  */
2086 
2087 static void
2088 execute_sm (class loop *loop, im_mem_ref *ref,
2089 	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
2090 {
2091   gassign *load;
2092   struct fmt_data fmt_data;
2093   struct lim_aux_data *lim_data;
2094   bool multi_threaded_model_p = false;
2095   gimple_stmt_iterator gsi;
2096   sm_aux *aux = new sm_aux;
2097 
2098   if (dump_file && (dump_flags & TDF_DETAILS))
2099     {
2100       fprintf (dump_file, "Executing store motion of ");
2101       print_generic_expr (dump_file, ref->mem.ref);
2102       fprintf (dump_file, " from loop %d\n", loop->num);
2103     }
2104 
2105   aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2106 				 get_lsm_tmp_name (ref->mem.ref, ~0));
2107 
2108   fmt_data.loop = loop;
2109   fmt_data.orig_loop = loop;
2110   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2111 
2112   bool always_stored = ref_always_accessed_p (loop, ref, true);
2113   if (maybe_mt
2114       && (bb_in_transaction (loop_preheader_edge (loop)->src)
2115 	  || (! flag_store_data_races && ! always_stored)))
2116     multi_threaded_model_p = true;
2117 
2118   if (multi_threaded_model_p)
2119     aux->store_flag
2120       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2121   else
2122     aux->store_flag = NULL_TREE;
2123 
2124   /* Remember variable setup.  */
2125   aux_map.put (ref, aux);
2126 
2127   rewrite_mem_refs (loop, ref, aux->tmp_var);
2128 
2129   /* Emit the load code on a random exit edge or into the latch if
2130      the loop does not exit, so that we are sure it will be processed
2131      by move_computations after all dependencies.  */
2132   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2133 
2134   /* Avoid doing a load if there was no load of the ref in the loop.
2135      Esp. when the ref is not always stored we cannot optimize it
2136      away later.  But when it is not always stored we must use a conditional
2137      store then.  */
2138   if ((!always_stored && !multi_threaded_model_p)
2139       || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2140     load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2141   else
2142     {
2143       /* If not emitting a load mark the uninitialized state on the
2144 	 loop entry as not to be warned for.  */
2145       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2146       TREE_NO_WARNING (uninit) = 1;
2147       load = gimple_build_assign (aux->tmp_var, uninit);
2148     }
2149   lim_data = init_lim_data (load);
2150   lim_data->max_loop = loop;
2151   lim_data->tgt_loop = loop;
2152   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2153 
2154   if (multi_threaded_model_p)
2155     {
2156       load = gimple_build_assign (aux->store_flag, boolean_false_node);
2157       lim_data = init_lim_data (load);
2158       lim_data->max_loop = loop;
2159       lim_data->tgt_loop = loop;
2160       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2161     }
2162 }
2163 
2164 /* sm_ord is used for ordinary stores whose order with respect to other
2165        stores we can retain
2166    sm_unord is used for conditionally executed stores which need to be
2167        able to execute in arbitrary order with respect to other stores
2168    sm_other is used for stores we do not try to apply store motion to.  */
2169 enum sm_kind { sm_ord, sm_unord, sm_other };
2170 struct seq_entry
2171 {
2172   seq_entry () {}
2173   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2174     : first (f), second (k), from (fr) {}
2175   unsigned first;
2176   sm_kind second;
2177   tree from;
2178 };
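/* For exposition: FIRST is the index of a memory reference in
   memory_accesses.refs_list, SECOND classifies how its store may be
   materialized on a loop exit, and FROM, for an sm_other entry that has to
   keep its place in the sequence, records the value that was stored so the
   store can be re-issued verbatim on the exit edge (see the sm_other
   handling in execute_sm_exit).  */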
2179 
2180 static void
2181 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2182 		 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2183 		 edge &append_cond_position, edge &last_cond_fallthru)
2184 {
2185   /* Sink the stores to exit from the loop.  */
2186   for (unsigned i = seq.length (); i > 0; --i)
2187     {
2188       im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2189       if (seq[i-1].second == sm_other)
2190 	{
2191 	  gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2192 	  if (dump_file && (dump_flags & TDF_DETAILS))
2193 	    {
2194 	      fprintf (dump_file, "Re-issuing dependent store of ");
2195 	      print_generic_expr (dump_file, ref->mem.ref);
2196 	      fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2197 		       loop->num, ex->src->index, ex->dest->index);
2198 	    }
2199 	  gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2200 						seq[i-1].from);
2201 	  gsi_insert_on_edge (ex, store);
2202 	}
2203       else
2204 	{
2205 	  sm_aux *aux = *aux_map.get (ref);
2206 	  if (!aux->store_flag || kind == sm_ord)
2207 	    {
2208 	      gassign *store;
2209 	      store = gimple_build_assign (unshare_expr (ref->mem.ref),
2210 					   aux->tmp_var);
2211 	      gsi_insert_on_edge (ex, store);
2212 	    }
2213 	  else
2214 	    execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2215 				   aux->store_flag,
2216 				   loop_preheader_edge (loop), &aux->flag_bbs,
2217 				   append_cond_position, last_cond_fallthru);
2218 	}
2219     }
2220 }
2221 
2222 /* Push the SM candidate at index PTR in the sequence SEQ down until
2223    we hit the next SM candidate.  Return true if that went OK and
2224    false if we could not disambiguate against another unrelated ref.
2225    Update *AT to the index where the candidate now resides.  */
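/* For illustration (invented entries): with SEQ = { A:sm_ord, B:sm_other
   with no recorded value, C:sm_ord } and PTR = 2, C is swapped past B
   provided the two refs can be disambiguated without TBAA, yielding
   { A:sm_ord, C:sm_ord, B:sm_other } and *AT = 1; the walk then stops at A
   because an sm_ord entry marks the tail of the ordered sequence.  */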
2226 
2227 static bool
2228 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2229 {
2230   *at = ptr;
2231   for (; ptr > 0; --ptr)
2232     {
2233       seq_entry &new_cand = seq[ptr];
2234       seq_entry &against = seq[ptr-1];
2235       if (against.second == sm_ord
2236 	  || (against.second == sm_other && against.from != NULL_TREE))
2237 	/* Found the tail of the sequence.  */
2238 	break;
2239       /* We may not ignore self-dependences here.  */
2240       if (new_cand.first == against.first
2241 	  || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2242 				  memory_accesses.refs_list[against.first],
2243 				  false))
2244 	/* ???  Prune new_cand from the list of refs to apply SM to.  */
2245 	return false;
2246       std::swap (new_cand, against);
2247       *at = ptr - 1;
2248     }
2249   return true;
2250 }
2251 
2252 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2253    walking backwards from VDEF (or the end of BB if VDEF is NULL).  */
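/* For exposition (invented GIMPLE, the SSA names are only illustrative): in

     # .MEM_3 = VDEF <.MEM_2>
     p->a = x_1;
     # .MEM_4 = VDEF <.MEM_3>
     q->b = y_2;

   the walk starts from the last virtual definition in BB (.MEM_4), records
   the store to q->b, then follows gimple_vuse to .MEM_3 and records the
   store to p->a; i.e. stores are visited from the exit backwards.  */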
2254 
2255 static int
2256 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2257 		 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2258 		 bitmap refs_not_supported, bool forked,
2259 		 bitmap fully_visited)
2260 {
2261   if (!vdef)
2262     for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2263 	 gsi_prev (&gsi))
2264       {
2265 	vdef = gimple_vdef (gsi_stmt (gsi));
2266 	if (vdef)
2267 	  break;
2268       }
2269   if (!vdef)
2270     {
2271       gphi *vphi = get_virtual_phi (bb);
2272       if (vphi)
2273 	vdef = gimple_phi_result (vphi);
2274     }
2275   if (!vdef)
2276     {
2277       if (single_pred_p (bb))
2278 	/* This handles the perfect nest case.  */
2279 	return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2280 				seq, refs_not_in_seq, refs_not_supported,
2281 				forked, fully_visited);
2282       return 0;
2283     }
2284   do
2285     {
2286       gimple *def = SSA_NAME_DEF_STMT (vdef);
2287       if (gimple_bb (def) != bb)
2288 	{
2289 	  /* If we forked by processing a PHI do not allow our walk to
2290 	     merge again until we handle that robustly.  */
2291 	  if (forked)
2292 	    {
2293 	      /* Mark refs_not_in_seq as unsupported.  */
2294 	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2295 	      return 1;
2296 	    }
2297 	  /* Otherwise it doesn't really matter if we end up in different
2298 	     BBs.  */
2299 	  bb = gimple_bb (def);
2300 	}
2301       if (gphi *phi = dyn_cast <gphi *> (def))
2302 	{
2303 	  /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
2304 	     this is still linear.
2305 	     Eventually we want to cache intermediate results per BB
2306 	     (but we can't easily cache for different exits?).  */
2307 	  /* Stop at PHIs with possible backedges.  */
2308 	  if (bb == bb->loop_father->header
2309 	      || bb->flags & BB_IRREDUCIBLE_LOOP)
2310 	    {
2311 	      /* Mark refs_not_in_seq as unsupported.  */
2312 	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2313 	      return 1;
2314 	    }
2315 	  if (gimple_phi_num_args (phi) == 1)
2316 	    return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2317 				    gimple_phi_arg_def (phi, 0), seq,
2318 				    refs_not_in_seq, refs_not_supported,
2319 				    false, fully_visited);
2320 	  if (bitmap_bit_p (fully_visited,
2321 			    SSA_NAME_VERSION (gimple_phi_result (phi))))
2322 	    return 1;
2323 	  auto_vec<seq_entry> first_edge_seq;
2324 	  auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2325 	  int eret;
2326 	  bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2327 	  eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2328 				  gimple_phi_arg_def (phi, 0),
2329 				  first_edge_seq,
2330 				  tem_refs_not_in_seq, refs_not_supported,
2331 				  true, fully_visited);
2332 	  if (eret != 1)
2333 	    return -1;
2334 	  /* Simplify our lives by pruning the sequence of !sm_ord.  */
2335 	  while (!first_edge_seq.is_empty ()
2336 		 && first_edge_seq.last ().second != sm_ord)
2337 	    first_edge_seq.pop ();
2338 	  for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2339 	    {
2340 	      tree vuse = gimple_phi_arg_def (phi, i);
2341 	      edge e = gimple_phi_arg_edge (phi, i);
2342 	      auto_vec<seq_entry> edge_seq;
2343 	      bitmap_and_compl (tem_refs_not_in_seq,
2344 				refs_not_in_seq, refs_not_supported);
2345 	      /* If we've marked all refs we search for as unsupported
2346 		 we can stop processing and use the sequence as before
2347 		 the PHI.  */
2348 	      if (bitmap_empty_p (tem_refs_not_in_seq))
2349 		return 1;
2350 	      eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2351 				      tem_refs_not_in_seq, refs_not_supported,
2352 				      true, fully_visited);
2353 	      if (eret != 1)
2354 		return -1;
2355 	      /* Simplify our lives by pruning the sequence of !sm_ord.  */
2356 	      while (!edge_seq.is_empty ()
2357 		     && edge_seq.last ().second != sm_ord)
2358 		edge_seq.pop ();
2359 	      unsigned min_len = MIN(first_edge_seq.length (),
2360 				     edge_seq.length ());
2361 	      /* Incrementally merge seqs into first_edge_seq.  */
2362 	      int first_uneq = -1;
2363 	      auto_vec<seq_entry, 2> extra_refs;
2364 	      for (unsigned int i = 0; i < min_len; ++i)
2365 		{
2366 		  /* ???  We can more intelligently merge when we face different
2367 		     order by additional sinking operations in one sequence.
2368 		     For now we simply mark them as to be processed by the
2369 		     not order-preserving SM code.  */
2370 		  if (first_edge_seq[i].first != edge_seq[i].first)
2371 		    {
2372 		      if (first_edge_seq[i].second == sm_ord)
2373 			bitmap_set_bit (refs_not_supported,
2374 					first_edge_seq[i].first);
2375 		      if (edge_seq[i].second == sm_ord)
2376 			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2377 		      first_edge_seq[i].second = sm_other;
2378 		      first_edge_seq[i].from = NULL_TREE;
2379 		      /* Record the dropped refs for later processing.  */
2380 		      if (first_uneq == -1)
2381 			first_uneq = i;
2382 		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
2383 						       sm_other, NULL_TREE));
2384 		    }
2385 		  /* sm_other prevails.  */
2386 		  else if (first_edge_seq[i].second != edge_seq[i].second)
2387 		    {
2388 		      /* Make sure the ref is marked as not supported.  */
2389 		      bitmap_set_bit (refs_not_supported,
2390 				      first_edge_seq[i].first);
2391 		      first_edge_seq[i].second = sm_other;
2392 		      first_edge_seq[i].from = NULL_TREE;
2393 		    }
2394 		  else if (first_edge_seq[i].second == sm_other
2395 			   && first_edge_seq[i].from != NULL_TREE
2396 			   && (edge_seq[i].from == NULL_TREE
2397 			       || !operand_equal_p (first_edge_seq[i].from,
2398 						    edge_seq[i].from, 0)))
2399 		    first_edge_seq[i].from = NULL_TREE;
2400 		}
2401 	      /* Any excess elements become sm_other since they are now
2402 		 conditionally executed.  */
2403 	      if (first_edge_seq.length () > edge_seq.length ())
2404 		{
2405 		  for (unsigned i = edge_seq.length ();
2406 		       i < first_edge_seq.length (); ++i)
2407 		    {
2408 		      if (first_edge_seq[i].second == sm_ord)
2409 			bitmap_set_bit (refs_not_supported,
2410 					first_edge_seq[i].first);
2411 		      first_edge_seq[i].second = sm_other;
2412 		    }
2413 		}
2414 	      else if (edge_seq.length () > first_edge_seq.length ())
2415 		{
2416 		  if (first_uneq == -1)
2417 		    first_uneq = first_edge_seq.length ();
2418 		  for (unsigned i = first_edge_seq.length ();
2419 		       i < edge_seq.length (); ++i)
2420 		    {
2421 		      if (edge_seq[i].second == sm_ord)
2422 			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2423 		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
2424 						       sm_other, NULL_TREE));
2425 		    }
2426 		}
2427 	      /* Put unmerged refs at first_uneq to force dependence checking
2428 		 on them.  */
2429 	      if (first_uneq != -1)
2430 		{
2431 		  /* Missing ordered_splice_at.  */
2432 		  if ((unsigned)first_uneq == first_edge_seq.length ())
2433 		    first_edge_seq.safe_splice (extra_refs);
2434 		  else
2435 		    {
2436 		      unsigned fes_length = first_edge_seq.length ();
2437 		      first_edge_seq.safe_grow (fes_length
2438 						+ extra_refs.length ());
2439 		      memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2440 			       &first_edge_seq[first_uneq],
2441 			       (fes_length - first_uneq) * sizeof (seq_entry));
2442 		      memcpy (&first_edge_seq[first_uneq],
2443 			      extra_refs.address (),
2444 			      extra_refs.length () * sizeof (seq_entry));
2445 		    }
2446 		}
2447 	    }
2448 	  /* Use the sequence from the first edge and push SMs down.  */
2449 	  for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2450 	    {
2451 	      unsigned id = first_edge_seq[i].first;
2452 	      seq.safe_push (first_edge_seq[i]);
2453 	      unsigned new_idx;
2454 	      if ((first_edge_seq[i].second == sm_ord
2455 		   || (first_edge_seq[i].second == sm_other
2456 		       && first_edge_seq[i].from != NULL_TREE))
2457 		  && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2458 		{
2459 		  if (first_edge_seq[i].second == sm_ord)
2460 		    bitmap_set_bit (refs_not_supported, id);
2461 		  /* Mark it sm_other.  */
2462 		  seq[new_idx].second = sm_other;
2463 		  seq[new_idx].from = NULL_TREE;
2464 		}
2465 	    }
2466 	  bitmap_set_bit (fully_visited,
2467 			  SSA_NAME_VERSION (gimple_phi_result (phi)));
2468 	  return 1;
2469 	}
2470       lim_aux_data *data = get_lim_data (def);
2471       gcc_assert (data);
2472       if (data->ref == UNANALYZABLE_MEM_ID)
2473 	return -1;
2474       /* One of the stores we want to apply SM to and we've not yet seen.  */
2475       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2476 	{
2477 	  seq.safe_push (seq_entry (data->ref, sm_ord));
2478 
2479 	  /* 1) push it down the queue until a SMed
2480 	     and not ignored ref is reached, skipping all not SMed refs
2481 	     and ignored refs via non-TBAA disambiguation.  */
2482 	  unsigned new_idx;
2483 	  if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2484 	      /* If that fails but we did not fork yet continue, we'll see
2485 		 to re-materialize all of the stores in the sequence then.
2486 		 Further stores will only be pushed up to this one.  */
2487 	      && forked)
2488 	    {
2489 	      bitmap_set_bit (refs_not_supported, data->ref);
2490 	      /* Mark it sm_other.  */
2491 	      seq[new_idx].second = sm_other;
2492 	    }
2493 
2494 	  /* 2) check whether we've seen all refs we want to SM and if so
2495 	     declare success for the active exit.  */
2496 	  if (bitmap_empty_p (refs_not_in_seq))
2497 	    return 1;
2498 	}
2499       else
2500 	/* Another store not part of the final sequence.  Simply push it.  */
2501 	seq.safe_push (seq_entry (data->ref, sm_other,
2502 				  gimple_assign_rhs1 (def)));
2503 
2504       vdef = gimple_vuse (def);
2505     }
2506   while (1);
2507 }
2508 
2509 /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2510    edges of the LOOP.  */
2511 
2512 static void
2513 hoist_memory_references (class loop *loop, bitmap mem_refs,
2514 			 vec<edge> exits)
2515 {
2516   im_mem_ref *ref;
2517   unsigned  i;
2518   bitmap_iterator bi;
2519 
2520   /* To address PR57359 before actually applying store-motion check
2521      the candidates found for validity with regards to reordering
2522      relative to other stores which we until here disambiguated using
2523      TBAA which isn't valid.
2524      What matters is the order of the last stores to the mem_refs
2525      with respect to the other stores of the loop at the point of the
2526      loop exits.  */
2527 
2528   /* For each exit compute the store order, pruning from mem_refs
2529      on the fly.  */
2530   /* The complexity of this is at least
2531      O(number of exits * number of SM refs) but can approach
2532      O(number of exits * number of SM refs * number of stores).  */
2533   /* ???  Somehow do this in a single sweep over the loop body.  */
2534   auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2535   auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2536   edge e;
2537   FOR_EACH_VEC_ELT (exits, i, e)
2538     {
2539       vec<seq_entry> seq;
2540       seq.create (4);
2541       auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2542       bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2543       if (bitmap_empty_p (refs_not_in_seq))
2544 	{
2545 	  seq.release ();
2546 	  break;
2547 	}
2548       auto_bitmap fully_visited;
2549       int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2550 				 seq, refs_not_in_seq,
2551 				 refs_not_supported, false,
2552 				 fully_visited);
2553       if (res != 1)
2554 	{
2555 	  bitmap_copy (refs_not_supported, mem_refs);
2556 	  seq.release ();
2557 	  break;
2558 	}
2559       sms.safe_push (std::make_pair (e, seq));
2560     }
2561 
2562   /* Prune now-unsupported mem_refs from earlier processed exits.  */
2563   bool changed = !bitmap_empty_p (refs_not_supported);
2564   while (changed)
2565     {
2566       changed = false;
2567       std::pair<edge, vec<seq_entry> > *seq;
2568       FOR_EACH_VEC_ELT (sms, i, seq)
2569 	{
2570 	  bool need_to_push = false;
2571 	  for (unsigned i = 0; i < seq->second.length (); ++i)
2572 	    {
2573 	      sm_kind kind = seq->second[i].second;
2574 	      if (kind == sm_other && seq->second[i].from == NULL_TREE)
2575 		break;
2576 	      unsigned id = seq->second[i].first;
2577 	      unsigned new_idx;
2578 	      if (kind == sm_ord
2579 		  && bitmap_bit_p (refs_not_supported, id))
2580 		{
2581 		  seq->second[i].second = sm_other;
2582 		  gcc_assert (seq->second[i].from == NULL_TREE);
2583 		  need_to_push = true;
2584 		}
2585 	      else if (need_to_push
2586 		       && !sm_seq_push_down (seq->second, i, &new_idx))
2587 		{
2588 		  /* We need to push down both sm_ord and sm_other
2589 		     but for the latter we need to disqualify all
2590 		     following refs.  */
2591 		  if (kind == sm_ord)
2592 		    {
2593 		      if (bitmap_set_bit (refs_not_supported, id))
2594 			changed = true;
2595 		      seq->second[new_idx].second = sm_other;
2596 		    }
2597 		  else
2598 		    {
2599 		      for (unsigned j = seq->second.length () - 1;
2600 			   j > new_idx; --j)
2601 			if (seq->second[j].second == sm_ord
2602 			    && bitmap_set_bit (refs_not_supported,
2603 					       seq->second[j].first))
2604 			  changed = true;
2605 		      seq->second.truncate (new_idx);
2606 		      break;
2607 		    }
2608 		}
2609 	    }
2610 	}
2611     }
2612   std::pair<edge, vec<seq_entry> > *seq;
2613   FOR_EACH_VEC_ELT (sms, i, seq)
2614     {
2615       /* Prune sm_other from the end.  */
2616       while (!seq->second.is_empty ()
2617 	     && seq->second.last ().second == sm_other)
2618 	seq->second.pop ();
2619       /* Prune duplicates from the start.  */
2620       auto_bitmap seen (&lim_bitmap_obstack);
2621       unsigned j, k;
2622       for (j = k = 0; j < seq->second.length (); ++j)
2623 	if (bitmap_set_bit (seen, seq->second[j].first))
2624 	  {
2625 	    if (k != j)
2626 	      seq->second[k] = seq->second[j];
2627 	    ++k;
2628 	  }
2629       seq->second.truncate (k);
2630       /* And verify.  */
2631       seq_entry *e;
2632       FOR_EACH_VEC_ELT (seq->second, j, e)
2633 	gcc_assert (e->second == sm_ord
2634 		    || (e->second == sm_other && e->from != NULL_TREE));
2635     }
2636 
2637   /* Verify dependence for refs we cannot handle with the order preserving
2638      code (refs_not_supported) or prune them from mem_refs.  */
2639   auto_vec<seq_entry> unord_refs;
2640   EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2641     {
2642       ref = memory_accesses.refs_list[i];
2643       if (!ref_indep_loop_p (loop, ref, sm_waw))
2644 	bitmap_clear_bit (mem_refs, i);
2645       /* We've now verified that the store order for ref with respect to all
2646 	 other stores in the loop does not matter.  */
2647       else
2648 	unord_refs.safe_push (seq_entry (i, sm_unord));
2649     }
2650 
2651   hash_map<im_mem_ref *, sm_aux *> aux_map;
2652 
2653   /* Execute SM but delay the store materialization for ordered
2654      sequences on exit.  */
2655   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2656     {
2657       ref = memory_accesses.refs_list[i];
2658       execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
2659     }
2660 
2661   /* Materialize ordered store sequences on exits.  */
2662   FOR_EACH_VEC_ELT (exits, i, e)
2663     {
2664       edge append_cond_position = NULL;
2665       edge last_cond_fallthru = NULL;
2666       if (i < sms.length ())
2667 	{
2668 	  gcc_assert (sms[i].first == e);
2669 	  execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2670 			   append_cond_position, last_cond_fallthru);
2671 	  sms[i].second.release ();
2672 	}
2673       if (!unord_refs.is_empty ())
2674 	execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2675 			 append_cond_position, last_cond_fallthru);
2676       /* Commit edge inserts here to preserve the order of stores
2677 	 when an exit exits multiple loops.  */
2678       gsi_commit_one_edge_insert (e, NULL);
2679     }
2680 
2681   for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2682        iter != aux_map.end (); ++iter)
2683     delete (*iter).second;
2684 }
2685 
2686 class ref_always_accessed
2687 {
2688 public:
2689   ref_always_accessed (class loop *loop_, bool stored_p_)
2690       : loop (loop_), stored_p (stored_p_) {}
2691   bool operator () (mem_ref_loc *loc);
2692   class loop *loop;
2693   bool stored_p;
2694 };
2695 
2696 bool
2697 ref_always_accessed::operator () (mem_ref_loc *loc)
2698 {
2699   class loop *must_exec;
2700 
2701   struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2702   if (!lim_data)
2703     return false;
2704 
2705   /* If we require an always executed store make sure the statement
2706      is a store.  */
2707   if (stored_p)
2708     {
2709       tree lhs = gimple_get_lhs (loc->stmt);
2710       if (!lhs
2711 	  || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2712 	return false;
2713     }
2714 
2715   must_exec = lim_data->always_executed_in;
2716   if (!must_exec)
2717     return false;
2718 
2719   if (must_exec == loop
2720       || flow_loop_nested_p (must_exec, loop))
2721     return true;
2722 
2723   return false;
2724 }
2725 
2726 /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
2727    make sure REF is always stored to in LOOP.  */
2728 
2729 static bool
2730 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
2731 {
2732   return for_all_locs_in_loop (loop, ref,
2733 			       ref_always_accessed (loop, stored_p));
2734 }
2735 
2736 /* Returns true if REF1 and REF2 are independent.  */
2737 
2738 static bool
2739 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
2740 {
2741   if (ref1 == ref2)
2742     return true;
2743 
2744   if (dump_file && (dump_flags & TDF_DETAILS))
2745     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2746 	     ref1->id, ref2->id);
2747 
2748   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
2749     {
2750       if (dump_file && (dump_flags & TDF_DETAILS))
2751 	fprintf (dump_file, "dependent.\n");
2752       return false;
2753     }
2754   else
2755     {
2756       if (dump_file && (dump_flags & TDF_DETAILS))
2757 	fprintf (dump_file, "independent.\n");
2758       return true;
2759     }
2760 }
2761 
2762 /* Returns true if REF is independent of all other accesses in LOOP.
2763    KIND specifies the kind of dependence to consider.
2764      lim_raw assumes REF is not stored in LOOP and disambiguates RAW
2765 	     dependences so if true REF can be hoisted out of LOOP
2766      sm_war disambiguates a store REF against all other loads to see
2767 	    whether the store can be sunk across loads out of LOOP
2768      sm_waw disambiguates a store REF against all other stores to see
2769 	    whether the store can be sunk across stores out of LOOP.  */
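/* For illustration (a hypothetical loop, names invented):

     for (;;)
       {
	 x = *p;       // lim_raw: hoisting the load of *p requires that
		       // no store in the loop may alias *p.
	 *q = x + 1;   // sm_war: sinking the store to *q requires that it
		       // does not alias any load in the loop (*p).
	 *r = 2;       // sm_waw: in addition, sinking *r must not change
		       // the order of the last stores to *q and *r when
		       // the two may alias.
       }  */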
2770 
2771 static bool
2772 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
2773 {
2774   bool indep_p = true;
2775   bitmap refs_to_check;
2776 
2777   if (kind == sm_war)
2778     refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
2779   else
2780     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2781 
2782   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2783     indep_p = false;
2784   else
2785     {
2786       /* tri-state, { unknown, independent, dependent }  */
2787       dep_state state = query_loop_dependence (loop, ref, kind);
2788       if (state != dep_unknown)
2789 	return state == dep_independent ? true : false;
2790 
2791       class loop *inner = loop->inner;
2792       while (inner)
2793 	{
2794 	  if (!ref_indep_loop_p (inner, ref, kind))
2795 	    {
2796 	      indep_p = false;
2797 	      break;
2798 	    }
2799 	  inner = inner->next;
2800 	}
2801 
2802       if (indep_p)
2803 	{
2804 	  unsigned i;
2805 	  bitmap_iterator bi;
2806 	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2807 	    {
2808 	      im_mem_ref *aref = memory_accesses.refs_list[i];
2809 	      if (!refs_independent_p (ref, aref, kind != sm_waw))
2810 		{
2811 		  indep_p = false;
2812 		  break;
2813 		}
2814 	    }
2815 	}
2816     }
2817 
2818   if (dump_file && (dump_flags & TDF_DETAILS))
2819     fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
2820 	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
2821 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
2822 
2823   /* Record the computed result in the cache.  */
2824   record_loop_dependence (loop, ref, kind,
2825 			  indep_p ? dep_independent : dep_dependent);
2826 
2827   return indep_p;
2828 }
2829 
2830 
2831 /* Returns true if we can perform store motion of REF from LOOP.  */
2832 
2833 static bool
2834 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
2835 {
2836   tree base;
2837 
2838   /* Can't hoist unanalyzable refs.  */
2839   if (!MEM_ANALYZABLE (ref))
2840     return false;
2841 
2842   /* It should be movable.  */
2843   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2844       || TREE_THIS_VOLATILE (ref->mem.ref)
2845       || !for_each_index (&ref->mem.ref, may_move_till, loop))
2846     return false;
2847 
2848   /* If it can throw fail, we do not properly update EH info.  */
2849   if (tree_could_throw_p (ref->mem.ref))
2850     return false;
2851 
2852   /* If it can trap, it must be always executed in LOOP.
2853      Readonly memory locations may trap when storing to them, but
2854      tree_could_trap_p is a predicate for rvalues, so check that
2855      explicitly.  */
2856   base = get_base_address (ref->mem.ref);
2857   if ((tree_could_trap_p (ref->mem.ref)
2858        || (DECL_P (base) && TREE_READONLY (base)))
2859       /* ???  We can at least use false here, allowing loads?  We
2860 	 are forcing conditional stores if the ref is not always
2861 	 stored to later anyway.  So this would only guard
2862 	 the load we need to emit.  Thus when the ref is not
2863 	 loaded we can elide this completely?  */
2864       && !ref_always_accessed_p (loop, ref, true))
2865     return false;
2866 
2867   /* Verify all loads of ref can be hoisted.  */
2868   if (ref->loaded
2869       && bitmap_bit_p (ref->loaded, loop->num)
2870       && !ref_indep_loop_p (loop, ref, lim_raw))
2871     return false;
2872 
2873   /* Verify the candidate can be disambiguated against all loads,
2874      that is, we can elide all in-loop stores.  Disambiguation
2875      against stores is done later when we cannot guarantee preserving
2876      the order of stores.  */
2877   if (!ref_indep_loop_p (loop, ref, sm_war))
2878     return false;
2879 
2880   return true;
2881 }
2882 
2883 /* Marks in REFS_TO_SM the references in LOOP for which store motion should
2884    be performed.  SM_EXECUTED is the set of references for which store
2885    motion was already performed in one of the outer loops.  */
2886 
2887 static void
2888 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2889 {
2890   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2891   unsigned i;
2892   bitmap_iterator bi;
2893   im_mem_ref *ref;
2894 
2895   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2896     {
2897       ref = memory_accesses.refs_list[i];
2898       if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
2899 	bitmap_set_bit (refs_to_sm, i);
2900     }
2901 }
2902 
2903 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2904    for a store motion optimization (i.e. whether we can insert statements
2905    on its exits).  */
2906 
2907 static bool
2908 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
2909 		      vec<edge> exits)
2910 {
2911   unsigned i;
2912   edge ex;
2913 
2914   FOR_EACH_VEC_ELT (exits, i, ex)
2915     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2916       return false;
2917 
2918   return true;
2919 }
2920 
2921 /* Try to perform store motion for all memory references modified inside
2922    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
2923    store motion was already executed in one of the outer loops.  */
2924 
2925 static void
2926 store_motion_loop (class loop *loop, bitmap sm_executed)
2927 {
2928   auto_vec<edge> exits = get_loop_exit_edges (loop);
2929   class loop *subloop;
2930   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2931 
2932   if (loop_suitable_for_sm (loop, exits))
2933     {
2934       find_refs_for_sm (loop, sm_executed, sm_in_loop);
2935       if (!bitmap_empty_p (sm_in_loop))
2936 	hoist_memory_references (loop, sm_in_loop, exits);
2937     }
2938 
2939   bitmap_ior_into (sm_executed, sm_in_loop);
2940   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2941     store_motion_loop (subloop, sm_executed);
2942   bitmap_and_compl_into (sm_executed, sm_in_loop);
2943   BITMAP_FREE (sm_in_loop);
2944 }
2945 
2946 /* Try to perform store motion for all memory references modified inside
2947    loops.  */
2948 
2949 static void
2950 do_store_motion (void)
2951 {
2952   class loop *loop;
2953   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2954 
2955   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2956     store_motion_loop (loop, sm_executed);
2957 
2958   BITMAP_FREE (sm_executed);
2959 }
2960 
2961 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2962    for each such basic block bb records the outermost loop for which execution
2963    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
2964    blocks that contain a nonpure call.  */
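/* For example (hypothetical loop):

     for (;;)
       {
	 a = x;        // always executed whenever the header executes
	 if (c)
	   b = y;      // not always executed
       }

   the block containing "a = x" gets ALWAYS_EXECUTED_IN set to the loop
   while the conditional block does not, which is what e.g.
   ref_always_accessed_p later relies on when deciding whether a possibly
   trapping reference may be store-motioned.  */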
2965 
2966 static void
2967 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
2968 {
2969   basic_block bb = NULL, *bbs, last = NULL;
2970   unsigned i;
2971   edge e;
2972   class loop *inn_loop = loop;
2973 
2974   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2975     {
2976       bbs = get_loop_body_in_dom_order (loop);
2977 
2978       for (i = 0; i < loop->num_nodes; i++)
2979 	{
2980 	  edge_iterator ei;
2981 	  bb = bbs[i];
2982 
2983 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2984 	    last = bb;
2985 
2986 	  if (bitmap_bit_p (contains_call, bb->index))
2987 	    break;
2988 
2989 	  FOR_EACH_EDGE (e, ei, bb->succs)
2990 	    {
2991 	      /* If there is an exit from this BB.  */
2992 	      if (!flow_bb_inside_loop_p (loop, e->dest))
2993 		break;
2994 	      /* Or we enter a possibly non-finite loop.  */
2995 	      if (flow_loop_nested_p (bb->loop_father,
2996 				      e->dest->loop_father)
2997 		  && ! finite_loop_p (e->dest->loop_father))
2998 		break;
2999 	    }
3000 	  if (e)
3001 	    break;
3002 
3003 	  /* A loop might be infinite (TODO use simple loop analysis
3004 	     to disprove this if possible).  */
3005 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
3006 	    break;
3007 
3008 	  if (!flow_bb_inside_loop_p (inn_loop, bb))
3009 	    break;
3010 
3011 	  if (bb->loop_father->header == bb)
3012 	    {
3013 	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3014 		break;
3015 
3016 	      /* In a loop that is always entered we may proceed anyway.
3017 		 But record that we entered it and stop once we leave it.  */
3018 	      inn_loop = bb->loop_father;
3019 	    }
3020 	}
3021 
3022       while (1)
3023 	{
3024 	  SET_ALWAYS_EXECUTED_IN (last, loop);
3025 	  if (last == loop->header)
3026 	    break;
3027 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
3028 	}
3029 
3030       free (bbs);
3031     }
3032 
3033   for (loop = loop->inner; loop; loop = loop->next)
3034     fill_always_executed_in_1 (loop, contains_call);
3035 }
3036 
3037 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3038    for each such basic block bb records the outermost loop for which execution
3039    of its header implies execution of bb.  */
3040 
3041 static void
3042 fill_always_executed_in (void)
3043 {
3044   basic_block bb;
3045   class loop *loop;
3046 
3047   auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3048   bitmap_clear (contains_call);
3049   FOR_EACH_BB_FN (bb, cfun)
3050     {
3051       gimple_stmt_iterator gsi;
3052       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3053 	{
3054 	  if (nonpure_call_p (gsi_stmt (gsi)))
3055 	    break;
3056 	}
3057 
3058       if (!gsi_end_p (gsi))
3059 	bitmap_set_bit (contains_call, bb->index);
3060     }
3061 
3062   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3063     fill_always_executed_in_1 (loop, contains_call);
3064 }
3065 
3066 
3067 /* Compute the global information needed by the loop invariant motion pass.  */
3068 
3069 static void
3070 tree_ssa_lim_initialize (bool store_motion)
3071 {
3072   class loop *loop;
3073   unsigned i;
3074 
3075   bitmap_obstack_initialize (&lim_bitmap_obstack);
3076   gcc_obstack_init (&mem_ref_obstack);
3077   lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3078 
3079   if (flag_tm)
3080     compute_transaction_bits ();
3081 
3082   memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3083   memory_accesses.refs_list.create (100);
3084   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
3085   memory_accesses.refs_list.quick_push
3086     (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3087 
3088   memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3089   memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3090   memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3091   memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3092   if (store_motion)
3093     {
3094       memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3095       memory_accesses.all_refs_stored_in_loop.quick_grow
3096 						      (number_of_loops (cfun));
3097     }
3098 
3099   for (i = 0; i < number_of_loops (cfun); i++)
3100     {
3101       bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3102 			 &lim_bitmap_obstack);
3103       bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3104 			 &lim_bitmap_obstack);
3105       if (store_motion)
3106 	bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3107 			   &lim_bitmap_obstack);
3108     }
3109 
3110   memory_accesses.ttae_cache = NULL;
3111 
3112   /* Initialize bb_loop_postorder with a mapping from loop->num to
3113      its postorder index.  */
3114   i = 0;
3115   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3116   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
3117     bb_loop_postorder[loop->num] = i++;
3118 }
3119 
3120 /* Cleans up after the invariant motion pass.  */
3121 
3122 static void
3123 tree_ssa_lim_finalize (void)
3124 {
3125   basic_block bb;
3126   unsigned i;
3127   im_mem_ref *ref;
3128 
3129   FOR_EACH_BB_FN (bb, cfun)
3130     SET_ALWAYS_EXECUTED_IN (bb, NULL);
3131 
3132   bitmap_obstack_release (&lim_bitmap_obstack);
3133   delete lim_aux_data_map;
3134 
3135   delete memory_accesses.refs;
3136   memory_accesses.refs = NULL;
3137 
3138   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3139     memref_free (ref);
3140   memory_accesses.refs_list.release ();
3141   obstack_free (&mem_ref_obstack, NULL);
3142 
3143   memory_accesses.refs_loaded_in_loop.release ();
3144   memory_accesses.refs_stored_in_loop.release ();
3145   memory_accesses.all_refs_stored_in_loop.release ();
3146 
3147   if (memory_accesses.ttae_cache)
3148     free_affine_expand_cache (&memory_accesses.ttae_cache);
3149 
3150   free (bb_loop_postorder);
3151 }
3152 
3153 /* Moves invariants from loops.  Only "expensive" invariants are moved out --
3154    i.e. those that are likely to be a win regardless of the register pressure.
3155    Only perform store motion if STORE_MOTION is true.  */
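/* For example (hypothetical input, for exposition only):

     for (i = 0; i < n; ++i)
       a[i] = x * y + b[i];

   is transformed into

     tmp = x * y;
     for (i = 0; i < n; ++i)
       a[i] = tmp + b[i];

   provided x * y is invariant in the loop and judged expensive enough.  */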
3156 
3157 unsigned int
3158 loop_invariant_motion_in_fun (function *fun, bool store_motion)
3159 {
3160   unsigned int todo = 0;
3161 
3162   tree_ssa_lim_initialize (store_motion);
3163 
3164   /* Gathers information about memory accesses in the loops.  */
3165   analyze_memory_references (store_motion);
3166 
3167   /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
3168   fill_always_executed_in ();
3169 
3170   int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3171   int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3172 
3173   /* For each statement determine the outermost loop in which it is
3174      invariant and the cost of computing the invariant.  */
3175   for (int i = 0; i < n; ++i)
3176     compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3177 
3178   /* Execute store motion.  Force the necessary invariants to be moved
3179      out of the loops as well.  */
3180   if (store_motion)
3181     do_store_motion ();
3182 
3183   free (rpo);
3184   rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3185   n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3186 
3187   /* Move the expressions that are expensive enough.  */
3188   for (int i = 0; i < n; ++i)
3189     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3190 
3191   free (rpo);
3192 
3193   gsi_commit_edge_inserts ();
3194   if (need_ssa_update_p (fun))
3195     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3196 
3197   tree_ssa_lim_finalize ();
3198 
3199   return todo;
3200 }
3201 
3202 /* Loop invariant motion pass.  */
3203 
3204 namespace {
3205 
3206 const pass_data pass_data_lim =
3207 {
3208   GIMPLE_PASS, /* type */
3209   "lim", /* name */
3210   OPTGROUP_LOOP, /* optinfo_flags */
3211   TV_LIM, /* tv_id */
3212   PROP_cfg, /* properties_required */
3213   0, /* properties_provided */
3214   0, /* properties_destroyed */
3215   0, /* todo_flags_start */
3216   0, /* todo_flags_finish */
3217 };
3218 
3219 class pass_lim : public gimple_opt_pass
3220 {
3221 public:
3222   pass_lim (gcc::context *ctxt)
3223     : gimple_opt_pass (pass_data_lim, ctxt)
3224   {}
3225 
3226   /* opt_pass methods: */
3227   opt_pass * clone () { return new pass_lim (m_ctxt); }
3228   virtual bool gate (function *) { return flag_tree_loop_im != 0; }
3229   virtual unsigned int execute (function *);
3230 
3231 }; // class pass_lim
3232 
3233 unsigned int
3234 pass_lim::execute (function *fun)
3235 {
3236   bool in_loop_pipeline = scev_initialized_p ();
3237   if (!in_loop_pipeline)
3238     loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3239 
3240   if (number_of_loops (fun) <= 1)
3241     return 0;
3242   unsigned int todo = loop_invariant_motion_in_fun (fun, true);
3243 
3244   if (!in_loop_pipeline)
3245     loop_optimizer_finalize ();
3246   else
3247     scev_reset ();
3248   return todo;
3249 }
3250 
3251 } // anon namespace
3252 
3253 gimple_opt_pass *
3254 make_pass_lim (gcc::context *ctxt)
3255 {
3256   return new pass_lim (ctxt);
3257 }
3258 
3259 
3260