1 /* Loop invariant motion.
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "domwalk.h"
41 #include "params.h"
42 #include "tree-affine.h"
43 #include "tree-ssa-propagate.h"
44 #include "trans-mem.h"
45 #include "gimple-fold.h"
46 #include "tree-scalar-evolution.h"
47 #include "tree-ssa-loop-niter.h"
48 
49 /* TODO:  Support for predicated code motion.  I.e.
50 
51    while (1)
52      {
53        if (cond)
54 	 {
55 	   a = inv;
56 	   something;
57 	 }
58      }
59 
60    Where COND and INV are invariants, but evaluating INV may trap or be
61    invalid for some other reason if !COND.  This may be transformed to
62 
63    if (cond)
64      a = inv;
65    while (1)
66      {
67        if (cond)
68 	 something;
69      }  */
70 
71 /* The auxiliary data kept for each statement.  */
72 
73 struct lim_aux_data
74 {
75   struct loop *max_loop;	/* The outermost loop in which the statement
76 				   is invariant.  */
77 
78   struct loop *tgt_loop;	/* The loop out of which we want to move the
79 				   invariant.  */
80 
81   struct loop *always_executed_in;
82 				/* The outermost loop for which we are sure
83 				   the statement is executed if the loop
84 				   is entered.  */
85 
86   unsigned cost;		/* Cost of the computation performed by the
87 				   statement.  */
88 
89   unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */
90 
91   vec<gimple *> depends;	/* Vector of statements that must be also
92 				   hoisted out of the loop when this statement
93 				   is hoisted; i.e. those that define the
94 				   operands of the statement and are inside of
95 				   the MAX_LOOP loop.  */
96 };
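
/* Note that TGT_LOOP, once chosen by set_level below, is always MAX_LOOP
   itself or a loop nested inside it: MAX_LOOP records how far the statement
   could legally be hoisted, TGT_LOOP how far we actually decide to hoist
   it.  */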
97 
98 /* Maps statements to their lim_aux_data.  */
99 
100 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
101 
102 /* Description of a memory reference location.  */
103 
104 struct mem_ref_loc
105 {
106   tree *ref;			/* The reference itself.  */
107   gimple *stmt;			/* The statement in which it occurs.  */
108 };
109 
110 
111 /* Description of a memory reference.  */
112 
113 struct im_mem_ref
114 {
115   unsigned id;			/* ID assigned to the memory reference
116 				   (its index in memory_accesses.refs_list)  */
117   hashval_t hash;		/* Its hash value.  */
118 
119   /* The memory access itself and associated caching of alias-oracle
120      query meta-data.  */
121   ao_ref mem;
122 
123   bitmap stored;		/* The set of loops in which this memory location
124 				   is stored to.  */
125   vec<mem_ref_loc>		accesses_in_loop;
126 				/* The locations of the accesses.  Vector
127 				   sorted by the loop postorder number.  */
128 
129   /* The following sets are computed on demand.  We keep both set and
130      its complement, so that we know whether the information was
131      already computed or not.  */
132   bitmap_head indep_loop;	/* The set of loops in which the memory
133 				   reference is independent, meaning:
134 				   If it is stored in the loop, this store
135 				     is independent of all other loads and
136 				     stores.
137 				   If it is only loaded, then it is independent
138 				     of all stores in the loop.  */
139   bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
140 };
141 
142 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
143    to record (in)dependence against stores in the loop and its subloops, the
144    second to record (in)dependence against all references in the loop
145    and its subloops.  */
146 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
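
/* For example, for the loop with number 3, LOOP_DEP_BIT (3, 0) == 6 is the
   bit used in INDEP_LOOP/DEP_LOOP to record (in)dependence against the
   stores in loop 3 and its subloops, and LOOP_DEP_BIT (3, 1) == 7 is the
   bit used to record (in)dependence against all references there.  */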
147 
148 /* Mem_ref hashtable helpers.  */
149 
150 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
151 {
152   typedef tree_node *compare_type;
153   static inline hashval_t hash (const im_mem_ref *);
154   static inline bool equal (const im_mem_ref *, const tree_node *);
155 };
156 
157 /* A hash function for struct im_mem_ref object OBJ.  */
158 
159 inline hashval_t
160 mem_ref_hasher::hash (const im_mem_ref *mem)
161 {
162   return mem->hash;
163 }
164 
165 /* An equality function for struct im_mem_ref object MEM1 with
166    memory reference OBJ2.  */
167 
168 inline bool
169 mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
170 {
171   return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
172 }
173 
174 
175 /* Description of memory accesses in loops.  */
176 
177 static struct
178 {
179   /* The hash table of memory references accessed in loops.  */
180   hash_table<mem_ref_hasher> *refs;
181 
182   /* The list of memory references.  */
183   vec<im_mem_ref *> refs_list;
184 
185   /* The set of memory references accessed in each loop.  */
186   vec<bitmap_head> refs_in_loop;
187 
188   /* The set of memory references stored in each loop.  */
189   vec<bitmap_head> refs_stored_in_loop;
190 
191   /* The set of memory references stored in each loop, including subloops.  */
192   vec<bitmap_head> all_refs_stored_in_loop;
193 
194   /* Cache for expanding memory addresses.  */
195   hash_map<tree, name_expansion *> *ttae_cache;
196 } memory_accesses;
197 
198 /* Obstack for the bitmaps in the above data structures.  */
199 static bitmap_obstack lim_bitmap_obstack;
200 static obstack mem_ref_obstack;
201 
202 static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
203 static bool ref_always_accessed_p (struct loop *, im_mem_ref *, bool);
204 
205 /* Minimum cost of an expensive expression.  */
206 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
207 
208 /* The outermost loop for which execution of the header guarantees that the
209    block will be executed.  */
210 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
211 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
212 
213 /* ID of the shared unanalyzable mem.  */
214 #define UNANALYZABLE_MEM_ID 0
215 
216 /* Whether the reference was analyzable.  */
217 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
218 
219 static struct lim_aux_data *
220 init_lim_data (gimple *stmt)
221 {
222   lim_aux_data *p = XCNEW (struct lim_aux_data);
223   lim_aux_data_map->put (stmt, p);
224 
225   return p;
226 }
227 
228 static struct lim_aux_data *
229 get_lim_data (gimple *stmt)
230 {
231   lim_aux_data **p = lim_aux_data_map->get (stmt);
232   if (!p)
233     return NULL;
234 
235   return *p;
236 }
237 
238 /* Releases the memory occupied by DATA.  */
239 
240 static void
241 free_lim_aux_data (struct lim_aux_data *data)
242 {
243   data->depends.release ();
244   free (data);
245 }
246 
247 static void
248 clear_lim_data (gimple *stmt)
249 {
250   lim_aux_data **p = lim_aux_data_map->get (stmt);
251   if (!p)
252     return;
253 
254   free_lim_aux_data (*p);
255   *p = NULL;
256 }
257 
258 
259 /* The possibilities of statement movement.  */
260 enum move_pos
261   {
262     MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
263     MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
264 				   to become executed -- memory accesses, ... */
265     MOVE_POSSIBLE		/* Unlimited movement.  */
266   };
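
/* For illustration, movement_possibility below classifies statements
   roughly as follows: statements with side effects, volatile operands or
   a store get MOVE_IMPOSSIBLE; pure or const calls and statements that
   could trap (for example a division by a possibly zero value) get
   MOVE_PRESERVE_EXECUTION; a simple arithmetic statement computing an SSA
   name gets MOVE_POSSIBLE.  */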
267 
268 
269 /* If it is possible to hoist the statement STMT unconditionally,
270    returns MOVE_POSSIBLE.
271    If it is possible to hoist the statement STMT, but we must avoid making
272    it executed if it would not be executed in the original program (e.g.
273    because it may trap), return MOVE_PRESERVE_EXECUTION.
274    Otherwise return MOVE_IMPOSSIBLE.  */
275 
276 enum move_pos
277 movement_possibility (gimple *stmt)
278 {
279   tree lhs;
280   enum move_pos ret = MOVE_POSSIBLE;
281 
282   if (flag_unswitch_loops
283       && gimple_code (stmt) == GIMPLE_COND)
284     {
285       /* If we perform unswitching, force the operands of the invariant
286 	 condition to be moved out of the loop.  */
287       return MOVE_POSSIBLE;
288     }
289 
290   if (gimple_code (stmt) == GIMPLE_PHI
291       && gimple_phi_num_args (stmt) <= 2
292       && !virtual_operand_p (gimple_phi_result (stmt))
293       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
294     return MOVE_POSSIBLE;
295 
296   if (gimple_get_lhs (stmt) == NULL_TREE)
297     return MOVE_IMPOSSIBLE;
298 
299   if (gimple_vdef (stmt))
300     return MOVE_IMPOSSIBLE;
301 
302   if (stmt_ends_bb_p (stmt)
303       || gimple_has_volatile_ops (stmt)
304       || gimple_has_side_effects (stmt)
305       || stmt_could_throw_p (stmt))
306     return MOVE_IMPOSSIBLE;
307 
308   if (is_gimple_call (stmt))
309     {
310       /* While a pure or const call is guaranteed to have no side effects, we
311 	 cannot move it arbitrarily.  Consider code like
312 
313 	 char *s = something ();
314 
315 	 while (1)
316 	   {
317 	     if (s)
318 	       t = strlen (s);
319 	     else
320 	       t = 0;
321 	   }
322 
323 	 Here the strlen call cannot be moved out of the loop, even though
324 	 s is invariant.  In addition to possibly creating a call with
325 	 invalid arguments, moving out a function call that is not executed
326 	 may cause performance regressions in case the call is costly and
327 	 not executed at all.  */
328       ret = MOVE_PRESERVE_EXECUTION;
329       lhs = gimple_call_lhs (stmt);
330     }
331   else if (is_gimple_assign (stmt))
332     lhs = gimple_assign_lhs (stmt);
333   else
334     return MOVE_IMPOSSIBLE;
335 
336   if (TREE_CODE (lhs) == SSA_NAME
337       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
338     return MOVE_IMPOSSIBLE;
339 
340   if (TREE_CODE (lhs) != SSA_NAME
341       || gimple_could_trap_p (stmt))
342     return MOVE_PRESERVE_EXECUTION;
343 
344   /* Non local loads in a transaction cannot be hoisted out.  Well,
345      unless the load happens on every path out of the loop, but we
346      don't take this into account yet.  */
347   if (flag_tm
348       && gimple_in_transaction (stmt)
349       && gimple_assign_single_p (stmt))
350     {
351       tree rhs = gimple_assign_rhs1 (stmt);
352       if (DECL_P (rhs) && is_global_var (rhs))
353 	{
354 	  if (dump_file)
355 	    {
356 	      fprintf (dump_file, "Cannot hoist conditional load of ");
357 	      print_generic_expr (dump_file, rhs, TDF_SLIM);
358 	      fprintf (dump_file, " because it is in a transaction.\n");
359 	    }
360 	  return MOVE_IMPOSSIBLE;
361 	}
362     }
363 
364   return ret;
365 }
366 
367 /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
368    loop to which we could move the expression using DEF if it did not have
369    other operands, i.e. the outermost loop enclosing LOOP in which the value
370    of DEF is invariant.  */
371 
372 static struct loop *
373 outermost_invariant_loop (tree def, struct loop *loop)
374 {
375   gimple *def_stmt;
376   basic_block def_bb;
377   struct loop *max_loop;
378   struct lim_aux_data *lim_data;
379 
380   if (!def)
381     return superloop_at_depth (loop, 1);
382 
383   if (TREE_CODE (def) != SSA_NAME)
384     {
385       gcc_assert (is_gimple_min_invariant (def));
386       return superloop_at_depth (loop, 1);
387     }
388 
389   def_stmt = SSA_NAME_DEF_STMT (def);
390   def_bb = gimple_bb (def_stmt);
391   if (!def_bb)
392     return superloop_at_depth (loop, 1);
393 
394   max_loop = find_common_loop (loop, def_bb->loop_father);
395 
396   lim_data = get_lim_data (def_stmt);
397   if (lim_data != NULL && lim_data->max_loop != NULL)
398     max_loop = find_common_loop (max_loop,
399 				 loop_outer (lim_data->max_loop));
400   if (max_loop == loop)
401     return NULL;
402   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
403 
404   return max_loop;
405 }
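
/* For example, if DEF is a constant or an SSA name defined outside the
   whole loop nest, the outermost (depth 1) superloop of LOOP is returned;
   if DEF is defined inside LOOP by a statement that cannot itself be
   hoisted out of LOOP, NULL is returned.  */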
406 
407 /* DATA is a structure containing information associated with a statement
408    inside LOOP.  DEF is one of the operands of this statement.
409 
410    Find the outermost loop enclosing LOOP in which the value of DEF is invariant
411    and record this in DATA->max_loop field.  If DEF itself is defined inside
412    this loop as well (i.e. we need to hoist it out of the loop if we want
413    to hoist the statement represented by DATA), record the statement in which
414    DEF is defined in the DATA->depends list.  Additionally, if ADD_COST is true,
415    add the cost of the computation of DEF to the DATA->cost.
416 
417    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
418 
419 static bool
420 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
421 		bool add_cost)
422 {
423   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
424   basic_block def_bb = gimple_bb (def_stmt);
425   struct loop *max_loop;
426   struct lim_aux_data *def_data;
427 
428   if (!def_bb)
429     return true;
430 
431   max_loop = outermost_invariant_loop (def, loop);
432   if (!max_loop)
433     return false;
434 
435   if (flow_loop_nested_p (data->max_loop, max_loop))
436     data->max_loop = max_loop;
437 
438   def_data = get_lim_data (def_stmt);
439   if (!def_data)
440     return true;
441 
442   if (add_cost
443       /* Only add the cost if the statement defining DEF is inside LOOP,
444 	 i.e. if it is likely that by moving the invariants dependent
445 	 on it, we will be able to avoid creating a new register for
446 	 it (since it will be only used in these dependent invariants).  */
447       && def_bb->loop_father == loop)
448     data->cost += def_data->cost;
449 
450   data->depends.safe_push (def_stmt);
451 
452   return true;
453 }
454 
455 /* Returns an estimate of the cost of statement STMT.  The values here
456    are just ad-hoc constants, similar to costs for inlining.  */
457 
458 static unsigned
459 stmt_cost (gimple *stmt)
460 {
461   /* Always try to create possibilities for unswitching.  */
462   if (gimple_code (stmt) == GIMPLE_COND
463       || gimple_code (stmt) == GIMPLE_PHI)
464     return LIM_EXPENSIVE;
465 
466   /* We should be hoisting calls if possible.  */
467   if (is_gimple_call (stmt))
468     {
469       tree fndecl;
470 
471       /* Unless the call is a builtin_constant_p; this always folds to a
472 	 constant, so moving it is useless.  */
473       fndecl = gimple_call_fndecl (stmt);
474       if (fndecl
475 	  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
476 	  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
477 	return 0;
478 
479       return LIM_EXPENSIVE;
480     }
481 
482   /* Hoisting memory references out should almost surely be a win.  */
483   if (gimple_references_memory_p (stmt))
484     return LIM_EXPENSIVE;
485 
486   if (gimple_code (stmt) != GIMPLE_ASSIGN)
487     return 1;
488 
489   switch (gimple_assign_rhs_code (stmt))
490     {
491     case MULT_EXPR:
492     case WIDEN_MULT_EXPR:
493     case WIDEN_MULT_PLUS_EXPR:
494     case WIDEN_MULT_MINUS_EXPR:
495     case DOT_PROD_EXPR:
496     case FMA_EXPR:
497     case TRUNC_DIV_EXPR:
498     case CEIL_DIV_EXPR:
499     case FLOOR_DIV_EXPR:
500     case ROUND_DIV_EXPR:
501     case EXACT_DIV_EXPR:
502     case CEIL_MOD_EXPR:
503     case FLOOR_MOD_EXPR:
504     case ROUND_MOD_EXPR:
505     case TRUNC_MOD_EXPR:
506     case RDIV_EXPR:
507       /* Division and multiplication are usually expensive.  */
508       return LIM_EXPENSIVE;
509 
510     case LSHIFT_EXPR:
511     case RSHIFT_EXPR:
512     case WIDEN_LSHIFT_EXPR:
513     case LROTATE_EXPR:
514     case RROTATE_EXPR:
515       /* Shifts and rotates are usually expensive.  */
516       return LIM_EXPENSIVE;
517 
518     case CONSTRUCTOR:
519       /* Make vector construction cost proportional to the number
520          of elements.  */
521       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
522 
523     case SSA_NAME:
524     case PAREN_EXPR:
525       /* Whether or not something is wrapped inside a PAREN_EXPR
526          should not change move cost.  Nor should an intermediate
527 	 unpropagated SSA name copy.  */
528       return 0;
529 
530     default:
531       return 1;
532     }
533 }
534 
535 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
536    REF is independent.  If REF is not independent in LOOP, NULL is returned
537    instead.  */
538 
539 static struct loop *
540 outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
541 {
542   struct loop *aloop;
543 
544   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
545     return NULL;
546 
547   for (aloop = outer;
548        aloop != loop;
549        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
550     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
551 	&& ref_indep_loop_p (aloop, ref))
552       return aloop;
553 
554   if (ref_indep_loop_p (loop, ref))
555     return loop;
556   else
557     return NULL;
558 }
559 
560 /* If there is a simple load or store to a memory reference in STMT, returns
561    the location of the memory reference, and sets IS_STORE according to whether
562    it is a store or load.  Otherwise, returns NULL.  */
563 
564 static tree *
565 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
566 {
567   tree *lhs, *rhs;
568 
569   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
570   if (!gimple_assign_single_p (stmt))
571     return NULL;
572 
573   lhs = gimple_assign_lhs_ptr (stmt);
574   rhs = gimple_assign_rhs1_ptr (stmt);
575 
576   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
577     {
578       *is_store = false;
579       return rhs;
580     }
581   else if (gimple_vdef (stmt)
582 	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
583     {
584       *is_store = true;
585       return lhs;
586     }
587   else
588     return NULL;
589 }
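
/* For example, for an assignment x_1 = a[i_2] this returns the address of
   the rhs operand and sets *IS_STORE to false, while for a[i_2] = x_1 it
   returns the address of the lhs operand and sets *IS_STORE to true.  A
   statement like a[i_2] = b[j_3], with both operands in memory, matches
   neither pattern and NULL is returned (the SSA names are made up for the
   example).  */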
590 
591 /* From a controlling predicate in DOM determine the arguments from
592    the PHI node PHI that are chosen if the predicate evaluates to
593    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
594    they are non-NULL.  Returns true if the arguments can be determined,
595    otherwise returns false.  */
596 
597 static bool
598 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
599 				  tree *true_arg_p, tree *false_arg_p)
600 {
601   edge te, fe;
602   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
603 					     &te, &fe))
604     return false;
605 
606   if (true_arg_p)
607     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
608   if (false_arg_p)
609     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
610 
611   return true;
612 }
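
/* For illustration (the SSA names are made up), given a diamond

     if (cond_1)	<-- last statement of DOM
       x_2 = ...;
     else
       x_3 = ...;
     # x_4 = PHI <x_2, x_3>

   the function sets *TRUE_ARG_P to x_2 and *FALSE_ARG_P to x_3,
   independently of the order of the PHI arguments.  */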
613 
614 /* Determine the outermost loop to which it is possible to hoist a statement
615    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
616    the outermost loop in which the value computed by STMT is invariant.
617    If MUST_PRESERVE_EXEC is true, additionally choose the loop so that we
618    preserve whether STMT is executed.  It also fills in other related
619    information in LIM_DATA (STMT).
620 
621    The function returns false if STMT cannot be hoisted outside of the loop it
622    is defined in, and true otherwise.  */
623 
624 static bool
625 determine_max_movement (gimple *stmt, bool must_preserve_exec)
626 {
627   basic_block bb = gimple_bb (stmt);
628   struct loop *loop = bb->loop_father;
629   struct loop *level;
630   struct lim_aux_data *lim_data = get_lim_data (stmt);
631   tree val;
632   ssa_op_iter iter;
633 
634   if (must_preserve_exec)
635     level = ALWAYS_EXECUTED_IN (bb);
636   else
637     level = superloop_at_depth (loop, 1);
638   lim_data->max_loop = level;
639 
640   if (gphi *phi = dyn_cast <gphi *> (stmt))
641     {
642       use_operand_p use_p;
643       unsigned min_cost = UINT_MAX;
644       unsigned total_cost = 0;
645       struct lim_aux_data *def_data;
646 
647       /* We will end up promoting dependencies to be unconditionally
648 	 evaluated.  For this reason the PHI cost (and thus the
649 	 cost we remove from the loop by doing the invariant motion)
650 	 is that of the cheapest PHI argument dependency chain.  */
651       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
652 	{
653 	  val = USE_FROM_PTR (use_p);
654 
655 	  if (TREE_CODE (val) != SSA_NAME)
656 	    {
657 	      /* Assign cost 1 to constants.  */
658 	      min_cost = MIN (min_cost, 1);
659 	      total_cost += 1;
660 	      continue;
661 	    }
662 	  if (!add_dependency (val, lim_data, loop, false))
663 	    return false;
664 
665 	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
666 	  if (gimple_bb (def_stmt)
667 	      && gimple_bb (def_stmt)->loop_father == loop)
668 	    {
669 	      def_data = get_lim_data (def_stmt);
670 	      if (def_data)
671 		{
672 		  min_cost = MIN (min_cost, def_data->cost);
673 		  total_cost += def_data->cost;
674 		}
675 	    }
676 	}
677 
678       min_cost = MIN (min_cost, total_cost);
679       lim_data->cost += min_cost;
680 
681       if (gimple_phi_num_args (phi) > 1)
682 	{
683 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
684 	  gimple *cond;
685 	  if (gsi_end_p (gsi_last_bb (dom)))
686 	    return false;
687 	  cond = gsi_stmt (gsi_last_bb (dom));
688 	  if (gimple_code (cond) != GIMPLE_COND)
689 	    return false;
690 	  /* Verify that this is an extended form of a diamond and
691 	     the PHI arguments are completely controlled by the
692 	     predicate in DOM.  */
693 	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
694 	    return false;
695 
696 	  /* Fold in dependencies and cost of the condition.  */
697 	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
698 	    {
699 	      if (!add_dependency (val, lim_data, loop, false))
700 		return false;
701 	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
702 	      if (def_data)
703 		lim_data->cost += def_data->cost;
704 	    }
705 
706 	  /* We want to avoid unconditionally executing very expensive
707 	     operations.  As costs for our dependencies cannot be
708 	     negative, just claim we are not invariant in this case.
709 	     We also are not sure whether the control-flow inside the
710 	     loop will vanish.  */
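	  /* For example, with --param lim-expensive set to 20 (so
	     2 * LIM_EXPENSIVE == 40), total_cost == 50 with min_cost == 2
	     gives 48 >= 40 and 50 / 2 == 25 > 2, so we give up, whereas
	     total_cost == 50 with min_cost == 30 gives 20 < 40 and the PHI
	     is still considered.  */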
711 	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
712 	      && !(min_cost != 0
713 		   && total_cost / min_cost <= 2))
714 	    return false;
715 
716 	  /* Assume that the control-flow in the loop will vanish.
717 	     ???  We should verify this and not artificially increase
718 	     the cost if that is not the case.  */
719 	  lim_data->cost += stmt_cost (stmt);
720 	}
721 
722       return true;
723     }
724   else
725     FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
726       if (!add_dependency (val, lim_data, loop, true))
727 	return false;
728 
729   if (gimple_vuse (stmt))
730     {
731       im_mem_ref *ref
732 	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
733       if (ref
734 	  && MEM_ANALYZABLE (ref))
735 	{
736 	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
737 						     loop, ref);
738 	  if (!lim_data->max_loop)
739 	    return false;
740 	}
741       else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
742 	return false;
743     }
744 
745   lim_data->cost += stmt_cost (stmt);
746 
747   return true;
748 }
749 
750 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
751    and that one of the operands of this statement is computed by STMT.
752    Ensure that STMT (together with all the statements that define its
753    operands) is hoisted at least out of the loop LEVEL.  */
754 
755 static void
756 set_level (gimple *stmt, struct loop *orig_loop, struct loop *level)
757 {
758   struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
759   struct lim_aux_data *lim_data;
760   gimple *dep_stmt;
761   unsigned i;
762 
763   stmt_loop = find_common_loop (orig_loop, stmt_loop);
764   lim_data = get_lim_data (stmt);
765   if (lim_data != NULL && lim_data->tgt_loop != NULL)
766     stmt_loop = find_common_loop (stmt_loop,
767 				  loop_outer (lim_data->tgt_loop));
768   if (flow_loop_nested_p (stmt_loop, level))
769     return;
770 
771   gcc_assert (level == lim_data->max_loop
772 	      || flow_loop_nested_p (lim_data->max_loop, level));
773 
774   lim_data->tgt_loop = level;
775   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
776     set_level (dep_stmt, orig_loop, level);
777 }
778 
779 /* Determines an outermost loop from which we want to hoist the statement STMT.
780    For now we choose the outermost possible loop.  TODO -- use profiling
781    information to set it more sanely.  */
782 
783 static void
784 set_profitable_level (gimple *stmt)
785 {
786   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
787 }
788 
789 /* Returns true if STMT is a call that has side effects.  */
790 
791 static bool
792 nonpure_call_p (gimple *stmt)
793 {
794   if (gimple_code (stmt) != GIMPLE_CALL)
795     return false;
796 
797   return gimple_has_side_effects (stmt);
798 }
799 
800 /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
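
/* For example, for an invariant divisor b_2, x_3 = a_1 / b_2 becomes

     reciptmp_4 = 1.0 / b_2;
     x_3 = a_1 * reciptmp_4;

   where only the reciprocal statement is invariant and can be hoisted
   (the SSA names are made up for the example).  */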
801 
802 static gimple *
803 rewrite_reciprocal (gimple_stmt_iterator *bsi)
804 {
805   gassign *stmt, *stmt1, *stmt2;
806   tree name, lhs, type;
807   tree real_one;
808   gimple_stmt_iterator gsi;
809 
810   stmt = as_a <gassign *> (gsi_stmt (*bsi));
811   lhs = gimple_assign_lhs (stmt);
812   type = TREE_TYPE (lhs);
813 
814   real_one = build_one_cst (type);
815 
816   name = make_temp_ssa_name (type, NULL, "reciptmp");
817   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
818 			       gimple_assign_rhs2 (stmt));
819   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
820 			       gimple_assign_rhs1 (stmt));
821 
822   /* Replace division stmt with reciprocal and multiply stmts.
823      The multiply stmt is not invariant, so update iterator
824      and avoid rescanning.  */
825   gsi = *bsi;
826   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
827   gsi_replace (&gsi, stmt2, true);
828 
829   /* Continue processing with invariant reciprocal statement.  */
830   return stmt1;
831 }
832 
833 /* Check if the pattern at *BSI is a bittest of the form
834    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
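
/* For illustration (the SSA names are made up), a loop body containing

     tmp_1 = a_5 >> b_6;
     tmp_2 = tmp_1 & 1;
     if (tmp_2 != 0) ...

   with invariant b_6 but varying a_5 is rewritten to

     shifttmp_7 = 1 << b_6;	<-- invariant, can be hoisted
     shifttmp_8 = a_5 & shifttmp_7;
     if (shifttmp_8 != 0) ...  */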
835 
836 static gimple *
837 rewrite_bittest (gimple_stmt_iterator *bsi)
838 {
839   gassign *stmt;
840   gimple *stmt1;
841   gassign *stmt2;
842   gimple *use_stmt;
843   gcond *cond_stmt;
844   tree lhs, name, t, a, b;
845   use_operand_p use;
846 
847   stmt = as_a <gassign *> (gsi_stmt (*bsi));
848   lhs = gimple_assign_lhs (stmt);
849 
850   /* Verify that the single use of lhs is a comparison against zero.  */
851   if (TREE_CODE (lhs) != SSA_NAME
852       || !single_imm_use (lhs, &use, &use_stmt))
853     return stmt;
854   cond_stmt = dyn_cast <gcond *> (use_stmt);
855   if (!cond_stmt)
856     return stmt;
857   if (gimple_cond_lhs (cond_stmt) != lhs
858       || (gimple_cond_code (cond_stmt) != NE_EXPR
859 	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
860       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
861     return stmt;
862 
863   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
864   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
865   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
866     return stmt;
867 
868   /* There is a conversion in between possibly inserted by fold.  */
869   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
870     {
871       t = gimple_assign_rhs1 (stmt1);
872       if (TREE_CODE (t) != SSA_NAME
873 	  || !has_single_use (t))
874 	return stmt;
875       stmt1 = SSA_NAME_DEF_STMT (t);
876       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
877 	return stmt;
878     }
879 
880   /* Verify that B is loop invariant but A is not.  Verify that with
881      all the stmt walking we are still in the same loop.  */
882   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
883       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
884     return stmt;
885 
886   a = gimple_assign_rhs1 (stmt1);
887   b = gimple_assign_rhs2 (stmt1);
888 
889   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
890       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
891     {
892       gimple_stmt_iterator rsi;
893 
894       /* 1 << B */
895       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
896 		       build_int_cst (TREE_TYPE (a), 1), b);
897       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
898       stmt1 = gimple_build_assign (name, t);
899 
900       /* A & (1 << B) */
901       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
902       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
903       stmt2 = gimple_build_assign (name, t);
904 
905       /* Replace the SSA_NAME we compare against zero.  Adjust
906 	 the type of zero accordingly.  */
907       SET_USE (use, name);
908       gimple_cond_set_rhs (cond_stmt,
909 			   build_int_cst_type (TREE_TYPE (name),
910 					       0));
911 
912       /* Don't use gsi_replace here, none of the new assignments sets
913 	 the variable originally set in stmt.  Move bsi to stmt1, and
914 	 then remove the original stmt, so that we get a chance to
915 	 retain debug info for it.  */
916       rsi = *bsi;
917       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
918       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
919       gimple *to_release = gsi_stmt (rsi);
920       gsi_remove (&rsi, true);
921       release_defs (to_release);
922 
923       return stmt1;
924     }
925 
926   return stmt;
927 }
928 
929 /* For each statement determines the outermost loop in which it is invariant,
930    the statements on whose motion it depends and the cost of the computation.
931    This information is stored in the LIM_DATA structure associated with
932    each statement.  */
933 class invariantness_dom_walker : public dom_walker
934 {
935 public:
936   invariantness_dom_walker (cdi_direction direction)
937     : dom_walker (direction) {}
938 
939   virtual edge before_dom_children (basic_block);
940 };
941 
942 /* Determine the outermost loops in which statements in basic block BB are
943    invariant, and record them to the LIM_DATA associated with the statements.
944    Callback for dom_walker.  */
945 
946 edge
947 invariantness_dom_walker::before_dom_children (basic_block bb)
948 {
949   enum move_pos pos;
950   gimple_stmt_iterator bsi;
951   gimple *stmt;
952   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
953   struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
954   struct lim_aux_data *lim_data;
955 
956   if (!loop_outer (bb->loop_father))
957     return NULL;
958 
959   if (dump_file && (dump_flags & TDF_DETAILS))
960     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
961 	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
962 
963   /* Look at PHI nodes, but only if there are at most two.
964      ???  We could relax this further by post-processing the inserted
965      code and transforming adjacent cond-exprs with the same predicate
966      to control flow again.  */
967   bsi = gsi_start_phis (bb);
968   if (!gsi_end_p (bsi)
969       && ((gsi_next (&bsi), gsi_end_p (bsi))
970 	  || (gsi_next (&bsi), gsi_end_p (bsi))))
971     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
972       {
973 	stmt = gsi_stmt (bsi);
974 
975 	pos = movement_possibility (stmt);
976 	if (pos == MOVE_IMPOSSIBLE)
977 	  continue;
978 
979 	lim_data = get_lim_data (stmt);
980 	if (! lim_data)
981 	  lim_data = init_lim_data (stmt);
982 	lim_data->always_executed_in = outermost;
983 
984 	if (!determine_max_movement (stmt, false))
985 	  {
986 	    lim_data->max_loop = NULL;
987 	    continue;
988 	  }
989 
990 	if (dump_file && (dump_flags & TDF_DETAILS))
991 	  {
992 	    print_gimple_stmt (dump_file, stmt, 2);
993 	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
994 		     loop_depth (lim_data->max_loop),
995 		     lim_data->cost);
996 	  }
997 
998 	if (lim_data->cost >= LIM_EXPENSIVE)
999 	  set_profitable_level (stmt);
1000       }
1001 
1002   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1003     {
1004       stmt = gsi_stmt (bsi);
1005 
1006       pos = movement_possibility (stmt);
1007       if (pos == MOVE_IMPOSSIBLE)
1008 	{
1009 	  if (nonpure_call_p (stmt))
1010 	    {
1011 	      maybe_never = true;
1012 	      outermost = NULL;
1013 	    }
1014 	  /* Make sure to note always_executed_in for stores to make
1015 	     store-motion work.  */
1016 	  else if (stmt_makes_single_store (stmt))
1017 	    {
1018 	      struct lim_aux_data *lim_data = get_lim_data (stmt);
1019 	      if (! lim_data)
1020 		lim_data = init_lim_data (stmt);
1021 	      lim_data->always_executed_in = outermost;
1022 	    }
1023 	  continue;
1024 	}
1025 
1026       if (is_gimple_assign (stmt)
1027 	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1028 	      == GIMPLE_BINARY_RHS))
1029 	{
1030 	  tree op0 = gimple_assign_rhs1 (stmt);
1031 	  tree op1 = gimple_assign_rhs2 (stmt);
1032 	  struct loop *ol1 = outermost_invariant_loop (op1,
1033 					loop_containing_stmt (stmt));
1034 
1035 	  /* If the divisor is invariant, convert a/b to a*(1/b), allowing the
1036 	     reciprocal to be hoisted out of the loop, saving an expensive divide.  */
1037 	  if (pos == MOVE_POSSIBLE
1038 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1039 	      && flag_unsafe_math_optimizations
1040 	      && !flag_trapping_math
1041 	      && ol1 != NULL
1042 	      && outermost_invariant_loop (op0, ol1) == NULL)
1043 	    stmt = rewrite_reciprocal (&bsi);
1044 
1045 	  /* If the shift count is invariant, convert (A >> B) & 1 to
1046 	     A & (1 << B), allowing the bit mask to be hoisted out of the loop
1047 	     and saving an expensive shift.  */
1048 	  if (pos == MOVE_POSSIBLE
1049 	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1050 	      && integer_onep (op1)
1051 	      && TREE_CODE (op0) == SSA_NAME
1052 	      && has_single_use (op0))
1053 	    stmt = rewrite_bittest (&bsi);
1054 	}
1055 
1056       lim_data = get_lim_data (stmt);
1057       if (! lim_data)
1058 	lim_data = init_lim_data (stmt);
1059       lim_data->always_executed_in = outermost;
1060 
1061       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1062 	continue;
1063 
1064       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1065 	{
1066 	  lim_data->max_loop = NULL;
1067 	  continue;
1068 	}
1069 
1070       if (dump_file && (dump_flags & TDF_DETAILS))
1071 	{
1072 	  print_gimple_stmt (dump_file, stmt, 2);
1073 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1074 		   loop_depth (lim_data->max_loop),
1075 		   lim_data->cost);
1076 	}
1077 
1078       if (lim_data->cost >= LIM_EXPENSIVE)
1079 	set_profitable_level (stmt);
1080     }
1081   return NULL;
1082 }
1083 
1084 /* Hoist the statements in basic block BB out of the loops prescribed by
1085    data stored in LIM_DATA structures associated with each statement.  Callback
1086    for walk_dominator_tree.  */
1087 
1088 unsigned int
1089 move_computations_worker (basic_block bb)
1090 {
1091   struct loop *level;
1092   unsigned cost = 0;
1093   struct lim_aux_data *lim_data;
1094   unsigned int todo = 0;
1095 
1096   if (!loop_outer (bb->loop_father))
1097     return todo;
1098 
1099   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1100     {
1101       gassign *new_stmt;
1102       gphi *stmt = bsi.phi ();
1103 
1104       lim_data = get_lim_data (stmt);
1105       if (lim_data == NULL)
1106 	{
1107 	  gsi_next (&bsi);
1108 	  continue;
1109 	}
1110 
1111       cost = lim_data->cost;
1112       level = lim_data->tgt_loop;
1113       clear_lim_data (stmt);
1114 
1115       if (!level)
1116 	{
1117 	  gsi_next (&bsi);
1118 	  continue;
1119 	}
1120 
1121       if (dump_file && (dump_flags & TDF_DETAILS))
1122 	{
1123 	  fprintf (dump_file, "Moving PHI node\n");
1124 	  print_gimple_stmt (dump_file, stmt, 0);
1125 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1126 		   cost, level->num);
1127 	}
1128 
1129       if (gimple_phi_num_args (stmt) == 1)
1130 	{
1131 	  tree arg = PHI_ARG_DEF (stmt, 0);
1132 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1133 					  TREE_CODE (arg), arg);
1134 	}
1135       else
1136 	{
1137 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1138 	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
1139 	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1140 	  /* Get the PHI arguments corresponding to the true and false
1141 	     edges of COND.  */
1142 	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1143 	  gcc_assert (arg0 && arg1);
1144 	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1145 		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1146 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1147 					  COND_EXPR, t, arg0, arg1);
1148 	  todo |= TODO_cleanup_cfg;
1149 	}
1150       if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1151 	  && (!ALWAYS_EXECUTED_IN (bb)
1152 	      || (ALWAYS_EXECUTED_IN (bb) != level
1153 		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1154 	{
1155 	  tree lhs = gimple_assign_lhs (new_stmt);
1156 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1157 	}
1158       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1159       remove_phi_node (&bsi, false);
1160     }
1161 
1162   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1163     {
1164       edge e;
1165 
1166       gimple *stmt = gsi_stmt (bsi);
1167 
1168       lim_data = get_lim_data (stmt);
1169       if (lim_data == NULL)
1170 	{
1171 	  gsi_next (&bsi);
1172 	  continue;
1173 	}
1174 
1175       cost = lim_data->cost;
1176       level = lim_data->tgt_loop;
1177       clear_lim_data (stmt);
1178 
1179       if (!level)
1180 	{
1181 	  gsi_next (&bsi);
1182 	  continue;
1183 	}
1184 
1185       /* We do not really want to move conditionals out of the loop; we just
1186 	 placed them here to force their operands to be moved if necessary.  */
1187       if (gimple_code (stmt) == GIMPLE_COND)
1188 	continue;
1189 
1190       if (dump_file && (dump_flags & TDF_DETAILS))
1191 	{
1192 	  fprintf (dump_file, "Moving statement\n");
1193 	  print_gimple_stmt (dump_file, stmt, 0);
1194 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1195 		   cost, level->num);
1196 	}
1197 
1198       e = loop_preheader_edge (level);
1199       gcc_assert (!gimple_vdef (stmt));
1200       if (gimple_vuse (stmt))
1201 	{
1202 	  /* The new VUSE is the one from the virtual PHI in the loop
1203 	     header or the one already present.  */
1204 	  gphi_iterator gsi2;
1205 	  for (gsi2 = gsi_start_phis (e->dest);
1206 	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1207 	    {
1208 	      gphi *phi = gsi2.phi ();
1209 	      if (virtual_operand_p (gimple_phi_result (phi)))
1210 		{
1211 		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1212 		  break;
1213 		}
1214 	    }
1215 	}
1216       gsi_remove (&bsi, false);
1217       if (gimple_has_lhs (stmt)
1218 	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1219 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1220 	  && (!ALWAYS_EXECUTED_IN (bb)
1221 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1222 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1223 	{
1224 	  tree lhs = gimple_get_lhs (stmt);
1225 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1226 	}
1227       /* In case this is a stmt that is not unconditionally executed
1228          when the target loop header is executed and the stmt may
1229 	 invoke undefined integer or pointer overflow, rewrite it to
1230 	 unsigned arithmetic.  */
1231       if (is_gimple_assign (stmt)
1232 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1233 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1234 	  && arith_code_with_undefined_signed_overflow
1235 	       (gimple_assign_rhs_code (stmt))
1236 	  && (!ALWAYS_EXECUTED_IN (bb)
1237 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1238 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1239 	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1240       else
1241 	gsi_insert_on_edge (e, stmt);
1242     }
1243 
1244   return todo;
1245 }
1246 
1247 /* Hoist the statements out of the loops prescribed by data stored in
1248    LIM_DATA structures associated with each statement.  */
1249 
1250 static unsigned int
1251 move_computations (void)
1252 {
1253   int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
1254   int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
1255   unsigned todo = 0;
1256 
1257   for (int i = 0; i < n; ++i)
1258     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));
1259 
1260   free (rpo);
1261 
1262   gsi_commit_edge_inserts ();
1263   if (need_ssa_update_p (cfun))
1264     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1265 
1266   return todo;
1267 }
1268 
1269 /* Checks whether the statement defining variable *INDEX can be hoisted
1270    out of the loop passed in DATA.  Callback for for_each_index.  */
1271 
1272 static bool
1273 may_move_till (tree ref, tree *index, void *data)
1274 {
1275   struct loop *loop = (struct loop *) data, *max_loop;
1276 
1277   /* If REF is an array reference, check also that the step and the lower
1278      bound are invariant in LOOP.  */
1279   if (TREE_CODE (ref) == ARRAY_REF)
1280     {
1281       tree step = TREE_OPERAND (ref, 3);
1282       tree lbound = TREE_OPERAND (ref, 2);
1283 
1284       max_loop = outermost_invariant_loop (step, loop);
1285       if (!max_loop)
1286 	return false;
1287 
1288       max_loop = outermost_invariant_loop (lbound, loop);
1289       if (!max_loop)
1290 	return false;
1291     }
1292 
1293   max_loop = outermost_invariant_loop (*index, loop);
1294   if (!max_loop)
1295     return false;
1296 
1297   return true;
1298 }
1299 
1300 /* If OP is an SSA_NAME, force the statement that defines it to be
1301    moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1302 
1303 static void
1304 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1305 {
1306   gimple *stmt;
1307 
1308   if (!op
1309       || is_gimple_min_invariant (op))
1310     return;
1311 
1312   gcc_assert (TREE_CODE (op) == SSA_NAME);
1313 
1314   stmt = SSA_NAME_DEF_STMT (op);
1315   if (gimple_nop_p (stmt))
1316     return;
1317 
1318   set_level (stmt, orig_loop, loop);
1319 }
1320 
1321 /* Forces the statements defining invariants in REF (and *INDEX) to be moved out of
1322    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
1323    for_each_index.  */
1324 
1325 struct fmt_data
1326 {
1327   struct loop *loop;
1328   struct loop *orig_loop;
1329 };
1330 
1331 static bool
1332 force_move_till (tree ref, tree *index, void *data)
1333 {
1334   struct fmt_data *fmt_data = (struct fmt_data *) data;
1335 
1336   if (TREE_CODE (ref) == ARRAY_REF)
1337     {
1338       tree step = TREE_OPERAND (ref, 3);
1339       tree lbound = TREE_OPERAND (ref, 2);
1340 
1341       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1342       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1343     }
1344 
1345   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1346 
1347   return true;
1348 }
1349 
1350 /* A function to free the mem_ref object MEM.  */
1351 
1352 static void
1353 memref_free (struct im_mem_ref *mem)
1354 {
1355   mem->accesses_in_loop.release ();
1356 }
1357 
1358 /* Allocates and returns a memory reference description for MEM whose hash
1359    value is HASH and id is ID.  */
1360 
1361 static im_mem_ref *
1362 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1363 {
1364   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1365   ao_ref_init (&ref->mem, mem);
1366   ref->id = id;
1367   ref->hash = hash;
1368   ref->stored = NULL;
1369   bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1370   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1371   ref->accesses_in_loop.create (1);
1372 
1373   return ref;
1374 }
1375 
1376 /* Records the memory reference location *LOC to the memory reference
1377    description REF.  The reference occurs in statement STMT.  */
1378 
1379 static void
1380 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1381 {
1382   mem_ref_loc aref;
1383   aref.stmt = stmt;
1384   aref.ref = loc;
1385   ref->accesses_in_loop.safe_push (aref);
1386 }
1387 
1388 /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
1389    necessary.  Return whether the bit changed.  */
1390 
1391 static bool
1392 set_ref_stored_in_loop (im_mem_ref *ref, struct loop *loop)
1393 {
1394   if (!ref->stored)
1395     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1396   return bitmap_set_bit (ref->stored, loop->num);
1397 }
1398 
1399 /* Marks reference REF as stored in LOOP.  */
1400 
1401 static void
1402 mark_ref_stored (im_mem_ref *ref, struct loop *loop)
1403 {
1404   while (loop != current_loops->tree_root
1405 	 && set_ref_stored_in_loop (ref, loop))
1406     loop = loop_outer (loop);
1407 }
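
/* For example, if an enclosing loop already has its bit set in REF->stored
   because of an earlier store to REF, set_ref_stored_in_loop reports that
   no bit changed and the walk above stops there instead of revisiting the
   whole loop hierarchy.  */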
1408 
1409 /* Gathers memory references in statement STMT in LOOP, storing the
1410    information about them in the memory_accesses structure.  Marks
1411    the vops accessed through unrecognized statements there as
1412    well.  */
1413 
1414 static void
1415 gather_mem_refs_stmt (struct loop *loop, gimple *stmt)
1416 {
1417   tree *mem = NULL;
1418   hashval_t hash;
1419   im_mem_ref **slot;
1420   im_mem_ref *ref;
1421   bool is_stored;
1422   unsigned id;
1423 
1424   if (!gimple_vuse (stmt))
1425     return;
1426 
1427   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1428   if (!mem)
1429     {
1430       /* We use the shared mem_ref for all unanalyzable refs.  */
1431       id = UNANALYZABLE_MEM_ID;
1432       ref = memory_accesses.refs_list[id];
1433       if (dump_file && (dump_flags & TDF_DETAILS))
1434 	{
1435 	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1436 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1437 	}
1438       is_stored = gimple_vdef (stmt);
1439     }
1440   else
1441     {
1442       hash = iterative_hash_expr (*mem, 0);
1443       slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1444       if (*slot)
1445 	{
1446 	  ref = *slot;
1447 	  id = ref->id;
1448 	}
1449       else
1450 	{
1451 	  id = memory_accesses.refs_list.length ();
1452 	  ref = mem_ref_alloc (*mem, hash, id);
1453 	  memory_accesses.refs_list.safe_push (ref);
1454 	  *slot = ref;
1455 
1456 	  if (dump_file && (dump_flags & TDF_DETAILS))
1457 	    {
1458 	      fprintf (dump_file, "Memory reference %u: ", id);
1459 	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1460 	      fprintf (dump_file, "\n");
1461 	    }
1462 	}
1463 
1464       record_mem_ref_loc (ref, stmt, mem);
1465     }
1466   bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1467   if (is_stored)
1468     {
1469       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1470       mark_ref_stored (ref, loop);
1471     }
1472   init_lim_data (stmt)->ref = ref->id;
1473   return;
1474 }
1475 
1476 static unsigned *bb_loop_postorder;
1477 
1478 /* qsort comparison function to sort blocks by their loop father's postorder.  */
1479 
1480 static int
1481 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1482 {
1483   basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1484   basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1485   struct loop *loop1 = bb1->loop_father;
1486   struct loop *loop2 = bb2->loop_father;
1487   if (loop1->num == loop2->num)
1488     return bb1->index - bb2->index;
1489   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1490 }
1491 
1492 /* qsort comparison function to sort ref locs by their loop father's postorder.  */
1493 
1494 static int
1495 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1496 {
1497   mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1498   mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1499   struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1500   struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1501   if (loop1->num == loop2->num)
1502     return 0;
1503   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1504 }
1505 
1506 /* Gathers memory references in loops.  */
1507 
1508 static void
1509 analyze_memory_references (void)
1510 {
1511   gimple_stmt_iterator bsi;
1512   basic_block bb, *bbs;
1513   struct loop *loop, *outer;
1514   unsigned i, n;
1515 
1516   /* Collect all basic-blocks in loops and sort them by their loop's
1517      postorder number.  */
1518   i = 0;
1519   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1520   FOR_EACH_BB_FN (bb, cfun)
1521     if (bb->loop_father != current_loops->tree_root)
1522       bbs[i++] = bb;
1523   n = i;
1524   qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1525 
1526   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1527      That results in better locality for all the bitmaps.  */
1528   for (i = 0; i < n; ++i)
1529     {
1530       basic_block bb = bbs[i];
1531       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1532         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1533     }
1534 
1535   /* Sort the location list of gathered memory references by their
1536      loop postorder number.  */
1537   im_mem_ref *ref;
1538   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1539     ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1540 
1541   free (bbs);
1542 //  free (bb_loop_postorder);
1543 
1544   /* Propagate the information about accessed memory references up
1545      the loop hierarchy.  */
1546   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1547     {
1548       /* Finalize the overall touched references (including subloops).  */
1549       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1550 		       &memory_accesses.refs_stored_in_loop[loop->num]);
1551 
1552       /* Propagate the information about accessed memory references up
1553 	 the loop hierarchy.  */
1554       outer = loop_outer (loop);
1555       if (outer == current_loops->tree_root)
1556 	continue;
1557 
1558       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1559 		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
1560     }
1561 }
1562 
1563 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1564    tree_to_aff_combination_expand.  */
1565 
1566 static bool
1567 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1568 		      hash_map<tree, name_expansion *> **ttae_cache)
1569 {
1570   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1571      object and their offsets differ in such a way that the locations cannot
1572      overlap, then they cannot alias.  */
1573   poly_widest_int size1, size2;
1574   aff_tree off1, off2;
1575 
1576   /* Perform basic offset and type-based disambiguation.  */
1577   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1578     return false;
1579 
1580   /* The expansion of addresses may be a bit expensive, thus we only do
1581      the check at -O2 and higher optimization levels.  */
1582   if (optimize < 2)
1583     return true;
1584 
1585   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1586   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1587   aff_combination_expand (&off1, ttae_cache);
1588   aff_combination_expand (&off2, ttae_cache);
1589   aff_combination_scale (&off1, -1);
1590   aff_combination_add (&off2, &off1);
1591 
1592   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1593     return false;
1594 
1595   return true;
1596 }
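
/* As a made-up example, two references to adjacent elements of the same
   array, say p_1->a[i_2] and p_1->a[i_2 + 1], have expanded affine offsets
   that differ by exactly one element size, so aff_comb_cannot_overlap_p
   detects that the locations cannot overlap and false is returned.  */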
1597 
1598 /* Compare function for bsearch searching for reference locations
1599    in a loop.  */
1600 
1601 static int
1602 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1603 {
1604   struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1605   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1606   struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1607   if (loop->num  == loc_loop->num
1608       || flow_loop_nested_p (loop, loc_loop))
1609     return 0;
1610   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1611 	  ? -1 : 1);
1612 }
1613 
1614 /* Iterates over all locations of REF in LOOP and its subloops calling
1615    fn.operator() with the location as argument.  When that operator
1616    returns true the iteration is stopped and true is returned.
1617    Otherwise false is returned.  */
1618 
1619 template <typename FN>
1620 static bool
1621 for_all_locs_in_loop (struct loop *loop, im_mem_ref *ref, FN fn)
1622 {
1623   unsigned i;
1624   mem_ref_loc *loc;
1625 
1626   /* Search for the cluster of locs in the accesses_in_loop vector
1627      which is sorted by the postorder index of the loop father.  */
1628   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1629   if (!loc)
1630     return false;
1631 
1632   /* We have found one location inside loop or its sub-loops.  Iterate
1633      both forward and backward to cover the whole cluster.  */
1634   i = loc - ref->accesses_in_loop.address ();
1635   while (i > 0)
1636     {
1637       --i;
1638       mem_ref_loc *l = &ref->accesses_in_loop[i];
1639       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1640 	break;
1641       if (fn (l))
1642 	return true;
1643     }
1644   for (i = loc - ref->accesses_in_loop.address ();
1645        i < ref->accesses_in_loop.length (); ++i)
1646     {
1647       mem_ref_loc *l = &ref->accesses_in_loop[i];
1648       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1649 	break;
1650       if (fn (l))
1651 	return true;
1652     }
1653 
1654   return false;
1655 }
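
/* Example of the cluster lookup above, for a hypothetical loop L1 with a
   sub-loop L2.  accesses_in_loop is sorted by the postorder index of the
   loop father of each location, so all locations inside L1 -- including the
   ones inside L2 -- form one contiguous run in the vector.  The bsearch
   with find_ref_loc_in_loop_cmp lands on an arbitrary element of that run;
   the backward and forward scans above then extend from it to both ends of
   the run, stopping at the first location whose basic block lies outside
   L1.  */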
1656 
1657 /* Rewrites location LOC by TMP_VAR.  */
1658 
1659 struct rewrite_mem_ref_loc
1660 {
1661   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1662   bool operator () (mem_ref_loc *loc);
1663   tree tmp_var;
1664 };
1665 
1666 bool
1667 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1668 {
1669   *loc->ref = tmp_var;
1670   update_stmt (loc->stmt);
1671   return false;
1672 }
1673 
1674 /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1675 
1676 static void
1677 rewrite_mem_refs (struct loop *loop, im_mem_ref *ref, tree tmp_var)
1678 {
1679   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1680 }
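
/* For example, with a hypothetical reference *p and temporary lsm_tmp,
   rewrite_mem_refs turns every occurrence inside the loop

     ... = *p;     into     ... = lsm_tmp;
     *p = ...;     into     lsm_tmp = ...;

   The load of *p into lsm_tmp and the stores back to *p on the exits are
   emitted separately by execute_sm.  */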
1681 
1682 /* Stores the first reference location in LOCP.  */
1683 
1684 struct first_mem_ref_loc_1
1685 {
1686   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1687   bool operator () (mem_ref_loc *loc);
1688   mem_ref_loc **locp;
1689 };
1690 
1691 bool
1692 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1693 {
1694   *locp = loc;
1695   return true;
1696 }
1697 
1698 /* Returns the first reference location to REF in LOOP.  */
1699 
1700 static mem_ref_loc *
1701 first_mem_ref_loc (struct loop *loop, im_mem_ref *ref)
1702 {
1703   mem_ref_loc *locp = NULL;
1704   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1705   return locp;
1706 }
1707 
1708 struct prev_flag_edges {
1709   /* Edge to insert new flag comparison code.  */
1710   edge append_cond_position;
1711 
1712   /* Edge for fall through from previous flag comparison.  */
1713   edge last_cond_fallthru;
1714 };
1715 
1716 /* Helper function for execute_sm.  Emit code to store TMP_VAR into
1717    MEM along edge EX.
1718 
1719    The store is only done if MEM has changed.  We do this so no
1720    changes to MEM occur on code paths that did not originally store
1721    into it.
1722 
1723    The common case for execute_sm will transform:
1724 
1725      for (...) {
1726        if (foo)
1727          stuff;
1728        else
1729          MEM = TMP_VAR;
1730      }
1731 
1732    into:
1733 
1734      lsm = MEM;
1735      for (...) {
1736        if (foo)
1737          stuff;
1738        else
1739          lsm = TMP_VAR;
1740      }
1741      MEM = lsm;
1742 
1743   This function will generate:
1744 
1745      lsm = MEM;
1746 
1747      lsm_flag = false;
1748      ...
1749      for (...) {
1750        if (foo)
1751          stuff;
1752        else {
1753          lsm = TMP_VAR;
1754          lsm_flag = true;
1755        }
1756      }
1757      if (lsm_flag)	<--
1758        MEM = lsm;	<--
1759 */
1760 
1761 static void
1762 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1763 		       edge preheader, hash_set <basic_block> *flag_bbs)
1764 {
1765   basic_block new_bb, then_bb, old_dest;
1766   bool loop_has_only_one_exit;
1767   edge then_old_edge, orig_ex = ex;
1768   gimple_stmt_iterator gsi;
1769   gimple *stmt;
1770   struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1771   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1772 
1773   profile_count count_sum = profile_count::zero ();
1774   int nbbs = 0, ncount = 0;
1775   profile_probability flag_probability = profile_probability::uninitialized ();
1776 
1777   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1778      at loop exit.
1779 
1780      This code may look fancy, but it cannot update the profile very
1781      realistically, because we do not know the probability that the flag
1782      will be true at a given loop exit.
1783 
1784      We look for two interesting extremes
1785        - when exit is dominated by block setting the flag, we know it will
1786          always be true.  This is a common case.
1787        - when all blocks setting the flag have very low frequency we know
1788          it will likely be false.
1789      In all other cases we default to 2/3 for flag being true.  */
1790 
1791   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1792        it != flag_bbs->end (); ++it)
1793     {
1794        if ((*it)->count.initialized_p ())
1795          count_sum += (*it)->count, ncount ++;
1796        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1797 	 flag_probability = profile_probability::always ();
1798        nbbs++;
1799     }
1800 
1801   profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1802 
1803   if (flag_probability.initialized_p ())
1804     ;
1805   else if (ncount == nbbs
1806 	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1807     {
1808       flag_probability = count_sum.probability_in (preheader->count ());
1809       if (flag_probability > cap)
1810 	flag_probability = cap;
1811     }
1812 
1813   if (!flag_probability.initialized_p ())
1814     flag_probability = cap;
1815 
1816   /* ?? Insert store after previous store if applicable.  See note
1817      below.  */
1818   if (prev_edges)
1819     ex = prev_edges->append_cond_position;
1820 
1821   loop_has_only_one_exit = single_pred_p (ex->dest);
1822 
1823   if (loop_has_only_one_exit)
1824     ex = split_block_after_labels (ex->dest);
1825   else
1826     {
1827       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1828 	   !gsi_end_p (gpi); gsi_next (&gpi))
1829 	{
1830 	  gphi *phi = gpi.phi ();
1831 	  if (virtual_operand_p (gimple_phi_result (phi)))
1832 	    continue;
1833 
1834 	  /* When the destination has a non-virtual PHI node with multiple
1835 	     predecessors make sure we preserve the PHI structure by
1836 	     forcing a forwarder block so that hoisting of that PHI will
1837 	     still work.  */
1838 	  split_edge (ex);
1839 	  break;
1840 	}
1841     }
1842 
1843   old_dest = ex->dest;
1844   new_bb = split_edge (ex);
1845   then_bb = create_empty_bb (new_bb);
1846   then_bb->count = new_bb->count.apply_probability (flag_probability);
1847   if (irr)
1848     then_bb->flags = BB_IRREDUCIBLE_LOOP;
1849   add_bb_to_loop (then_bb, new_bb->loop_father);
1850 
1851   gsi = gsi_start_bb (new_bb);
1852   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1853 			    NULL_TREE, NULL_TREE);
1854   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1855 
1856   gsi = gsi_start_bb (then_bb);
1857   /* Insert actual store.  */
1858   stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1859   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1860 
1861   edge e1 = single_succ_edge (new_bb);
1862   edge e2 = make_edge (new_bb, then_bb,
1863 	               EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1864   e2->probability = flag_probability;
1865 
1866   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
1867   e1->flags &= ~EDGE_FALLTHRU;
1868 
1869   e1->probability = flag_probability.invert ();
1870 
1871   then_old_edge = make_single_succ_edge (then_bb, old_dest,
1872 			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1873 
1874   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1875 
1876   if (prev_edges)
1877     {
1878       basic_block prevbb = prev_edges->last_cond_fallthru->src;
1879       redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1880       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1881       set_immediate_dominator (CDI_DOMINATORS, old_dest,
1882 			       recompute_dominator (CDI_DOMINATORS, old_dest));
1883     }
1884 
1885   /* ?? Because stores may alias, they must happen in the exact
1886      sequence they originally happened.  Save the position right after
1887      the (_lsm) store we just created so we can continue appending after
1888      it and maintain the original order.  */
1889   {
1890     struct prev_flag_edges *p;
1891 
1892     if (orig_ex->aux)
1893       orig_ex->aux = NULL;
1894     alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1895     p = (struct prev_flag_edges *) orig_ex->aux;
1896     p->append_cond_position = then_old_edge;
1897     p->last_cond_fallthru = find_edge (new_bb, old_dest);
1898     orig_ex->aux = (void *) p;
1899   }
1900 
1901   if (!loop_has_only_one_exit)
1902     for (gphi_iterator gpi = gsi_start_phis (old_dest);
1903 	 !gsi_end_p (gpi); gsi_next (&gpi))
1904       {
1905 	gphi *phi = gpi.phi ();
1906 	unsigned i;
1907 
1908 	for (i = 0; i < gimple_phi_num_args (phi); i++)
1909 	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1910 	    {
1911 	      tree arg = gimple_phi_arg_def (phi, i);
1912 	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1913 	      update_stmt (phi);
1914 	    }
1915       }
1916 }
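
/* Note on the prev_flag_edges bookkeeping above: when several references
   are store-motioned across the same exit edge, the conditional stores must
   execute in their original order.  E.g. for two hypothetical references
   MEM1 and MEM2 the exit ends up as

     if (lsm_flag1)
       MEM1 = lsm1;
     if (lsm_flag2)
       MEM2 = lsm2;

   the position for appending the second test being remembered in the aux
   data of the original exit edge after the first one was emitted.  */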
1917 
1918 /* When REF is written at the location, set the flag indicating the store.  */
1919 
1920 struct sm_set_flag_if_changed
1921 {
1922   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
1923 	 : flag (flag_), bbs (bbs_) {}
1924   bool operator () (mem_ref_loc *loc);
1925   tree flag;
1926   hash_set <basic_block> *bbs;
1927 };
1928 
1929 bool
1930 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
1931 {
1932   /* Only set the flag for writes.  */
1933   if (is_gimple_assign (loc->stmt)
1934       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1935     {
1936       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1937       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
1938       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1939       bbs->add (gimple_bb (stmt));
1940     }
1941   return false;
1942 }
1943 
1944 /* Helper function for execute_sm.  On every location where REF is
1945    written, set an appropriate flag indicating the store.  */
1946 
1947 static tree
1948 execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref,
1949 				hash_set <basic_block> *bbs)
1950 {
1951   tree flag;
1952   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1953   flag = create_tmp_reg (boolean_type_node, str);
1954   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
1955   return flag;
1956 }
1957 
1958 /* Executes store motion of memory reference REF from LOOP.
1959    Exits from the LOOP are stored in EXITS.  The initialization of the
1960    temporary variable is put in the preheader of the loop, and assignments
1961    to the reference from the temporary variable are emitted on the exits.  */
1962 
1963 static void
1964 execute_sm (struct loop *loop, vec<edge> exits, im_mem_ref *ref)
1965 {
1966   tree tmp_var, store_flag = NULL_TREE;
1967   unsigned i;
1968   gassign *load;
1969   struct fmt_data fmt_data;
1970   edge ex;
1971   struct lim_aux_data *lim_data;
1972   bool multi_threaded_model_p = false;
1973   gimple_stmt_iterator gsi;
1974   hash_set<basic_block> flag_bbs;
1975 
1976   if (dump_file && (dump_flags & TDF_DETAILS))
1977     {
1978       fprintf (dump_file, "Executing store motion of ");
1979       print_generic_expr (dump_file, ref->mem.ref);
1980       fprintf (dump_file, " from loop %d\n", loop->num);
1981     }
1982 
1983   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1984 			    get_lsm_tmp_name (ref->mem.ref, ~0));
1985 
1986   fmt_data.loop = loop;
1987   fmt_data.orig_loop = loop;
1988   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
1989 
1990   if (bb_in_transaction (loop_preheader_edge (loop)->src)
1991       || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
1992 	  && ! ref_always_accessed_p (loop, ref, true)))
1993     multi_threaded_model_p = true;
1994 
1995   if (multi_threaded_model_p)
1996     store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs);
1997 
1998   rewrite_mem_refs (loop, ref, tmp_var);
1999 
2000   /* Emit the load code on a random exit edge or into the latch if
2001      the loop does not exit, so that we are sure it will be processed
2002      by move_computations after all dependencies.  */
2003   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2004 
2005   /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2006      load altogether, since the store is predicated by a flag.  We
2007      could do the load only if it was originally in the loop.  */
2008   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2009   lim_data = init_lim_data (load);
2010   lim_data->max_loop = loop;
2011   lim_data->tgt_loop = loop;
2012   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2013 
2014   if (multi_threaded_model_p)
2015     {
2016       load = gimple_build_assign (store_flag, boolean_false_node);
2017       lim_data = init_lim_data (load);
2018       lim_data->max_loop = loop;
2019       lim_data->tgt_loop = loop;
2020       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2021     }
2022 
2023   /* Sink the store to every exit from the loop.  */
2024   FOR_EACH_VEC_ELT (exits, i, ex)
2025     if (!multi_threaded_model_p)
2026       {
2027 	gassign *store;
2028 	store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2029 	gsi_insert_on_edge (ex, store);
2030       }
2031     else
2032       execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag,
2033 			     loop_preheader_edge (loop), &flag_bbs);
2034 }
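
/* For the single-threaded model the net effect of execute_sm is simply
   (MEM and the loop body are hypothetical):

     for (...)
       MEM = f (MEM);

   becomes

     lsm_tmp = MEM;
     for (...)
       lsm_tmp = f (lsm_tmp);
     MEM = lsm_tmp;

   i.e. a load into the temporary that move_computations later hoists out
   of the loop, and an unconditional store on every exit edge.  The
   multi-threaded model guards the exit stores with the flag instead, as
   shown before execute_sm_if_changed.  */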
2035 
2036 /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2037    edges of the LOOP.  */
2038 
2039 static void
2040 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2041 			 vec<edge> exits)
2042 {
2043   im_mem_ref *ref;
2044   unsigned  i;
2045   bitmap_iterator bi;
2046 
2047   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2048     {
2049       ref = memory_accesses.refs_list[i];
2050       execute_sm (loop, exits, ref);
2051     }
2052 }
2053 
2054 struct ref_always_accessed
2055 {
2056   ref_always_accessed (struct loop *loop_, bool stored_p_)
2057       : loop (loop_), stored_p (stored_p_) {}
2058   bool operator () (mem_ref_loc *loc);
2059   struct loop *loop;
2060   bool stored_p;
2061 };
2062 
2063 bool
2064 ref_always_accessed::operator () (mem_ref_loc *loc)
2065 {
2066   struct loop *must_exec;
2067 
2068   if (!get_lim_data (loc->stmt))
2069     return false;
2070 
2071   /* If we require an always executed store make sure the statement
2072      stores to the reference.  */
2073   if (stored_p)
2074     {
2075       tree lhs = gimple_get_lhs (loc->stmt);
2076       if (!lhs
2077 	  || lhs != *loc->ref)
2078 	return false;
2079     }
2080 
2081   must_exec = get_lim_data (loc->stmt)->always_executed_in;
2082   if (!must_exec)
2083     return false;
2084 
2085   if (must_exec == loop
2086       || flow_loop_nested_p (must_exec, loop))
2087     return true;
2088 
2089   return false;
2090 }
2091 
2092 /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
2093    make sure REF is always stored to in LOOP.  */
2094 
2095 static bool
2096 ref_always_accessed_p (struct loop *loop, im_mem_ref *ref, bool stored_p)
2097 {
2098   return for_all_locs_in_loop (loop, ref,
2099 			       ref_always_accessed (loop, stored_p));
2100 }
2101 
2102 /* Returns true if REF1 and REF2 are independent.  */
2103 
2104 static bool
2105 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
2106 {
2107   if (ref1 == ref2)
2108     return true;
2109 
2110   if (dump_file && (dump_flags & TDF_DETAILS))
2111     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2112 	     ref1->id, ref2->id);
2113 
2114   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2115     {
2116       if (dump_file && (dump_flags & TDF_DETAILS))
2117 	fprintf (dump_file, "dependent.\n");
2118       return false;
2119     }
2120   else
2121     {
2122       if (dump_file && (dump_flags & TDF_DETAILS))
2123 	fprintf (dump_file, "independent.\n");
2124       return true;
2125     }
2126 }
2127 
2128 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2129    and its super-loops.  */
2130 
2131 static void
2132 record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
2133 {
2134   /* We can propagate dependent-in-loop bits up the loop
2135      hierarchy to all outer loops.  */
2136   while (loop != current_loops->tree_root
2137 	 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2138     loop = loop_outer (loop);
2139 }
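
/* E.g. if a reference is found dependent in a loop L3 nested in L2 nested
   in L1 (a hypothetical nest), the dep_loop bits are set for L3, L2 and L1:
   the conflicting reference inside L3 is contained in the outer loops as
   well, so the dependence holds there too.  */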
2140 
2141 /* Returns true if REF is independent of all other memory
2142    references in LOOP.  */
2143 
2144 static bool
2145 ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
2146 {
2147   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2148 
2149   bool indep_p = true;
2150   bitmap refs_to_check;
2151 
2152   if (stored_p)
2153     refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2154   else
2155     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2156 
2157   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2158     indep_p = false;
2159   else
2160     {
2161       if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2162 	return true;
2163       if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2164 	return false;
2165 
2166       struct loop *inner = loop->inner;
2167       while (inner)
2168 	{
2169 	  if (!ref_indep_loop_p_1 (inner, ref, stored_p))
2170 	    {
2171 	      indep_p = false;
2172 	      break;
2173 	    }
2174 	  inner = inner->next;
2175 	}
2176 
2177       if (indep_p)
2178 	{
2179 	  unsigned i;
2180 	  bitmap_iterator bi;
2181 	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2182 	    {
2183 	      im_mem_ref *aref = memory_accesses.refs_list[i];
2184 	      if (!refs_independent_p (ref, aref))
2185 		{
2186 		  indep_p = false;
2187 		  break;
2188 		}
2189 	    }
2190 	}
2191     }
2192 
2193   if (dump_file && (dump_flags & TDF_DETAILS))
2194     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2195 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
2196 
2197   /* Record the computed result in the cache.  */
2198   if (indep_p)
2199     {
2200       if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2201 	  && stored_p)
2202 	{
2203 	  /* If it's independent against all refs then it's independent
2204 	     against stores, too.  */
2205 	  bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2206 	}
2207     }
2208   else
2209     {
2210       record_dep_loop (loop, ref, stored_p);
2211       if (!stored_p)
2212 	{
2213 	  /* If it's dependent against stores it's dependent against
2214 	     all refs, too.  */
2215 	  record_dep_loop (loop, ref, true);
2216 	}
2217     }
2218 
2219   return indep_p;
2220 }
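
/* The indep_loop/dep_loop bitmaps act as a cache for the recursive query
   above: the result for a loop is recorded and reused when the same loop
   is visited again as an inner loop of an enclosing query.  Independence
   against all references trivially implies independence against the stored
   ones only, and dependence against the stored references implies
   dependence against all of them, which is why both bits are updated
   together above.  */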
2221 
2222 /* Returns true if REF is independent of all other memory references in
2223    LOOP.  */
2224 
2225 static bool
2226 ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
2227 {
2228   gcc_checking_assert (MEM_ANALYZABLE (ref));
2229 
2230   return ref_indep_loop_p_1 (loop, ref, false);
2231 }
2232 
2233 /* Returns true if we can perform store motion of REF from LOOP.  */
2234 
2235 static bool
2236 can_sm_ref_p (struct loop *loop, im_mem_ref *ref)
2237 {
2238   tree base;
2239 
2240   /* Can't hoist unanalyzable refs.  */
2241   if (!MEM_ANALYZABLE (ref))
2242     return false;
2243 
2244   /* It should be movable.  */
2245   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2246       || TREE_THIS_VOLATILE (ref->mem.ref)
2247       || !for_each_index (&ref->mem.ref, may_move_till, loop))
2248     return false;
2249 
2250   /* If it can throw, fail; we do not properly update EH info.  */
2251   if (tree_could_throw_p (ref->mem.ref))
2252     return false;
2253 
2254   /* If it can trap, it must be always executed in LOOP.
2255      Readonly memory locations may trap when storing to them, but
2256      tree_could_trap_p is a predicate for rvalues, so check that
2257      explicitly.  */
2258   base = get_base_address (ref->mem.ref);
2259   if ((tree_could_trap_p (ref->mem.ref)
2260        || (DECL_P (base) && TREE_READONLY (base)))
2261       && !ref_always_accessed_p (loop, ref, true))
2262     return false;
2263 
2264   /* And it must be independent of all other memory references
2265      in LOOP.  */
2266   if (!ref_indep_loop_p (loop, ref))
2267     return false;
2268 
2269   return true;
2270 }
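
/* Examples of references that are rejected above (hypothetical code):

     volatile int *p; ... *p = x;   <- volatile, never moved

     if (q) *q = x;                 <- *q could trap when Q is NULL and the
                                       store is not always executed in the
                                       loop, so it must not be performed
                                       unconditionally on the exits.  */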
2271 
2272 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2273    should be performed.  SM_EXECUTED is the set of references for which
2274    store motion was already performed in one of the outer loops.  */
2275 
2276 static void
2277 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2278 {
2279   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2280   unsigned i;
2281   bitmap_iterator bi;
2282   im_mem_ref *ref;
2283 
2284   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2285     {
2286       ref = memory_accesses.refs_list[i];
2287       if (can_sm_ref_p (loop, ref))
2288 	bitmap_set_bit (refs_to_sm, i);
2289     }
2290 }
2291 
2292 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2293    for a store motion optimization (i.e. whether we can insert statements
2294    on its exits).  */
2295 
2296 static bool
2297 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2298 		      vec<edge> exits)
2299 {
2300   unsigned i;
2301   edge ex;
2302 
2303   FOR_EACH_VEC_ELT (exits, i, ex)
2304     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2305       return false;
2306 
2307   return true;
2308 }
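
/* E.g. a loop whose only exits are abnormal or EH edges is not suitable:
   the compensation stores would have to be inserted on those edges, which
   is not possible.  */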
2309 
2310 /* Try to perform store motion for all memory references modified inside
2311    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
2312    store motion was already executed in one of the outer loops.  */
2313 
2314 static void
2315 store_motion_loop (struct loop *loop, bitmap sm_executed)
2316 {
2317   vec<edge> exits = get_loop_exit_edges (loop);
2318   struct loop *subloop;
2319   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2320 
2321   if (loop_suitable_for_sm (loop, exits))
2322     {
2323       find_refs_for_sm (loop, sm_executed, sm_in_loop);
2324       hoist_memory_references (loop, sm_in_loop, exits);
2325     }
2326   exits.release ();
2327 
2328   bitmap_ior_into (sm_executed, sm_in_loop);
2329   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2330     store_motion_loop (subloop, sm_executed);
2331   bitmap_and_compl_into (sm_executed, sm_in_loop);
2332   BITMAP_FREE (sm_in_loop);
2333 }
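
/* The SM_EXECUTED bitmap threaded through the recursion above prevents
   performing the same store motion twice: a reference already moved out of
   an outer loop is skipped in all of its subloops (find_refs_for_sm
   iterates over the complement), and the bits added for this loop are
   removed again before returning, so sibling loops are not affected.  */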
2334 
2335 /* Try to perform store motion for all memory references modified inside
2336    loops.  */
2337 
2338 static void
2339 store_motion (void)
2340 {
2341   struct loop *loop;
2342   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2343 
2344   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2345     store_motion_loop (loop, sm_executed);
2346 
2347   BITMAP_FREE (sm_executed);
2348   gsi_commit_edge_inserts ();
2349 }
2350 
2351 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2352    for each such basic block bb records the outermost loop for which execution
2353    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
2354    blocks that contain a nonpure call.  */
2355 
2356 static void
2357 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2358 {
2359   basic_block bb = NULL, *bbs, last = NULL;
2360   unsigned i;
2361   edge e;
2362   struct loop *inn_loop = loop;
2363 
2364   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2365     {
2366       bbs = get_loop_body_in_dom_order (loop);
2367 
2368       for (i = 0; i < loop->num_nodes; i++)
2369 	{
2370 	  edge_iterator ei;
2371 	  bb = bbs[i];
2372 
2373 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2374 	    last = bb;
2375 
2376 	  if (bitmap_bit_p (contains_call, bb->index))
2377 	    break;
2378 
2379 	  FOR_EACH_EDGE (e, ei, bb->succs)
2380 	    {
2381 	      /* If there is an exit from this BB.  */
2382 	      if (!flow_bb_inside_loop_p (loop, e->dest))
2383 		break;
2384 	      /* Or we enter a possibly non-finite loop.  */
2385 	      if (flow_loop_nested_p (bb->loop_father,
2386 				      e->dest->loop_father)
2387 		  && ! finite_loop_p (e->dest->loop_father))
2388 		break;
2389 	    }
2390 	  if (e)
2391 	    break;
2392 
2393 	  /* A loop might be infinite (TODO use simple loop analysis
2394 	     to disprove this if possible).  */
2395 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
2396 	    break;
2397 
2398 	  if (!flow_bb_inside_loop_p (inn_loop, bb))
2399 	    break;
2400 
2401 	  if (bb->loop_father->header == bb)
2402 	    {
2403 	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2404 		break;
2405 
2406 	      /* In a loop that is always entered we may proceed anyway.
2407 		 But record that we entered it and stop once we leave it.  */
2408 	      inn_loop = bb->loop_father;
2409 	    }
2410 	}
2411 
2412       while (1)
2413 	{
2414 	  SET_ALWAYS_EXECUTED_IN (last, loop);
2415 	  if (last == loop->header)
2416 	    break;
2417 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
2418 	}
2419 
2420       free (bbs);
2421     }
2422 
2423   for (loop = loop->inner; loop; loop = loop->next)
2424     fill_always_executed_in_1 (loop, contains_call);
2425 }
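
/* As an example (with a hypothetical loop body), given

     while (1)
       {
	 a = x;		<- always executed when the loop is entered
	 if (cond)
	   break;
	 b = y;		<- not always executed; the loop may exit first
       }

   the block containing the assignment to A gets ALWAYS_EXECUTED_IN set to
   the loop, whereas the block containing the assignment to B does not,
   because the walk above stops at the first block with an exit edge.  */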
2426 
2427 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2428    for each such basic block bb records the outermost loop for which execution
2429    of its header implies execution of bb.  */
2430 
2431 static void
2432 fill_always_executed_in (void)
2433 {
2434   basic_block bb;
2435   struct loop *loop;
2436 
2437   auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
2438   bitmap_clear (contains_call);
2439   FOR_EACH_BB_FN (bb, cfun)
2440     {
2441       gimple_stmt_iterator gsi;
2442       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2443 	{
2444 	  if (nonpure_call_p (gsi_stmt (gsi)))
2445 	    break;
2446 	}
2447 
2448       if (!gsi_end_p (gsi))
2449 	bitmap_set_bit (contains_call, bb->index);
2450     }
2451 
2452   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2453     fill_always_executed_in_1 (loop, contains_call);
2454 }
2455 
2456 
2457 /* Compute the global information needed by the loop invariant motion pass.  */
2458 
2459 static void
2460 tree_ssa_lim_initialize (void)
2461 {
2462   struct loop *loop;
2463   unsigned i;
2464 
2465   bitmap_obstack_initialize (&lim_bitmap_obstack);
2466   gcc_obstack_init (&mem_ref_obstack);
2467   lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
2468 
2469   if (flag_tm)
2470     compute_transaction_bits ();
2471 
2472   alloc_aux_for_edges (0);
2473 
2474   memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2475   memory_accesses.refs_list.create (100);
2476   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
2477   memory_accesses.refs_list.quick_push
2478     (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2479 
2480   memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2481   memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2482   memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2483   memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2484   memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2485   memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2486 
2487   for (i = 0; i < number_of_loops (cfun); i++)
2488     {
2489       bitmap_initialize (&memory_accesses.refs_in_loop[i],
2490 			 &lim_bitmap_obstack);
2491       bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2492 			 &lim_bitmap_obstack);
2493       bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2494 			 &lim_bitmap_obstack);
2495     }
2496 
2497   memory_accesses.ttae_cache = NULL;
2498 
2499   /* Initialize bb_loop_postorder with a mapping from loop->num to
2500      its postorder index.  */
2501   i = 0;
2502   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2503   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2504     bb_loop_postorder[loop->num] = i++;
2505 }
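
/* The bb_loop_postorder mapping built above numbers the loops from the
   innermost outwards, e.g. for a hypothetical nest

     loop 1
       loop 2
       loop 3

   the LI_FROM_INNERMOST walk visits loops 2 and 3 before loop 1, so both
   inner loops receive a smaller index than their parent.  This is the
   ordering find_ref_loc_in_loop_cmp relies on when searching
   accesses_in_loop.  */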
2506 
2507 /* Cleans up after the invariant motion pass.  */
2508 
2509 static void
2510 tree_ssa_lim_finalize (void)
2511 {
2512   basic_block bb;
2513   unsigned i;
2514   im_mem_ref *ref;
2515 
2516   free_aux_for_edges ();
2517 
2518   FOR_EACH_BB_FN (bb, cfun)
2519     SET_ALWAYS_EXECUTED_IN (bb, NULL);
2520 
2521   bitmap_obstack_release (&lim_bitmap_obstack);
2522   delete lim_aux_data_map;
2523 
2524   delete memory_accesses.refs;
2525   memory_accesses.refs = NULL;
2526 
2527   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2528     memref_free (ref);
2529   memory_accesses.refs_list.release ();
2530   obstack_free (&mem_ref_obstack, NULL);
2531 
2532   memory_accesses.refs_in_loop.release ();
2533   memory_accesses.refs_stored_in_loop.release ();
2534   memory_accesses.all_refs_stored_in_loop.release ();
2535 
2536   if (memory_accesses.ttae_cache)
2537     free_affine_expand_cache (&memory_accesses.ttae_cache);
2538 
2539   free (bb_loop_postorder);
2540 }
2541 
2542 /* Moves invariants from loops.  Only "expensive" invariants are moved out --
2543    i.e. those that are likely to be a win regardless of the register pressure.  */
2544 
2545 static unsigned int
2546 tree_ssa_lim (void)
2547 {
2548   unsigned int todo;
2549 
2550   tree_ssa_lim_initialize ();
2551 
2552   /* Gathers information about memory accesses in the loops.  */
2553   analyze_memory_references ();
2554 
2555   /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
2556   fill_always_executed_in ();
2557 
2558   /* For each statement determine the outermost loop in which it is
2559      invariant and the cost of computing the invariant.  */
2560   invariantness_dom_walker (CDI_DOMINATORS)
2561     .walk (cfun->cfg->x_entry_block_ptr);
2562 
2563   /* Execute store motion.  Force the necessary invariants to be moved
2564      out of the loops as well.  */
2565   store_motion ();
2566 
2567   /* Move the expressions that are expensive enough.  */
2568   todo = move_computations ();
2569 
2570   tree_ssa_lim_finalize ();
2571 
2572   return todo;
2573 }
2574 
2575 /* Loop invariant motion pass.  */
2576 
2577 namespace {
2578 
2579 const pass_data pass_data_lim =
2580 {
2581   GIMPLE_PASS, /* type */
2582   "lim", /* name */
2583   OPTGROUP_LOOP, /* optinfo_flags */
2584   TV_LIM, /* tv_id */
2585   PROP_cfg, /* properties_required */
2586   0, /* properties_provided */
2587   0, /* properties_destroyed */
2588   0, /* todo_flags_start */
2589   0, /* todo_flags_finish */
2590 };
2591 
2592 class pass_lim : public gimple_opt_pass
2593 {
2594 public:
2595   pass_lim (gcc::context *ctxt)
2596     : gimple_opt_pass (pass_data_lim, ctxt)
2597   {}
2598 
2599   /* opt_pass methods: */
2600   opt_pass * clone () { return new pass_lim (m_ctxt); }
2601   virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2602   virtual unsigned int execute (function *);
2603 
2604 }; // class pass_lim
2605 
2606 unsigned int
2607 pass_lim::execute (function *fun)
2608 {
2609   bool in_loop_pipeline = scev_initialized_p ();
2610   if (!in_loop_pipeline)
2611     loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2612 
2613   if (number_of_loops (fun) <= 1)
2614     return 0;
2615   unsigned int todo = tree_ssa_lim ();
2616 
2617   if (!in_loop_pipeline)
2618     loop_optimizer_finalize ();
2619   else
2620     scev_reset ();
2621   return todo;
2622 }
2623 
2624 } // anon namespace
2625 
2626 gimple_opt_pass *
2627 make_pass_lim (gcc::context *ctxt)
2628 {
2629   return new pass_lim (ctxt);
2630 }
2631 
2632 
2633