1 /* Loop invariant motion.
2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "tree.h"
25 #include "tm_p.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-flow.h"
29 #include "cfgloop.h"
30 #include "domwalk.h"
31 #include "params.h"
32 #include "tree-pass.h"
33 #include "flags.h"
34 #include "hashtab.h"
35 #include "tree-affine.h"
36 #include "pointer-set.h"
37 #include "tree-ssa-propagate.h"
38 
39 /* TODO:  Support for predicated code motion.  I.e.
40 
41    while (1)
42      {
43        if (cond)
44 	 {
45 	   a = inv;
46 	   something;
47 	 }
48      }
49 
50    Where COND and INV are invariants, but evaluating INV may trap or be
51    invalid for some other reason if !COND.  This may be transformed to
52 
53    if (cond)
54      a = inv;
55    while (1)
56      {
57        if (cond)
58 	 something;
59      }  */
60 
61 /* A type for the list of statements that have to be moved in order to be able
62    to hoist an invariant computation.  */
63 
64 struct depend
65 {
66   gimple stmt;
67   struct depend *next;
68 };
69 
70 /* The auxiliary data kept for each statement.  */
71 
72 struct lim_aux_data
73 {
74   struct loop *max_loop;	/* The outermost loop in which the statement
75 				   is invariant.  */
76 
77   struct loop *tgt_loop;	/* The loop out of which we want to move the
78 				   invariant.  */
79 
80   struct loop *always_executed_in;
81 				/* The outermost loop for which we are sure
82 				   the statement is executed if the loop
83 				   is entered.  */
84 
85   unsigned cost;		/* Cost of the computation performed by the
86 				   statement.  */
87 
88   struct depend *depends;	/* List of statements that must also be hoisted
89 				   out of the loop when this statement is
90 				   hoisted; i.e. those that define the operands
91 				   of the statement and are inside the
92 				   MAX_LOOP loop.  */
93 };
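
/* As a purely illustrative example of the DEPENDS list above (the names
   are made up for this sketch): in a loop such as

     for (...)
       {
	 b = inv1 * inv2;
	 a = b + inv3;
       }

   hoisting "a = b + inv3" only makes sense if "b = inv1 * inv2" is hoisted
   as well, so the statement defining B ends up on the DEPENDS list of the
   statement defining A.  */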
94 
95 /* Maps statements to their lim_aux_data.  */
96 
97 static struct pointer_map_t *lim_aux_data_map;
98 
99 /* Description of a memory reference location.  */
100 
101 typedef struct mem_ref_loc
102 {
103   tree *ref;			/* The reference itself.  */
104   gimple stmt;			/* The statement in which it occurs.  */
105 } *mem_ref_loc_p;
106 
107 
108 /* The list of memory reference locations in a loop.  */
109 
110 typedef struct mem_ref_locs
111 {
112   vec<mem_ref_loc_p> locs;
113 } *mem_ref_locs_p;
114 
115 
116 /* Description of a memory reference.  */
117 
118 typedef struct mem_ref
119 {
120   tree mem;			/* The memory itself.  */
121   unsigned id;			/* ID assigned to the memory reference
122 				   (its index in memory_accesses.refs_list)  */
123   hashval_t hash;		/* Its hash value.  */
124   bitmap stored;		/* The set of loops in which this memory location
125 				   is stored to.  */
126   vec<mem_ref_locs_p> accesses_in_loop;
127 				/* The locations of the accesses.  Vector
128 				   indexed by the loop number.  */
129 
130   /* The following sets are computed on demand.  We keep both set and
131      its complement, so that we know whether the information was
132      already computed or not.  */
133   bitmap indep_loop;		/* The set of loops in which the memory
134 				   reference is independent, meaning:
135 				   If it is stored in the loop, this store
136 				     is independent of all other loads and
137 				     stores.
138 				   If it is only loaded, then it is independent
139 				     of all stores in the loop.  */
140   bitmap dep_loop;		/* The complement of INDEP_LOOP.  */
141 
142   bitmap indep_ref;		/* The set of memory references of which
143 				   this reference is independent.  */
144   bitmap dep_ref;		/* The complement of INDEP_REF.  */
145 } *mem_ref_p;
146 
147 
148 
149 
150 /* Description of memory accesses in loops.  */
151 
152 static struct
153 {
154   /* The hash table of memory references accessed in loops.  */
155   htab_t refs;
156 
157   /* The list of memory references.  */
158   vec<mem_ref_p> refs_list;
159 
160   /* The set of memory references accessed in each loop.  */
161   vec<bitmap> refs_in_loop;
162 
163   /* The set of memory references accessed in each loop, including
164      subloops.  */
165   vec<bitmap> all_refs_in_loop;
166 
167   /* The set of memory references stored in each loop, including
168      subloops.  */
169   vec<bitmap> all_refs_stored_in_loop;
170 
171   /* Cache for expanding memory addresses.  */
172   struct pointer_map_t *ttae_cache;
173 } memory_accesses;
174 
175 /* Obstack for the bitmaps in the above data structures.  */
176 static bitmap_obstack lim_bitmap_obstack;
177 
178 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
179 
180 /* Minimum cost of an expensive expression.  */
181 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
182 
183 /* The outermost loop for which execution of the header guarantees that the
184    block will be executed.  */
185 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
186 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
187 
188 /* Whether the reference was analyzable.  */
189 #define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node)
190 
191 static struct lim_aux_data *
192 init_lim_data (gimple stmt)
193 {
194   void **p = pointer_map_insert (lim_aux_data_map, stmt);
195 
196   *p = XCNEW (struct lim_aux_data);
197   return (struct lim_aux_data *) *p;
198 }
199 
200 static struct lim_aux_data *
201 get_lim_data (gimple stmt)
202 {
203   void **p = pointer_map_contains (lim_aux_data_map, stmt);
204   if (!p)
205     return NULL;
206 
207   return (struct lim_aux_data *) *p;
208 }
209 
210 /* Releases the memory occupied by DATA.  */
211 
212 static void
213 free_lim_aux_data (struct lim_aux_data *data)
214 {
215   struct depend *dep, *next;
216 
217   for (dep = data->depends; dep; dep = next)
218     {
219       next = dep->next;
220       free (dep);
221     }
222   free (data);
223 }
224 
225 static void
226 clear_lim_data (gimple stmt)
227 {
228   void **p = pointer_map_contains (lim_aux_data_map, stmt);
229   if (!p)
230     return;
231 
232   free_lim_aux_data ((struct lim_aux_data *) *p);
233   *p = NULL;
234 }
235 
236 /* Calls CBCK for each index in memory reference ADDR_P.  There are two
237    kinds of situations handled; in each of these cases, the memory reference
238    and DATA are passed to the callback:
239 
240    Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
241    pass the pointer to the index to the callback.
242 
243    Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
244    pointer to addr to the callback.
245 
246    If the callback returns false, the whole search stops and false is returned.
247    Otherwise the function returns true after traversing through the whole
248    reference *ADDR_P.  */
249 
250 bool
251 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
252 {
253   tree *nxt, *idx;
254 
255   for (; ; addr_p = nxt)
256     {
257       switch (TREE_CODE (*addr_p))
258 	{
259 	case SSA_NAME:
260 	  return cbck (*addr_p, addr_p, data);
261 
262 	case MEM_REF:
263 	  nxt = &TREE_OPERAND (*addr_p, 0);
264 	  return cbck (*addr_p, nxt, data);
265 
266 	case BIT_FIELD_REF:
267 	case VIEW_CONVERT_EXPR:
268 	case REALPART_EXPR:
269 	case IMAGPART_EXPR:
270 	  nxt = &TREE_OPERAND (*addr_p, 0);
271 	  break;
272 
273 	case COMPONENT_REF:
274 	  /* If the component has varying offset, it behaves like index
275 	     as well.  */
276 	  idx = &TREE_OPERAND (*addr_p, 2);
277 	  if (*idx
278 	      && !cbck (*addr_p, idx, data))
279 	    return false;
280 
281 	  nxt = &TREE_OPERAND (*addr_p, 0);
282 	  break;
283 
284 	case ARRAY_REF:
285 	case ARRAY_RANGE_REF:
286 	  nxt = &TREE_OPERAND (*addr_p, 0);
287 	  if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
288 	    return false;
289 	  break;
290 
291 	case VAR_DECL:
292 	case PARM_DECL:
293 	case CONST_DECL:
294 	case STRING_CST:
295 	case RESULT_DECL:
296 	case VECTOR_CST:
297 	case COMPLEX_CST:
298 	case INTEGER_CST:
299 	case REAL_CST:
300 	case FIXED_CST:
301 	case CONSTRUCTOR:
302 	  return true;
303 
304 	case ADDR_EXPR:
305 	  gcc_assert (is_gimple_min_invariant (*addr_p));
306 	  return true;
307 
308 	case TARGET_MEM_REF:
309 	  idx = &TMR_BASE (*addr_p);
310 	  if (*idx
311 	      && !cbck (*addr_p, idx, data))
312 	    return false;
313 	  idx = &TMR_INDEX (*addr_p);
314 	  if (*idx
315 	      && !cbck (*addr_p, idx, data))
316 	    return false;
317 	  idx = &TMR_INDEX2 (*addr_p);
318 	  if (*idx
319 	      && !cbck (*addr_p, idx, data))
320 	    return false;
321 	  return true;
322 
323 	default:
324     	  gcc_unreachable ();
325 	}
326     }
327 }
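
/* A minimal, purely illustrative sketch of how for_each_index is meant to
   be used (may_move_till and force_move_till below are the real callbacks
   in this file); COUNT_INDEX and N are hypothetical names:

     static bool
     count_index (tree base ATTRIBUTE_UNUSED, tree *idx ATTRIBUTE_UNUSED,
		  void *data)
     {
       (*(unsigned *) data)++;
       return true;
     }

     unsigned n = 0;
     for_each_index (&ref, count_index, &n);

   After the call, N holds the number of indices visited in REF.  */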
328 
329 /* If it is possible to hoist the statement STMT unconditionally,
330    returns MOVE_POSSIBLE.
331    If it is possible to hoist the statement STMT, but we must avoid making
332    it executed if it would not be executed in the original program (e.g.
333    because it may trap), return MOVE_PRESERVE_EXECUTION.
334    Otherwise return MOVE_IMPOSSIBLE.  */
335 
336 enum move_pos
337 movement_possibility (gimple stmt)
338 {
339   tree lhs;
340   enum move_pos ret = MOVE_POSSIBLE;
341 
342   if (flag_unswitch_loops
343       && gimple_code (stmt) == GIMPLE_COND)
344     {
345       /* If we perform unswitching, force the operands of the invariant
346 	 condition to be moved out of the loop.  */
347       return MOVE_POSSIBLE;
348     }
349 
350   if (gimple_code (stmt) == GIMPLE_PHI
351       && gimple_phi_num_args (stmt) <= 2
352       && !virtual_operand_p (gimple_phi_result (stmt))
353       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
354     return MOVE_POSSIBLE;
355 
356   if (gimple_get_lhs (stmt) == NULL_TREE)
357     return MOVE_IMPOSSIBLE;
358 
359   if (gimple_vdef (stmt))
360     return MOVE_IMPOSSIBLE;
361 
362   if (stmt_ends_bb_p (stmt)
363       || gimple_has_volatile_ops (stmt)
364       || gimple_has_side_effects (stmt)
365       || stmt_could_throw_p (stmt))
366     return MOVE_IMPOSSIBLE;
367 
368   if (is_gimple_call (stmt))
369     {
370       /* While a pure or const call is guaranteed to have no side effects, we
371 	 cannot move it arbitrarily.  Consider code like
372 
373 	 char *s = something ();
374 
375 	 while (1)
376 	   {
377 	     if (s)
378 	       t = strlen (s);
379 	     else
380 	       t = 0;
381 	   }
382 
383 	 Here the strlen call cannot be moved out of the loop, even though
384 	 s is invariant.  In addition to possibly creating a call with
385 	 invalid arguments, moving out a function call that is not executed
386 	 may cause performance regressions in case the call is costly and
387 	 not executed at all.  */
388       ret = MOVE_PRESERVE_EXECUTION;
389       lhs = gimple_call_lhs (stmt);
390     }
391   else if (is_gimple_assign (stmt))
392     lhs = gimple_assign_lhs (stmt);
393   else
394     return MOVE_IMPOSSIBLE;
395 
396   if (TREE_CODE (lhs) == SSA_NAME
397       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
398     return MOVE_IMPOSSIBLE;
399 
400   if (TREE_CODE (lhs) != SSA_NAME
401       || gimple_could_trap_p (stmt))
402     return MOVE_PRESERVE_EXECUTION;
403 
404   /* Non local loads in a transaction cannot be hoisted out.  Well,
405      unless the load happens on every path out of the loop, but we
406      don't take this into account yet.  */
407   if (flag_tm
408       && gimple_in_transaction (stmt)
409       && gimple_assign_single_p (stmt))
410     {
411       tree rhs = gimple_assign_rhs1 (stmt);
412       if (DECL_P (rhs) && is_global_var (rhs))
413 	{
414 	  if (dump_file)
415 	    {
416 	      fprintf (dump_file, "Cannot hoist conditional load of ");
417 	      print_generic_expr (dump_file, rhs, TDF_SLIM);
418 	      fprintf (dump_file, " because it is in a transaction.\n");
419 	    }
420 	  return MOVE_IMPOSSIBLE;
421 	}
422     }
423 
424   return ret;
425 }
426 
427 /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
428    loop to which we could move the expression using DEF if it did not have
429    other operands, i.e. the outermost loop enclosing LOOP in which the value
430    of DEF is invariant.  */
431 
432 static struct loop *
433 outermost_invariant_loop (tree def, struct loop *loop)
434 {
435   gimple def_stmt;
436   basic_block def_bb;
437   struct loop *max_loop;
438   struct lim_aux_data *lim_data;
439 
440   if (!def)
441     return superloop_at_depth (loop, 1);
442 
443   if (TREE_CODE (def) != SSA_NAME)
444     {
445       gcc_assert (is_gimple_min_invariant (def));
446       return superloop_at_depth (loop, 1);
447     }
448 
449   def_stmt = SSA_NAME_DEF_STMT (def);
450   def_bb = gimple_bb (def_stmt);
451   if (!def_bb)
452     return superloop_at_depth (loop, 1);
453 
454   max_loop = find_common_loop (loop, def_bb->loop_father);
455 
456   lim_data = get_lim_data (def_stmt);
457   if (lim_data != NULL && lim_data->max_loop != NULL)
458     max_loop = find_common_loop (max_loop,
459 				 loop_outer (lim_data->max_loop));
460   if (max_loop == loop)
461     return NULL;
462   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
463 
464   return max_loop;
465 }
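
/* Illustrative example (not used by the code itself): given

     x = ...;			defined before both loops
     loop 1
       loop 2
	 ... use of x ...

   outermost_invariant_loop (x, loop 2) returns loop 1, the outermost loop
   out of which a computation using only X could be moved.  If X were
   defined inside loop 2 itself, NULL would be returned.  */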
466 
467 /* DATA is a structure containing information associated with a statement
468    inside LOOP.  DEF is one of the operands of this statement.
469 
470    Find the outermost loop enclosing LOOP in which the value of DEF is invariant
471    and record this in the DATA->max_loop field.  If DEF itself is defined inside
472    this loop as well (i.e. we need to hoist it out of the loop if we want
473    to hoist the statement represented by DATA), record the statement in which
474    DEF is defined on the DATA->depends list.  Additionally, if ADD_COST is true,
475    add the cost of the computation of DEF to DATA->cost.
476 
477    If DEF is not invariant in LOOP, return false.  Otherwise return true.  */
478 
479 static bool
480 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
481 		bool add_cost)
482 {
483   gimple def_stmt = SSA_NAME_DEF_STMT (def);
484   basic_block def_bb = gimple_bb (def_stmt);
485   struct loop *max_loop;
486   struct depend *dep;
487   struct lim_aux_data *def_data;
488 
489   if (!def_bb)
490     return true;
491 
492   max_loop = outermost_invariant_loop (def, loop);
493   if (!max_loop)
494     return false;
495 
496   if (flow_loop_nested_p (data->max_loop, max_loop))
497     data->max_loop = max_loop;
498 
499   def_data = get_lim_data (def_stmt);
500   if (!def_data)
501     return true;
502 
503   if (add_cost
504       /* Only add the cost if the statement defining DEF is inside LOOP,
505 	 i.e. if it is likely that by moving the invariants dependent
506 	 on it, we will be able to avoid creating a new register for
507 	 it (since it will be only used in these dependent invariants).  */
508       && def_bb->loop_father == loop)
509     data->cost += def_data->cost;
510 
511   dep = XNEW (struct depend);
512   dep->stmt = def_stmt;
513   dep->next = data->depends;
514   data->depends = dep;
515 
516   return true;
517 }
518 
519 /* Returns an estimate of the cost of statement STMT.  The values here
520    are just ad-hoc constants, similar to costs for inlining.  */
521 
522 static unsigned
523 stmt_cost (gimple stmt)
524 {
525   /* Always try to create possibilities for unswitching.  */
526   if (gimple_code (stmt) == GIMPLE_COND
527       || gimple_code (stmt) == GIMPLE_PHI)
528     return LIM_EXPENSIVE;
529 
530   /* We should be hoisting calls if possible.  */
531   if (is_gimple_call (stmt))
532     {
533       tree fndecl;
534 
535       /* Unless the call is a builtin_constant_p; this always folds to a
536 	 constant, so moving it is useless.  */
537       fndecl = gimple_call_fndecl (stmt);
538       if (fndecl
539 	  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
540 	  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
541 	return 0;
542 
543       return LIM_EXPENSIVE;
544     }
545 
546   /* Hoisting memory references out should almost surely be a win.  */
547   if (gimple_references_memory_p (stmt))
548     return LIM_EXPENSIVE;
549 
550   if (gimple_code (stmt) != GIMPLE_ASSIGN)
551     return 1;
552 
553   switch (gimple_assign_rhs_code (stmt))
554     {
555     case MULT_EXPR:
556     case WIDEN_MULT_EXPR:
557     case WIDEN_MULT_PLUS_EXPR:
558     case WIDEN_MULT_MINUS_EXPR:
559     case DOT_PROD_EXPR:
560     case FMA_EXPR:
561     case TRUNC_DIV_EXPR:
562     case CEIL_DIV_EXPR:
563     case FLOOR_DIV_EXPR:
564     case ROUND_DIV_EXPR:
565     case EXACT_DIV_EXPR:
566     case CEIL_MOD_EXPR:
567     case FLOOR_MOD_EXPR:
568     case ROUND_MOD_EXPR:
569     case TRUNC_MOD_EXPR:
570     case RDIV_EXPR:
571       /* Division and multiplication are usually expensive.  */
572       return LIM_EXPENSIVE;
573 
574     case LSHIFT_EXPR:
575     case RSHIFT_EXPR:
576     case WIDEN_LSHIFT_EXPR:
577     case LROTATE_EXPR:
578     case RROTATE_EXPR:
579       /* Shifts and rotates are usually expensive.  */
580       return LIM_EXPENSIVE;
581 
582     case CONSTRUCTOR:
583       /* Make vector construction cost proportional to the number
584          of elements.  */
585       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
586 
587     case SSA_NAME:
588     case PAREN_EXPR:
589       /* Whether or not something is wrapped inside a PAREN_EXPR
590          should not change move cost.  Nor should an intermediate
591 	 unpropagated SSA name copy.  */
592       return 0;
593 
594     default:
595       return 1;
596     }
597 }
598 
599 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
600    REF is independent.  If REF is not independent in LOOP, NULL is returned
601    instead.  */
602 
603 static struct loop *
604 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
605 {
606   struct loop *aloop;
607 
608   if (bitmap_bit_p (ref->stored, loop->num))
609     return NULL;
610 
611   for (aloop = outer;
612        aloop != loop;
613        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
614     if (!bitmap_bit_p (ref->stored, aloop->num)
615 	&& ref_indep_loop_p (aloop, ref))
616       return aloop;
617 
618   if (ref_indep_loop_p (loop, ref))
619     return loop;
620   else
621     return NULL;
622 }
623 
624 /* If there is a simple load or store to a memory reference in STMT, returns
625    the location of the memory reference, and sets IS_STORE according to whether
626    it is a store or load.  Otherwise, returns NULL.  */
627 
628 static tree *
629 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
630 {
631   tree *lhs, *rhs;
632 
633   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
634   if (!gimple_assign_single_p (stmt))
635     return NULL;
636 
637   lhs = gimple_assign_lhs_ptr (stmt);
638   rhs = gimple_assign_rhs1_ptr (stmt);
639 
640   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
641     {
642       *is_store = false;
643       return rhs;
644     }
645   else if (gimple_vdef (stmt)
646 	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
647     {
648       *is_store = true;
649       return lhs;
650     }
651   else
652     return NULL;
653 }
654 
655 /* Returns the memory reference contained in STMT.  */
656 
657 static mem_ref_p
658 mem_ref_in_stmt (gimple stmt)
659 {
660   bool store;
661   tree *mem = simple_mem_ref_in_stmt (stmt, &store);
662   hashval_t hash;
663   mem_ref_p ref;
664 
665   if (!mem)
666     return NULL;
667   gcc_assert (!store);
668 
669   hash = iterative_hash_expr (*mem, 0);
670   ref = (mem_ref_p) htab_find_with_hash (memory_accesses.refs, *mem, hash);
671 
672   gcc_assert (ref != NULL);
673   return ref;
674 }
675 
676 /* From a controlling predicate in DOM determine the arguments from
677    the PHI node PHI that are chosen if the predicate evaluates to
678    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
679    they are non-NULL.  Returns true if the arguments can be determined,
680    else return false.  */
681 
682 static bool
683 extract_true_false_args_from_phi (basic_block dom, gimple phi,
684 				  tree *true_arg_p, tree *false_arg_p)
685 {
686   basic_block bb = gimple_bb (phi);
687   edge true_edge, false_edge, tem;
688   tree arg0 = NULL_TREE, arg1 = NULL_TREE;
689 
690   /* We have to verify that one edge into the PHI node is dominated
691      by the true edge of the predicate block and the other edge
692      dominated by the false edge.  This ensures that the PHI argument
693      we are going to take is completely determined by the path we
694      take from the predicate block.
695      We can only use BB dominance checks below if the destination of
696      the true/false edges are dominated by their edge, thus only
697      have a single predecessor.  */
698   extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
699   tem = EDGE_PRED (bb, 0);
700   if (tem == true_edge
701       || (single_pred_p (true_edge->dest)
702 	  && (tem->src == true_edge->dest
703 	      || dominated_by_p (CDI_DOMINATORS,
704 				 tem->src, true_edge->dest))))
705     arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
706   else if (tem == false_edge
707 	   || (single_pred_p (false_edge->dest)
708 	       && (tem->src == false_edge->dest
709 		   || dominated_by_p (CDI_DOMINATORS,
710 				      tem->src, false_edge->dest))))
711     arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
712   else
713     return false;
714   tem = EDGE_PRED (bb, 1);
715   if (tem == true_edge
716       || (single_pred_p (true_edge->dest)
717 	  && (tem->src == true_edge->dest
718 	      || dominated_by_p (CDI_DOMINATORS,
719 				 tem->src, true_edge->dest))))
720     arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
721   else if (tem == false_edge
722 	   || (single_pred_p (false_edge->dest)
723 	       && (tem->src == false_edge->dest
724 		   || dominated_by_p (CDI_DOMINATORS,
725 				      tem->src, false_edge->dest))))
726     arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
727   else
728     return false;
729   if (!arg0 || !arg1)
730     return false;
731 
732   if (true_arg_p)
733     *true_arg_p = arg0;
734   if (false_arg_p)
735     *false_arg_p = arg1;
736 
737   return true;
738 }
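
/* Schematic illustration (block names are made up): for a diamond

	    if (cond)		   last stmt of DOM
	    /	     \
	  bb_t	     bb_f
	    \	     /
     x = PHI <a (bb_t), b (bb_f)>  PHI in BB

   the function stores A to *TRUE_ARG_P and B to *FALSE_ARG_P, provided the
   incoming edges of BB are controlled by COND as checked above.  */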
739 
740 /* Determine the outermost loop to which it is possible to hoist a statement
741    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
742    the outermost loop in which the value computed by STMT is invariant.
743    If MUST_PRESERVE_EXEC is true, additionally choose a loop such that
744    we preserve whether STMT is executed.  It also fills in other related
745    information in LIM_DATA (STMT).
746 
747    The function returns false if STMT cannot be hoisted outside of the loop it
748    is defined in, and true otherwise.  */
749 
750 static bool
751 determine_max_movement (gimple stmt, bool must_preserve_exec)
752 {
753   basic_block bb = gimple_bb (stmt);
754   struct loop *loop = bb->loop_father;
755   struct loop *level;
756   struct lim_aux_data *lim_data = get_lim_data (stmt);
757   tree val;
758   ssa_op_iter iter;
759 
760   if (must_preserve_exec)
761     level = ALWAYS_EXECUTED_IN (bb);
762   else
763     level = superloop_at_depth (loop, 1);
764   lim_data->max_loop = level;
765 
766   if (gimple_code (stmt) == GIMPLE_PHI)
767     {
768       use_operand_p use_p;
769       unsigned min_cost = UINT_MAX;
770       unsigned total_cost = 0;
771       struct lim_aux_data *def_data;
772 
773       /* We will end up promoting dependencies to be unconditionally
774 	 evaluated.  For this reason the PHI cost (and thus the
775 	 cost we remove from the loop by doing the invariant motion)
776 	 is that of the cheapest PHI argument dependency chain.  */
777       FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
778 	{
779 	  val = USE_FROM_PTR (use_p);
780 	  if (TREE_CODE (val) != SSA_NAME)
781 	    continue;
782 	  if (!add_dependency (val, lim_data, loop, false))
783 	    return false;
784 	  def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
785 	  if (def_data)
786 	    {
787 	      min_cost = MIN (min_cost, def_data->cost);
788 	      total_cost += def_data->cost;
789 	    }
790 	}
791 
792       lim_data->cost += min_cost;
793 
794       if (gimple_phi_num_args (stmt) > 1)
795 	{
796 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
797 	  gimple cond;
798 	  if (gsi_end_p (gsi_last_bb (dom)))
799 	    return false;
800 	  cond = gsi_stmt (gsi_last_bb (dom));
801 	  if (gimple_code (cond) != GIMPLE_COND)
802 	    return false;
803 	  /* Verify that this is an extended form of a diamond and
804 	     the PHI arguments are completely controlled by the
805 	     predicate in DOM.  */
806 	  if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
807 	    return false;
808 
809 	  /* Fold in dependencies and cost of the condition.  */
810 	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
811 	    {
812 	      if (!add_dependency (val, lim_data, loop, false))
813 		return false;
814 	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
815 	      if (def_data)
816 		total_cost += def_data->cost;
817 	    }
818 
819 	  /* We want to avoid unconditionally executing very expensive
820 	     operations.  As costs for our dependencies cannot be
821 	     negative, just claim we are not invariant for this case.
822 	     We also are not sure whether the control-flow inside the
823 	     loop will vanish.  */
824 	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
825 	      && !(min_cost != 0
826 		   && total_cost / min_cost <= 2))
827 	    return false;
828 
829 	  /* Assume that the control-flow in the loop will vanish.
830 	     ???  We should verify this and not artificially increase
831 	     the cost if that is not the case.  */
832 	  lim_data->cost += stmt_cost (stmt);
833 	}
834 
835       return true;
836     }
837   else
838     FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
839       if (!add_dependency (val, lim_data, loop, true))
840 	return false;
841 
842   if (gimple_vuse (stmt))
843     {
844       mem_ref_p ref = mem_ref_in_stmt (stmt);
845 
846       if (ref)
847 	{
848 	  lim_data->max_loop
849 		  = outermost_indep_loop (lim_data->max_loop, loop, ref);
850 	  if (!lim_data->max_loop)
851 	    return false;
852 	}
853       else
854 	{
855 	  if ((val = gimple_vuse (stmt)) != NULL_TREE)
856 	    {
857 	      if (!add_dependency (val, lim_data, loop, false))
858 		return false;
859 	    }
860 	}
861     }
862 
863   lim_data->cost += stmt_cost (stmt);
864 
865   return true;
866 }
867 
868 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
869    and that one of the operands of this statement is computed by STMT.
870    Ensure that STMT (together with all the statements that define its
871    operands) is hoisted at least out of the loop LEVEL.  */
872 
873 static void
874 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
875 {
876   struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
877   struct depend *dep;
878   struct lim_aux_data *lim_data;
879 
880   stmt_loop = find_common_loop (orig_loop, stmt_loop);
881   lim_data = get_lim_data (stmt);
882   if (lim_data != NULL && lim_data->tgt_loop != NULL)
883     stmt_loop = find_common_loop (stmt_loop,
884 				  loop_outer (lim_data->tgt_loop));
885   if (flow_loop_nested_p (stmt_loop, level))
886     return;
887 
888   gcc_assert (level == lim_data->max_loop
889 	      || flow_loop_nested_p (lim_data->max_loop, level));
890 
891   lim_data->tgt_loop = level;
892   for (dep = lim_data->depends; dep; dep = dep->next)
893     set_level (dep->stmt, orig_loop, level);
894 }
895 
896 /* Determines an outermost loop from which we want to hoist the statement STMT.
897    For now we choose the outermost possible loop.  TODO -- use profiling
898    information to set it more sanely.  */
899 
900 static void
901 set_profitable_level (gimple stmt)
902 {
903   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
904 }
905 
906 /* Returns true if STMT is a call that has side effects.  */
907 
908 static bool
909 nonpure_call_p (gimple stmt)
910 {
911   if (gimple_code (stmt) != GIMPLE_CALL)
912     return false;
913 
914   return gimple_has_side_effects (stmt);
915 }
916 
917 /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
918 
919 static gimple
920 rewrite_reciprocal (gimple_stmt_iterator *bsi)
921 {
922   gimple stmt, stmt1, stmt2;
923   tree name, lhs, type;
924   tree real_one;
925   gimple_stmt_iterator gsi;
926 
927   stmt = gsi_stmt (*bsi);
928   lhs = gimple_assign_lhs (stmt);
929   type = TREE_TYPE (lhs);
930 
931   real_one = build_one_cst (type);
932 
933   name = make_temp_ssa_name (type, NULL, "reciptmp");
934   stmt1 = gimple_build_assign_with_ops (RDIV_EXPR, name, real_one,
935 					gimple_assign_rhs2 (stmt));
936 
937   stmt2 = gimple_build_assign_with_ops (MULT_EXPR, lhs, name,
938 					gimple_assign_rhs1 (stmt));
939 
940   /* Replace division stmt with reciprocal and multiply stmts.
941      The multiply stmt is not invariant, so update iterator
942      and avoid rescanning.  */
943   gsi = *bsi;
944   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
945   gsi_replace (&gsi, stmt2, true);
946 
947   /* Continue processing with invariant reciprocal statement.  */
948   return stmt1;
949 }
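
/* Purely illustrative before/after view of the rewrite (the temporary name
   is made up):

     x = a / b;

   becomes

     reciptmp = 1.0 / b;	   returned, candidate for hoisting
     x = a * reciptmp;

   so that only the multiplication remains in the loop when B is
   invariant.  */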
950 
951 /* Check if the pattern at *BSI is a bittest of the form
952    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
953 
954 static gimple
955 rewrite_bittest (gimple_stmt_iterator *bsi)
956 {
957   gimple stmt, use_stmt, stmt1, stmt2;
958   tree lhs, name, t, a, b;
959   use_operand_p use;
960 
961   stmt = gsi_stmt (*bsi);
962   lhs = gimple_assign_lhs (stmt);
963 
964   /* Verify that the single use of lhs is a comparison against zero.  */
965   if (TREE_CODE (lhs) != SSA_NAME
966       || !single_imm_use (lhs, &use, &use_stmt)
967       || gimple_code (use_stmt) != GIMPLE_COND)
968     return stmt;
969   if (gimple_cond_lhs (use_stmt) != lhs
970       || (gimple_cond_code (use_stmt) != NE_EXPR
971 	  && gimple_cond_code (use_stmt) != EQ_EXPR)
972       || !integer_zerop (gimple_cond_rhs (use_stmt)))
973     return stmt;
974 
975   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
976   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
977   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
978     return stmt;
979 
980   /* There is a conversion in between possibly inserted by fold.  */
981   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
982     {
983       t = gimple_assign_rhs1 (stmt1);
984       if (TREE_CODE (t) != SSA_NAME
985 	  || !has_single_use (t))
986 	return stmt;
987       stmt1 = SSA_NAME_DEF_STMT (t);
988       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
989 	return stmt;
990     }
991 
992   /* Verify that B is loop invariant but A is not.  Verify that with
993      all the stmt walking we are still in the same loop.  */
994   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
995       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
996     return stmt;
997 
998   a = gimple_assign_rhs1 (stmt1);
999   b = gimple_assign_rhs2 (stmt1);
1000 
1001   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1002       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1003     {
1004       gimple_stmt_iterator rsi;
1005 
1006       /* 1 << B */
1007       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1008 		       build_int_cst (TREE_TYPE (a), 1), b);
1009       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1010       stmt1 = gimple_build_assign (name, t);
1011 
1012       /* A & (1 << B) */
1013       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1014       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1015       stmt2 = gimple_build_assign (name, t);
1016 
1017       /* Replace the SSA_NAME we compare against zero.  Adjust
1018 	 the type of zero accordingly.  */
1019       SET_USE (use, name);
1020       gimple_cond_set_rhs (use_stmt, build_int_cst_type (TREE_TYPE (name), 0));
1021 
1022       /* Don't use gsi_replace here, none of the new assignments sets
1023 	 the variable originally set in stmt.  Move bsi to stmt1, and
1024 	 then remove the original stmt, so that we get a chance to
1025 	 retain debug info for it.  */
1026       rsi = *bsi;
1027       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1028       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1029       gsi_remove (&rsi, true);
1030 
1031       return stmt1;
1032     }
1033 
1034   return stmt;
1035 }
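
/* Purely illustrative before/after view of the rewrite (temporary names
   are made up); when B is invariant but A is not,

     tmp1 = a >> b;
     tmp2 = tmp1 & 1;
     if (tmp2 != 0) ...

   becomes

     shifttmp1 = 1 << b;	   returned, candidate for hoisting
     shifttmp2 = a & shifttmp1;
     if (shifttmp2 != 0) ...  */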
1036 
1037 
1038 /* Determine the outermost loops in which statements in basic block BB are
1039    invariant, and record them in the LIM_DATA associated with the statements.
1040    Callback for walk_dominator_tree.  */
1041 
1042 static void
1043 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
1044 			      basic_block bb)
1045 {
1046   enum move_pos pos;
1047   gimple_stmt_iterator bsi;
1048   gimple stmt;
1049   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1050   struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1051   struct lim_aux_data *lim_data;
1052 
1053   if (!loop_outer (bb->loop_father))
1054     return;
1055 
1056   if (dump_file && (dump_flags & TDF_DETAILS))
1057     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1058 	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1059 
1060   /* Look at PHI nodes, but only if there are at most two.
1061      ???  We could relax this further by post-processing the inserted
1062      code and transforming adjacent cond-exprs with the same predicate
1063      to control flow again.  */
1064   bsi = gsi_start_phis (bb);
1065   if (!gsi_end_p (bsi)
1066       && ((gsi_next (&bsi), gsi_end_p (bsi))
1067 	  || (gsi_next (&bsi), gsi_end_p (bsi))))
1068     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1069       {
1070 	stmt = gsi_stmt (bsi);
1071 
1072 	pos = movement_possibility (stmt);
1073 	if (pos == MOVE_IMPOSSIBLE)
1074 	  continue;
1075 
1076 	lim_data = init_lim_data (stmt);
1077 	lim_data->always_executed_in = outermost;
1078 
1079 	if (!determine_max_movement (stmt, false))
1080 	  {
1081 	    lim_data->max_loop = NULL;
1082 	    continue;
1083 	  }
1084 
1085 	if (dump_file && (dump_flags & TDF_DETAILS))
1086 	  {
1087 	    print_gimple_stmt (dump_file, stmt, 2, 0);
1088 	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1089 		     loop_depth (lim_data->max_loop),
1090 		     lim_data->cost);
1091 	  }
1092 
1093 	if (lim_data->cost >= LIM_EXPENSIVE)
1094 	  set_profitable_level (stmt);
1095       }
1096 
1097   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1098     {
1099       stmt = gsi_stmt (bsi);
1100 
1101       pos = movement_possibility (stmt);
1102       if (pos == MOVE_IMPOSSIBLE)
1103 	{
1104 	  if (nonpure_call_p (stmt))
1105 	    {
1106 	      maybe_never = true;
1107 	      outermost = NULL;
1108 	    }
1109 	  /* Make sure to note always_executed_in for stores to make
1110 	     store-motion work.  */
1111 	  else if (stmt_makes_single_store (stmt))
1112 	    {
1113 	      struct lim_aux_data *lim_data = init_lim_data (stmt);
1114 	      lim_data->always_executed_in = outermost;
1115 	    }
1116 	  continue;
1117 	}
1118 
1119       if (is_gimple_assign (stmt)
1120 	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1121 	      == GIMPLE_BINARY_RHS))
1122 	{
1123 	  tree op0 = gimple_assign_rhs1 (stmt);
1124 	  tree op1 = gimple_assign_rhs2 (stmt);
1125 	  struct loop *ol1 = outermost_invariant_loop (op1,
1126 					loop_containing_stmt (stmt));
1127 
1128 	  /* If the divisor is invariant, convert a/b to a*(1/b), allowing the
1129 	     reciprocal to be hoisted out of the loop, saving an expensive divide.  */
1130 	  if (pos == MOVE_POSSIBLE
1131 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1132 	      && flag_unsafe_math_optimizations
1133 	      && !flag_trapping_math
1134 	      && ol1 != NULL
1135 	      && outermost_invariant_loop (op0, ol1) == NULL)
1136 	    stmt = rewrite_reciprocal (&bsi);
1137 
1138 	  /* If the shift count is invariant, convert (A >> B) & 1 to
1139 	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
1140 	     saving an expensive shift.  */
1141 	  if (pos == MOVE_POSSIBLE
1142 	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1143 	      && integer_onep (op1)
1144 	      && TREE_CODE (op0) == SSA_NAME
1145 	      && has_single_use (op0))
1146 	    stmt = rewrite_bittest (&bsi);
1147 	}
1148 
1149       lim_data = init_lim_data (stmt);
1150       lim_data->always_executed_in = outermost;
1151 
1152       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1153 	continue;
1154 
1155       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1156 	{
1157 	  lim_data->max_loop = NULL;
1158 	  continue;
1159 	}
1160 
1161       if (dump_file && (dump_flags & TDF_DETAILS))
1162 	{
1163 	  print_gimple_stmt (dump_file, stmt, 2, 0);
1164 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1165 		   loop_depth (lim_data->max_loop),
1166 		   lim_data->cost);
1167 	}
1168 
1169       if (lim_data->cost >= LIM_EXPENSIVE)
1170 	set_profitable_level (stmt);
1171     }
1172 }
1173 
1174 /* For each statement determines the outermost loop in which it is invariant,
1175    statements on whose motion it depends and the cost of the computation.
1176    This information is stored in the LIM_DATA structure associated with
1177    each statement.  */
1178 
1179 static void
1180 determine_invariantness (void)
1181 {
1182   struct dom_walk_data walk_data;
1183 
1184   memset (&walk_data, 0, sizeof (struct dom_walk_data));
1185   walk_data.dom_direction = CDI_DOMINATORS;
1186   walk_data.before_dom_children = determine_invariantness_stmt;
1187 
1188   init_walk_dominator_tree (&walk_data);
1189   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1190   fini_walk_dominator_tree (&walk_data);
1191 }
1192 
1193 /* Return true if CODE is an operation that, when operating on signed
1194    integer types, involves undefined behavior on overflow and the
1195    operation can be expressed with unsigned arithmetic.  */
1196 
1197 static bool
1198 arith_code_with_undefined_signed_overflow (tree_code code)
1199 {
1200   switch (code)
1201     {
1202     case PLUS_EXPR:
1203     case MINUS_EXPR:
1204     case MULT_EXPR:
1205     case NEGATE_EXPR:
1206     case POINTER_PLUS_EXPR:
1207       return true;
1208     default:
1209       return false;
1210     }
1211 }
1212 
1213 /* Rewrite STMT, an assignment with a signed integer or pointer arithmetic
1214    operation that can be transformed to unsigned arithmetic by converting
1215    its operands, carrying out the operation in the corresponding unsigned
1216    type and converting the result back to the original type.
1217 
1218    Returns a sequence of statements that replace STMT and also contain
1219    a modified form of STMT itself.  */
1220 
1221 static gimple_seq
1222 rewrite_to_defined_overflow (gimple stmt)
1223 {
1224   if (dump_file && (dump_flags & TDF_DETAILS))
1225     {
1226       fprintf (dump_file, "rewriting stmt with undefined signed "
1227 	       "overflow ");
1228       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1229     }
1230 
1231   tree lhs = gimple_assign_lhs (stmt);
1232   tree type = unsigned_type_for (TREE_TYPE (lhs));
1233   gimple_seq stmts = NULL;
1234   for (unsigned i = 1; i < gimple_num_ops (stmt); ++i)
1235     {
1236       gimple_seq stmts2 = NULL;
1237       gimple_set_op (stmt, i,
1238 		     force_gimple_operand (fold_convert (type,
1239 							 gimple_op (stmt, i)),
1240 					   &stmts2, true, NULL_TREE));
1241       gimple_seq_add_seq (&stmts, stmts2);
1242     }
1243   gimple_assign_set_lhs (stmt, make_ssa_name (type, stmt));
1244   if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
1245     gimple_assign_set_rhs_code (stmt, PLUS_EXPR);
1246   gimple_seq_add_stmt (&stmts, stmt);
1247   gimple cvt = gimple_build_assign_with_ops
1248       (NOP_EXPR, lhs, gimple_assign_lhs (stmt), NULL_TREE);
1249   gimple_seq_add_stmt (&stmts, cvt);
1250 
1251   return stmts;
1252 }
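
/* Sketch of the effect (the SSA names are made up): a hoisted statement

     a_1 = b_2 + 1;		   signed addition, overflow undefined

   is replaced by the sequence

     tmp_3 = (unsigned int) b_2;
     tmp_4 = tmp_3 + 1;
     a_1 = (int) tmp_4;

   so the arithmetic is performed with well-defined wrapping semantics.  */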
1253 
1254 /* Hoist the statements in basic block BB out of the loops prescribed by
1255    data stored in LIM_DATA structures associated with each statement.  Callback
1256    for walk_dominator_tree.  */
1257 
1258 static void
1259 move_computations_stmt (struct dom_walk_data *dw_data,
1260 			basic_block bb)
1261 {
1262   struct loop *level;
1263   gimple_stmt_iterator bsi;
1264   gimple stmt;
1265   unsigned cost = 0;
1266   struct lim_aux_data *lim_data;
1267 
1268   if (!loop_outer (bb->loop_father))
1269     return;
1270 
1271   for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1272     {
1273       gimple new_stmt;
1274       stmt = gsi_stmt (bsi);
1275 
1276       lim_data = get_lim_data (stmt);
1277       if (lim_data == NULL)
1278 	{
1279 	  gsi_next (&bsi);
1280 	  continue;
1281 	}
1282 
1283       cost = lim_data->cost;
1284       level = lim_data->tgt_loop;
1285       clear_lim_data (stmt);
1286 
1287       if (!level)
1288 	{
1289 	  gsi_next (&bsi);
1290 	  continue;
1291 	}
1292 
1293       if (dump_file && (dump_flags & TDF_DETAILS))
1294 	{
1295 	  fprintf (dump_file, "Moving PHI node\n");
1296 	  print_gimple_stmt (dump_file, stmt, 0, 0);
1297 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1298 		   cost, level->num);
1299 	}
1300 
1301       if (gimple_phi_num_args (stmt) == 1)
1302 	{
1303 	  tree arg = PHI_ARG_DEF (stmt, 0);
1304 	  new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg),
1305 						   gimple_phi_result (stmt),
1306 						   arg, NULL_TREE);
1307 	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
1308 	}
1309       else
1310 	{
1311 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1312 	  gimple cond = gsi_stmt (gsi_last_bb (dom));
1313 	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1314 	  /* Get the PHI arguments corresponding to the true and false
1315 	     edges of COND.  */
1316 	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1317 	  gcc_assert (arg0 && arg1);
1318 	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1319 		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1320 	  new_stmt = gimple_build_assign_with_ops (COND_EXPR,
1321 						   gimple_phi_result (stmt),
1322 						   t, arg0, arg1);
1323 	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
1324 	  *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
1325 	}
1326       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1327       remove_phi_node (&bsi, false);
1328     }
1329 
1330   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1331     {
1332       edge e;
1333 
1334       stmt = gsi_stmt (bsi);
1335 
1336       lim_data = get_lim_data (stmt);
1337       if (lim_data == NULL)
1338 	{
1339 	  gsi_next (&bsi);
1340 	  continue;
1341 	}
1342 
1343       cost = lim_data->cost;
1344       level = lim_data->tgt_loop;
1345       clear_lim_data (stmt);
1346 
1347       if (!level)
1348 	{
1349 	  gsi_next (&bsi);
1350 	  continue;
1351 	}
1352 
1353       /* We do not really want to move conditionals out of the loop; we just
1354 	 placed them here to force their operands to be moved if necessary.  */
1355       if (gimple_code (stmt) == GIMPLE_COND)
1356 	continue;
1357 
1358       if (dump_file && (dump_flags & TDF_DETAILS))
1359 	{
1360 	  fprintf (dump_file, "Moving statement\n");
1361 	  print_gimple_stmt (dump_file, stmt, 0, 0);
1362 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1363 		   cost, level->num);
1364 	}
1365 
1366       e = loop_preheader_edge (level);
1367       gcc_assert (!gimple_vdef (stmt));
1368       if (gimple_vuse (stmt))
1369 	{
1370 	  /* The new VUSE is the one from the virtual PHI in the loop
1371 	     header or the one already present.  */
1372 	  gimple_stmt_iterator gsi2;
1373 	  for (gsi2 = gsi_start_phis (e->dest);
1374 	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1375 	    {
1376 	      gimple phi = gsi_stmt (gsi2);
1377 	      if (virtual_operand_p (gimple_phi_result (phi)))
1378 		{
1379 		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1380 		  break;
1381 		}
1382 	    }
1383 	}
1384       gsi_remove (&bsi, false);
1385       /* In case this is a stmt that is not unconditionally executed
1386          when the target loop header is executed and the stmt may
1387 	 invoke undefined integer or pointer overflow rewrite it to
1388 	 unsigned arithmetic.  */
1389       if (is_gimple_assign (stmt)
1390 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1391 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1392 	  && arith_code_with_undefined_signed_overflow
1393 	       (gimple_assign_rhs_code (stmt))
1394 	  && (!ALWAYS_EXECUTED_IN (bb)
1395 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1396 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1397 	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1398       else
1399 	gsi_insert_on_edge (e, stmt);
1400     }
1401 }
1402 
1403 /* Hoist the statements out of the loops prescribed by data stored in
1404    LIM_DATA structures associated with each statement.  */
1405 
1406 static unsigned int
1407 move_computations (void)
1408 {
1409   struct dom_walk_data walk_data;
1410   unsigned int todo = 0;
1411 
1412   memset (&walk_data, 0, sizeof (struct dom_walk_data));
1413   walk_data.global_data = &todo;
1414   walk_data.dom_direction = CDI_DOMINATORS;
1415   walk_data.before_dom_children = move_computations_stmt;
1416 
1417   init_walk_dominator_tree (&walk_data);
1418   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1419   fini_walk_dominator_tree (&walk_data);
1420 
1421   gsi_commit_edge_inserts ();
1422   if (need_ssa_update_p (cfun))
1423     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1424 
1425   return todo;
1426 }
1427 
1428 /* Checks whether the statement defining variable *INDEX can be hoisted
1429    out of the loop passed in DATA.  Callback for for_each_index.  */
1430 
1431 static bool
1432 may_move_till (tree ref, tree *index, void *data)
1433 {
1434   struct loop *loop = (struct loop *) data, *max_loop;
1435 
1436   /* If REF is an array reference, check also that the step and the lower
1437      bound are invariant in LOOP.  */
1438   if (TREE_CODE (ref) == ARRAY_REF)
1439     {
1440       tree step = TREE_OPERAND (ref, 3);
1441       tree lbound = TREE_OPERAND (ref, 2);
1442 
1443       max_loop = outermost_invariant_loop (step, loop);
1444       if (!max_loop)
1445 	return false;
1446 
1447       max_loop = outermost_invariant_loop (lbound, loop);
1448       if (!max_loop)
1449 	return false;
1450     }
1451 
1452   max_loop = outermost_invariant_loop (*index, loop);
1453   if (!max_loop)
1454     return false;
1455 
1456   return true;
1457 }
1458 
1459 /* If OP is an SSA_NAME, force the statement that defines it to be
1460    moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1461 
1462 static void
1463 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1464 {
1465   gimple stmt;
1466 
1467   if (!op
1468       || is_gimple_min_invariant (op))
1469     return;
1470 
1471   gcc_assert (TREE_CODE (op) == SSA_NAME);
1472 
1473   stmt = SSA_NAME_DEF_STMT (op);
1474   if (gimple_nop_p (stmt))
1475     return;
1476 
1477   set_level (stmt, orig_loop, loop);
1478 }
1479 
1480 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1481    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
1482    for_each_index.  */
1483 
1484 struct fmt_data
1485 {
1486   struct loop *loop;
1487   struct loop *orig_loop;
1488 };
1489 
1490 static bool
1491 force_move_till (tree ref, tree *index, void *data)
1492 {
1493   struct fmt_data *fmt_data = (struct fmt_data *) data;
1494 
1495   if (TREE_CODE (ref) == ARRAY_REF)
1496     {
1497       tree step = TREE_OPERAND (ref, 3);
1498       tree lbound = TREE_OPERAND (ref, 2);
1499 
1500       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1501       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1502     }
1503 
1504   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1505 
1506   return true;
1507 }
1508 
1509 /* A hash function for struct mem_ref object OBJ.  */
1510 
1511 static hashval_t
1512 memref_hash (const void *obj)
1513 {
1514   const struct mem_ref *const mem = (const struct mem_ref *) obj;
1515 
1516   return mem->hash;
1517 }
1518 
1519 /* An equality function for struct mem_ref object OBJ1 with
1520    memory reference OBJ2.  */
1521 
1522 static int
1523 memref_eq (const void *obj1, const void *obj2)
1524 {
1525   const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
1526 
1527   return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
1528 }
1529 
1530 /* Releases list of memory reference locations ACCS.  */
1531 
1532 static void
1533 free_mem_ref_locs (mem_ref_locs_p accs)
1534 {
1535   unsigned i;
1536   mem_ref_loc_p loc;
1537 
1538   if (!accs)
1539     return;
1540 
1541   FOR_EACH_VEC_ELT (accs->locs, i, loc)
1542     free (loc);
1543   accs->locs.release ();
1544   free (accs);
1545 }
1546 
1547 /* A function to free the mem_ref object OBJ.  */
1548 
1549 static void
1550 memref_free (struct mem_ref *mem)
1551 {
1552   unsigned i;
1553   mem_ref_locs_p accs;
1554 
1555   FOR_EACH_VEC_ELT (mem->accesses_in_loop, i, accs)
1556     free_mem_ref_locs (accs);
1557   mem->accesses_in_loop.release ();
1558 
1559   free (mem);
1560 }
1561 
1562 /* Allocates and returns a memory reference description for MEM whose hash
1563    value is HASH and id is ID.  */
1564 
1565 static mem_ref_p
1566 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1567 {
1568   mem_ref_p ref = XNEW (struct mem_ref);
1569   ref->mem = mem;
1570   ref->id = id;
1571   ref->hash = hash;
1572   ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1573   ref->indep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
1574   ref->dep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
1575   ref->indep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
1576   ref->dep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
1577   ref->accesses_in_loop.create (0);
1578 
1579   return ref;
1580 }
1581 
1582 /* Allocates and returns the new list of locations.  */
1583 
1584 static mem_ref_locs_p
1585 mem_ref_locs_alloc (void)
1586 {
1587   mem_ref_locs_p accs = XNEW (struct mem_ref_locs);
1588   accs->locs.create (0);
1589   return accs;
1590 }
1591 
1592 /* Records memory reference location *LOC in LOOP to the memory reference
1593    description REF.  The reference occurs in statement STMT.  */
1594 
1595 static void
1596 record_mem_ref_loc (mem_ref_p ref, struct loop *loop, gimple stmt, tree *loc)
1597 {
1598   mem_ref_loc_p aref = XNEW (struct mem_ref_loc);
1599   mem_ref_locs_p accs;
1600   bitmap ril = memory_accesses.refs_in_loop[loop->num];
1601 
1602   if (ref->accesses_in_loop.length ()
1603       <= (unsigned) loop->num)
1604     ref->accesses_in_loop.safe_grow_cleared (loop->num + 1);
1605   accs = ref->accesses_in_loop[loop->num];
1606   if (!accs)
1607     {
1608       accs = mem_ref_locs_alloc ();
1609       ref->accesses_in_loop[loop->num] = accs;
1610     }
1611 
1612   aref->stmt = stmt;
1613   aref->ref = loc;
1614 
1615   accs->locs.safe_push (aref);
1616   bitmap_set_bit (ril, ref->id);
1617 }
1618 
1619 /* Marks reference REF as stored in LOOP.  */
1620 
1621 static void
1622 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1623 {
1624   for (;
1625        loop != current_loops->tree_root
1626        && !bitmap_bit_p (ref->stored, loop->num);
1627        loop = loop_outer (loop))
1628     bitmap_set_bit (ref->stored, loop->num);
1629 }
1630 
1631 /* Gathers memory references in statement STMT in LOOP, storing the
1632    information about them in the memory_accesses structure.  Marks
1633    the vops accessed through unrecognized statements there as
1634    well.  */
1635 
1636 static void
1637 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1638 {
1639   tree *mem = NULL;
1640   hashval_t hash;
1641   PTR *slot;
1642   mem_ref_p ref;
1643   bool is_stored;
1644   unsigned id;
1645 
1646   if (!gimple_vuse (stmt))
1647     return;
1648 
1649   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1650   if (!mem)
1651     {
1652       id = memory_accesses.refs_list.length ();
1653       ref = mem_ref_alloc (error_mark_node, 0, id);
1654       memory_accesses.refs_list.safe_push (ref);
1655       if (dump_file && (dump_flags & TDF_DETAILS))
1656 	{
1657 	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1658 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1659 	}
1660       if (gimple_vdef (stmt))
1661 	mark_ref_stored (ref, loop);
1662       record_mem_ref_loc (ref, loop, stmt, mem);
1663       return;
1664     }
1665 
1666   hash = iterative_hash_expr (*mem, 0);
1667   slot = htab_find_slot_with_hash (memory_accesses.refs, *mem, hash, INSERT);
1668 
1669   if (*slot)
1670     {
1671       ref = (mem_ref_p) *slot;
1672       id = ref->id;
1673     }
1674   else
1675     {
1676       id = memory_accesses.refs_list.length ();
1677       ref = mem_ref_alloc (*mem, hash, id);
1678       memory_accesses.refs_list.safe_push (ref);
1679       *slot = ref;
1680 
1681       if (dump_file && (dump_flags & TDF_DETAILS))
1682 	{
1683 	  fprintf (dump_file, "Memory reference %u: ", id);
1684 	  print_generic_expr (dump_file, ref->mem, TDF_SLIM);
1685 	  fprintf (dump_file, "\n");
1686 	}
1687     }
1688 
1689   if (is_stored)
1690     mark_ref_stored (ref, loop);
1691 
1692   record_mem_ref_loc (ref, loop, stmt, mem);
1693   return;
1694 }
1695 
1696 /* Gathers memory references in loops.  */
1697 
1698 static void
1699 gather_mem_refs_in_loops (void)
1700 {
1701   gimple_stmt_iterator bsi;
1702   basic_block bb;
1703   struct loop *loop;
1704   loop_iterator li;
1705   bitmap lrefs, alrefs, alrefso;
1706 
1707   FOR_EACH_BB (bb)
1708     {
1709       loop = bb->loop_father;
1710       if (loop == current_loops->tree_root)
1711 	continue;
1712 
1713       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1714 	gather_mem_refs_stmt (loop, gsi_stmt (bsi));
1715     }
1716 
1717   /* Propagate the information about accessed memory references up
1718      the loop hierarchy.  */
1719   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1720     {
1721       lrefs = memory_accesses.refs_in_loop[loop->num];
1722       alrefs = memory_accesses.all_refs_in_loop[loop->num];
1723       bitmap_ior_into (alrefs, lrefs);
1724 
1725       if (loop_outer (loop) == current_loops->tree_root)
1726 	continue;
1727 
1728       alrefso = memory_accesses.all_refs_in_loop[loop_outer (loop)->num];
1729       bitmap_ior_into (alrefso, alrefs);
1730     }
1731 }
1732 
1733 /* Create a mapping from virtual operands to references that touch them
1734    in LOOP.  */
1735 
1736 static void
1737 create_vop_ref_mapping_loop (struct loop *loop)
1738 {
1739   bitmap refs = memory_accesses.refs_in_loop[loop->num];
1740   struct loop *sloop;
1741   bitmap_iterator bi;
1742   unsigned i;
1743   mem_ref_p ref;
1744 
1745   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1746     {
1747       ref = memory_accesses.refs_list[i];
1748       for (sloop = loop; sloop != current_loops->tree_root;
1749 	   sloop = loop_outer (sloop))
1750 	if (bitmap_bit_p (ref->stored, loop->num))
1751 	  {
1752 	    bitmap refs_stored
1753 	      = memory_accesses.all_refs_stored_in_loop[sloop->num];
1754 	    bitmap_set_bit (refs_stored, ref->id);
1755 	  }
1756     }
1757 }
1758 
1759 /* For each non-clobbered virtual operand and each loop, record the memory
1760    references in this loop that touch the operand.  */
1761 
1762 static void
1763 create_vop_ref_mapping (void)
1764 {
1765   loop_iterator li;
1766   struct loop *loop;
1767 
1768   FOR_EACH_LOOP (li, loop, 0)
1769     {
1770       create_vop_ref_mapping_loop (loop);
1771     }
1772 }
1773 
1774 /* Gathers information about memory accesses in the loops.  */
1775 
1776 static void
1777 analyze_memory_references (void)
1778 {
1779   unsigned i;
1780   bitmap empty;
1781 
1782   memory_accesses.refs = htab_create (100, memref_hash, memref_eq, NULL);
1783   memory_accesses.refs_list.create (0);
1784   memory_accesses.refs_in_loop.create (number_of_loops ());
1785   memory_accesses.all_refs_in_loop.create (number_of_loops ());
1786   memory_accesses.all_refs_stored_in_loop.create (number_of_loops ());
1787 
1788   for (i = 0; i < number_of_loops (); i++)
1789     {
1790       empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1791       memory_accesses.refs_in_loop.quick_push (empty);
1792       empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1793       memory_accesses.all_refs_in_loop.quick_push (empty);
1794       empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1795       memory_accesses.all_refs_stored_in_loop.quick_push (empty);
1796     }
1797 
1798   memory_accesses.ttae_cache = NULL;
1799 
1800   gather_mem_refs_in_loops ();
1801   create_vop_ref_mapping ();
1802 }
1803 
1804 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1805    tree_to_aff_combination_expand.  */
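
/* As a hedged illustration (hypothetical accesses): for MEM1 = a[i] and
   MEM2 = a[i + 1], both offsets expand to affine combinations over the
   same base; their difference equals the element size, so the locations
   cannot overlap and the function below returns false.  */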
1806 
1807 static bool
1808 mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
1809 {
1810   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1811      object and their offsets differ in such a way that the locations cannot
1812      overlap, then they cannot alias.  */
1813   double_int size1, size2;
1814   aff_tree off1, off2;
1815 
1816   /* Perform basic offset and type-based disambiguation.  */
1817   if (!refs_may_alias_p (mem1, mem2))
1818     return false;
1819 
1820   /* The expansion of addresses may be a bit expensive, thus we only do
1821      the check at -O2 and higher optimization levels.  */
1822   if (optimize < 2)
1823     return true;
1824 
1825   get_inner_reference_aff (mem1, &off1, &size1);
1826   get_inner_reference_aff (mem2, &off2, &size2);
1827   aff_combination_expand (&off1, ttae_cache);
1828   aff_combination_expand (&off2, ttae_cache);
1829   aff_combination_scale (&off1, double_int_minus_one);
1830   aff_combination_add (&off2, &off1);
1831 
1832   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1833     return false;
1834 
1835   return true;
1836 }
1837 
1838 /* Rewrites location LOC by TMP_VAR.  */
1839 
1840 static void
1841 rewrite_mem_ref_loc (mem_ref_loc_p loc, tree tmp_var)
1842 {
1843   *loc->ref = tmp_var;
1844   update_stmt (loc->stmt);
1845 }
1846 
1847 /* Adds all locations of REF in LOOP and its subloops to LOCS.  */
1848 
1849 static void
1850 get_all_locs_in_loop (struct loop *loop, mem_ref_p ref,
1851 		      vec<mem_ref_loc_p> *locs)
1852 {
1853   mem_ref_locs_p accs;
1854   unsigned i;
1855   mem_ref_loc_p loc;
1856   bitmap refs = memory_accesses.all_refs_in_loop[loop->num];
1857   struct loop *subloop;
1858 
1859   if (!bitmap_bit_p (refs, ref->id))
1860     return;
1861 
1862   if (ref->accesses_in_loop.length ()
1863       > (unsigned) loop->num)
1864     {
1865       accs = ref->accesses_in_loop[loop->num];
1866       if (accs)
1867 	{
1868 	  FOR_EACH_VEC_ELT (accs->locs, i, loc)
1869 	    locs->safe_push (loc);
1870 	}
1871     }
1872 
1873   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
1874     get_all_locs_in_loop (subloop, ref, locs);
1875 }
1876 
1877 /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1878 
1879 static void
1880 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1881 {
1882   unsigned i;
1883   mem_ref_loc_p loc;
1884   vec<mem_ref_loc_p> locs = vNULL;
1885 
1886   get_all_locs_in_loop (loop, ref, &locs);
1887   FOR_EACH_VEC_ELT (locs, i, loc)
1888     rewrite_mem_ref_loc (loc, tmp_var);
1889   locs.release ();
1890 }
1891 
1892 /* The name and the length of the currently generated variable
1893    for lsm.  */
1894 #define MAX_LSM_NAME_LENGTH 40
1895 static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
1896 static int lsm_tmp_name_length;
1897 
1898 /* Adds S to lsm_tmp_name.  */
1899 
1900 static void
1901 lsm_tmp_name_add (const char *s)
1902 {
1903   int l = strlen (s) + lsm_tmp_name_length;
1904   if (l > MAX_LSM_NAME_LENGTH)
1905     return;
1906 
1907   strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
1908   lsm_tmp_name_length = l;
1909 }
1910 
1911 /* Stores the name for the temporary variable that replaces REF into
1912    lsm_tmp_name.  */
1913 
1914 static void
1915 gen_lsm_tmp_name (tree ref)
1916 {
1917   const char *name;
1918 
1919   switch (TREE_CODE (ref))
1920     {
1921     case MEM_REF:
1922     case TARGET_MEM_REF:
1923       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1924       lsm_tmp_name_add ("_");
1925       break;
1926 
1927     case ADDR_EXPR:
1928       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1929       break;
1930 
1931     case BIT_FIELD_REF:
1932     case VIEW_CONVERT_EXPR:
1933     case ARRAY_RANGE_REF:
1934       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1935       break;
1936 
1937     case REALPART_EXPR:
1938       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1939       lsm_tmp_name_add ("_RE");
1940       break;
1941 
1942     case IMAGPART_EXPR:
1943       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1944       lsm_tmp_name_add ("_IM");
1945       break;
1946 
1947     case COMPONENT_REF:
1948       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1949       lsm_tmp_name_add ("_");
1950       name = get_name (TREE_OPERAND (ref, 1));
1951       if (!name)
1952 	name = "F";
1953       lsm_tmp_name_add (name);
1954       break;
1955 
1956     case ARRAY_REF:
1957       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1958       lsm_tmp_name_add ("_I");
1959       break;
1960 
1961     case SSA_NAME:
1962     case VAR_DECL:
1963     case PARM_DECL:
1964       name = get_name (ref);
1965       if (!name)
1966 	name = "D";
1967       lsm_tmp_name_add (name);
1968       break;
1969 
1970     case STRING_CST:
1971       lsm_tmp_name_add ("S");
1972       break;
1973 
1974     case RESULT_DECL:
1975       lsm_tmp_name_add ("R");
1976       break;
1977 
1978     case INTEGER_CST:
1979       /* Nothing.  */
1980       break;
1981 
1982     default:
1983       gcc_unreachable ();
1984     }
1985 }
1986 
1987 /* Determines the name for the temporary variable that replaces REF.
1988    The name is accumulated into the lsm_tmp_name variable.
1989    N is appended to the name of the temporary.  */
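
/* For instance (purely illustrative), the name built for the reference
   a.b[i] is "a_b_I", so with N == 0 get_lsm_tmp_name returns
   "a_b_I_lsm0".  */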
1990 
1991 char *
1992 get_lsm_tmp_name (tree ref, unsigned n)
1993 {
1994   char ns[2];
1995 
1996   lsm_tmp_name_length = 0;
1997   gen_lsm_tmp_name (ref);
1998   lsm_tmp_name_add ("_lsm");
1999   if (n < 10)
2000     {
2001       ns[0] = '0' + n;
2002       ns[1] = 0;
2003       lsm_tmp_name_add (ns);
2004     }
2005   return lsm_tmp_name;
2006 }
2007 
2008 struct prev_flag_edges {
2009   /* Edge to insert new flag comparison code.  */
2010   edge append_cond_position;
2011 
2012   /* Edge for fall through from previous flag comparison.  */
2013   edge last_cond_fallthru;
2014 };
2015 
2016 /* Helper function for execute_sm.  Emit code to store TMP_VAR into
2017    MEM along edge EX.
2018 
2019    The store is only done if MEM has changed.  We do this so no
2020    changes to MEM occur on code paths that did not originally store
2021    into it.
2022 
2023    The common case for execute_sm will transform:
2024 
2025      for (...) {
2026        if (foo)
2027          stuff;
2028        else
2029          MEM = TMP_VAR;
2030      }
2031 
2032    into:
2033 
2034      lsm = MEM;
2035      for (...) {
2036        if (foo)
2037          stuff;
2038        else
2039          lsm = TMP_VAR;
2040      }
2041      MEM = lsm;
2042 
2043   This function will generate:
2044 
2045      lsm = MEM;
2046 
2047      lsm_flag = false;
2048      ...
2049      for (...) {
2050        if (foo)
2051          stuff;
2052        else {
2053          lsm = TMP_VAR;
2054          lsm_flag = true;
2055        }
2056      }
2057      if (lsm_flag)	<--
2058        MEM = lsm;	<--
2059 */
2060 
2061 static void
2062 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
2063 {
2064   basic_block new_bb, then_bb, old_dest;
2065   bool loop_has_only_one_exit;
2066   edge then_old_edge, orig_ex = ex;
2067   gimple_stmt_iterator gsi;
2068   gimple stmt;
2069   struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
2070   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2071 
2072   /* ?? Insert store after previous store if applicable.  See note
2073      below.  */
2074   if (prev_edges)
2075     ex = prev_edges->append_cond_position;
2076 
2077   loop_has_only_one_exit = single_pred_p (ex->dest);
2078 
2079   if (loop_has_only_one_exit)
2080     ex = split_block_after_labels (ex->dest);
2081 
2082   old_dest = ex->dest;
2083   new_bb = split_edge (ex);
2084   then_bb = create_empty_bb (new_bb);
2085   if (irr)
2086     then_bb->flags = BB_IRREDUCIBLE_LOOP;
2087   add_bb_to_loop (then_bb, new_bb->loop_father);
2088 
2089   gsi = gsi_start_bb (new_bb);
2090   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2091 			    NULL_TREE, NULL_TREE);
2092   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2093 
2094   gsi = gsi_start_bb (then_bb);
2095   /* Insert actual store.  */
2096   stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2097   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2098 
2099   make_edge (new_bb, then_bb,
2100 	     EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2101   make_edge (new_bb, old_dest,
2102 	     EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2103   then_old_edge = make_edge (then_bb, old_dest,
2104 			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2105 
2106   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2107 
2108   if (prev_edges)
2109     {
2110       basic_block prevbb = prev_edges->last_cond_fallthru->src;
2111       redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
2112       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2113       set_immediate_dominator (CDI_DOMINATORS, old_dest,
2114 			       recompute_dominator (CDI_DOMINATORS, old_dest));
2115     }
2116 
2117   /* ?? Because stores may alias, they must happen in the exact
2118      sequence they originally happened.  Save the position right after
2119      the (_lsm) store we just created so we can continue appending after
2120      it and maintain the original order.  */
2121   {
2122     struct prev_flag_edges *p;
2123 
2124     if (orig_ex->aux)
2125       orig_ex->aux = NULL;
2126     alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
2127     p = (struct prev_flag_edges *) orig_ex->aux;
2128     p->append_cond_position = then_old_edge;
2129     p->last_cond_fallthru = find_edge (new_bb, old_dest);
2130     orig_ex->aux = (void *) p;
2131   }
2132 
2133   if (!loop_has_only_one_exit)
2134     for (gsi = gsi_start_phis (old_dest); !gsi_end_p (gsi); gsi_next (&gsi))
2135       {
2136 	gimple phi = gsi_stmt (gsi);
2137 	unsigned i;
2138 
2139 	for (i = 0; i < gimple_phi_num_args (phi); i++)
2140 	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2141 	    {
2142 	      tree arg = gimple_phi_arg_def (phi, i);
2143 	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2144 	      update_stmt (phi);
2145 	    }
2146       }
2147   /* Remove the original fall through edge.  This was the
2148      single_succ_edge (new_bb).  */
2149   EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
2150 }
2151 
2152 /* Helper function for execute_sm.  On every location where REF is
2153    set, set an appropriate flag indicating the store.  */
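
/* E.g. (illustrative) every statement in LOOP of the form "MEM = x_1;"
   where MEM is a location of REF gets "flag = true;" inserted right
   after it; the store itself is rewritten to the lsm temporary later,
   in execute_sm.  */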
2154 
2155 static tree
2156 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
2157 {
2158   unsigned i;
2159   mem_ref_loc_p loc;
2160   tree flag;
2161   vec<mem_ref_loc_p> locs = vNULL;
2162   char *str = get_lsm_tmp_name (ref->mem, ~0);
2163 
2164   lsm_tmp_name_add ("_flag");
2165   flag = create_tmp_reg (boolean_type_node, str);
2166   get_all_locs_in_loop (loop, ref, &locs);
2167   FOR_EACH_VEC_ELT (locs, i, loc)
2168     {
2169       gimple_stmt_iterator gsi;
2170       gimple stmt;
2171 
2172       /* Only set the flag for writes.  */
2173       if (is_gimple_assign (loc->stmt)
2174 	  && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2175 	{
2176 	  gsi = gsi_for_stmt (loc->stmt);
2177 	  stmt = gimple_build_assign (flag, boolean_true_node);
2178 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2179 	}
2180     }
2181   locs.release ();
2182   return flag;
2183 }
2184 
2185 /* Executes store motion of memory reference REF from LOOP.
2186    Exits from the LOOP are stored in EXITS.  The initialization of the
2187    temporary variable is put in the preheader of the loop, and assignments
2188    to the reference from the temporary variable are emitted on the exits.  */
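
/* A minimal sketch of the single-threaded variant (hypothetical source,
   assuming *p and a[] do not alias):

     for (i = 0; i < n; i++)
       *p += a[i];

   becomes, with the temporary named p_lsm:

     p_lsm = *p;
     for (i = 0; i < n; i++)
       p_lsm += a[i];
     *p = p_lsm;

   In the multi-threaded model the store after the loop is instead
   guarded by a flag, see execute_sm_if_changed above.  */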
2189 
2190 static void
2191 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
2192 {
2193   tree tmp_var, store_flag;
2194   unsigned i;
2195   gimple load;
2196   struct fmt_data fmt_data;
2197   edge ex, latch_edge;
2198   struct lim_aux_data *lim_data;
2199   bool multi_threaded_model_p = false;
2200 
2201   if (dump_file && (dump_flags & TDF_DETAILS))
2202     {
2203       fprintf (dump_file, "Executing store motion of ");
2204       print_generic_expr (dump_file, ref->mem, 0);
2205       fprintf (dump_file, " from loop %d\n", loop->num);
2206     }
2207 
2208   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem),
2209 			      get_lsm_tmp_name (ref->mem, ~0));
2210 
2211   fmt_data.loop = loop;
2212   fmt_data.orig_loop = loop;
2213   for_each_index (&ref->mem, force_move_till, &fmt_data);
2214 
2215   if (block_in_transaction (loop_preheader_edge (loop)->src)
2216       || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
2217     multi_threaded_model_p = true;
2218 
2219   if (multi_threaded_model_p)
2220     store_flag = execute_sm_if_changed_flag_set (loop, ref);
2221 
2222   rewrite_mem_refs (loop, ref, tmp_var);
2223 
2224   /* Emit the load code into the latch, so that we are sure it will
2225      be processed after all dependencies.  */
2226   latch_edge = loop_latch_edge (loop);
2227 
2228   /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2229      load altogether, since the store is predicated by a flag.  We
2230      could do the load only if it was originally in the loop.  */
2231   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
2232   lim_data = init_lim_data (load);
2233   lim_data->max_loop = loop;
2234   lim_data->tgt_loop = loop;
2235   gsi_insert_on_edge (latch_edge, load);
2236 
2237   if (multi_threaded_model_p)
2238     {
2239       load = gimple_build_assign (store_flag, boolean_false_node);
2240       lim_data = init_lim_data (load);
2241       lim_data->max_loop = loop;
2242       lim_data->tgt_loop = loop;
2243       gsi_insert_on_edge (latch_edge, load);
2244     }
2245 
2246   /* Sink the store to every exit from the loop.  */
2247   FOR_EACH_VEC_ELT (exits, i, ex)
2248     if (!multi_threaded_model_p)
2249       {
2250 	gimple store;
2251 	store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
2252 	gsi_insert_on_edge (ex, store);
2253       }
2254     else
2255       execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
2256 }
2257 
2258 /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2259    edges of the LOOP.  */
2260 
2261 static void
2262 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2263 			 vec<edge> exits)
2264 {
2265   mem_ref_p ref;
2266   unsigned  i;
2267   bitmap_iterator bi;
2268 
2269   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2270     {
2271       ref = memory_accesses.refs_list[i];
2272       execute_sm (loop, exits, ref);
2273     }
2274 }
2275 
2276 /* Returns true if REF is always accessed in LOOP.  If STORED_P is true,
2277    make sure REF is always stored to in LOOP.  */
2278 
2279 static bool
2280 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2281 {
2282   vec<mem_ref_loc_p> locs = vNULL;
2283   unsigned i;
2284   mem_ref_loc_p loc;
2285   bool ret = false;
2286   struct loop *must_exec;
2287   tree base;
2288 
2289   base = get_base_address (ref->mem);
2290   if (INDIRECT_REF_P (base)
2291       || TREE_CODE (base) == MEM_REF)
2292     base = TREE_OPERAND (base, 0);
2293 
2294   get_all_locs_in_loop (loop, ref, &locs);
2295   FOR_EACH_VEC_ELT (locs, i, loc)
2296     {
2297       if (!get_lim_data (loc->stmt))
2298 	continue;
2299 
2300       /* If we require an always executed store, make sure the statement
2301          stores to the reference.  */
2302       if (stored_p)
2303 	{
2304 	  tree lhs;
2305 	  if (!gimple_get_lhs (loc->stmt))
2306 	    continue;
2307 	  lhs = get_base_address (gimple_get_lhs (loc->stmt));
2308 	  if (!lhs)
2309 	    continue;
2310 	  if (INDIRECT_REF_P (lhs)
2311 	      || TREE_CODE (lhs) == MEM_REF)
2312 	    lhs = TREE_OPERAND (lhs, 0);
2313 	  if (lhs != base)
2314 	    continue;
2315 	}
2316 
2317       must_exec = get_lim_data (loc->stmt)->always_executed_in;
2318       if (!must_exec)
2319 	continue;
2320 
2321       if (must_exec == loop
2322 	  || flow_loop_nested_p (must_exec, loop))
2323 	{
2324 	  ret = true;
2325 	  break;
2326 	}
2327     }
2328   locs.release ();
2329 
2330   return ret;
2331 }
2332 
2333 /* Returns true if REF1 and REF2 are independent.  */
2334 
2335 static bool
2336 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2337 {
2338   if (ref1 == ref2
2339       || bitmap_bit_p (ref1->indep_ref, ref2->id))
2340     return true;
2341   if (bitmap_bit_p (ref1->dep_ref, ref2->id))
2342     return false;
2343   if (!MEM_ANALYZABLE (ref1)
2344       || !MEM_ANALYZABLE (ref2))
2345     return false;
2346 
2347   if (dump_file && (dump_flags & TDF_DETAILS))
2348     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2349 	     ref1->id, ref2->id);
2350 
2351   if (mem_refs_may_alias_p (ref1->mem, ref2->mem,
2352 			    &memory_accesses.ttae_cache))
2353     {
2354       bitmap_set_bit (ref1->dep_ref, ref2->id);
2355       bitmap_set_bit (ref2->dep_ref, ref1->id);
2356       if (dump_file && (dump_flags & TDF_DETAILS))
2357 	fprintf (dump_file, "dependent.\n");
2358       return false;
2359     }
2360   else
2361     {
2362       bitmap_set_bit (ref1->indep_ref, ref2->id);
2363       bitmap_set_bit (ref2->indep_ref, ref1->id);
2364       if (dump_file && (dump_flags & TDF_DETAILS))
2365 	fprintf (dump_file, "independent.\n");
2366       return true;
2367     }
2368 }
2369 
2370 /* Records the information whether REF is independent in LOOP (according
2371    to INDEP).  */
2372 
2373 static void
2374 record_indep_loop (struct loop *loop, mem_ref_p ref, bool indep)
2375 {
2376   if (indep)
2377     bitmap_set_bit (ref->indep_loop, loop->num);
2378   else
2379     bitmap_set_bit (ref->dep_loop, loop->num);
2380 }
2381 
2382 /* Returns true if REF is independent of all other memory references in
2383    LOOP.  */
2384 
2385 static bool
2386 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref)
2387 {
2388   bitmap refs_to_check;
2389   unsigned i;
2390   bitmap_iterator bi;
2391   bool ret = true, stored = bitmap_bit_p (ref->stored, loop->num);
2392   mem_ref_p aref;
2393 
2394   if (stored)
2395     refs_to_check = memory_accesses.all_refs_in_loop[loop->num];
2396   else
2397     refs_to_check = memory_accesses.all_refs_stored_in_loop[loop->num];
2398 
2399   EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2400     {
2401       aref = memory_accesses.refs_list[i];
2402       if (!MEM_ANALYZABLE (aref)
2403 	  || !refs_independent_p (ref, aref))
2404 	{
2405 	  ret = false;
2406 	  record_indep_loop (loop, aref, false);
2407 	  break;
2408 	}
2409     }
2410 
2411   return ret;
2412 }
2413 
2414 /* Returns true if REF is independent of all other memory references in
2415    LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */
2416 
2417 static bool
2418 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2419 {
2420   bool ret;
2421 
2422   if (bitmap_bit_p (ref->indep_loop, loop->num))
2423     return true;
2424   if (bitmap_bit_p (ref->dep_loop, loop->num))
2425     return false;
2426 
2427   ret = ref_indep_loop_p_1 (loop, ref);
2428 
2429   if (dump_file && (dump_flags & TDF_DETAILS))
2430     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2431 	     ref->id, loop->num, ret ? "independent" : "dependent");
2432 
2433   record_indep_loop (loop, ref, ret);
2434 
2435   return ret;
2436 }
2437 
2438 /* Returns true if we can perform store motion of REF from LOOP.  */
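
/* For instance (purely illustrative): in

     for (i = 0; i < n; i++)
       {
         if (a[i] < 0)
           g = i;
         a[i] = 0;
       }

   the global g can be the subject of store motion, whereas a[i] cannot,
   because its address is not invariant in the loop.  */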
2439 
2440 static bool
2441 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2442 {
2443   tree base;
2444 
2445   /* Can't hoist unanalyzable refs.  */
2446   if (!MEM_ANALYZABLE (ref))
2447     return false;
2448 
2449   /* Unless the reference is stored in the loop, there is nothing to do.  */
2450   if (!bitmap_bit_p (ref->stored, loop->num))
2451     return false;
2452 
2453   /* It should be movable.  */
2454   if (!is_gimple_reg_type (TREE_TYPE (ref->mem))
2455       || TREE_THIS_VOLATILE (ref->mem)
2456       || !for_each_index (&ref->mem, may_move_till, loop))
2457     return false;
2458 
2459   /* If it can throw, fail; we do not properly update EH info.  */
2460   if (tree_could_throw_p (ref->mem))
2461     return false;
2462 
2463   /* If it can trap, it must always be executed in LOOP.
2464      Readonly memory locations may trap when storing to them, but
2465      tree_could_trap_p is a predicate for rvalues, so check that
2466      explicitly.  */
2467   base = get_base_address (ref->mem);
2468   if ((tree_could_trap_p (ref->mem)
2469        || (DECL_P (base) && TREE_READONLY (base)))
2470       && !ref_always_accessed_p (loop, ref, true))
2471     return false;
2472 
2473   /* And it must be independent of all other memory references
2474      in LOOP.  */
2475   if (!ref_indep_loop_p (loop, ref))
2476     return false;
2477 
2478   return true;
2479 }
2480 
2481 /* Marks in REFS_TO_SM the references in LOOP for which store motion should
2482    be performed.  SM_EXECUTED is the set of references for which store
2483    motion was performed in one of the outer loops.  */
2484 
2485 static void
2486 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2487 {
2488   bitmap refs = memory_accesses.all_refs_in_loop[loop->num];
2489   unsigned i;
2490   bitmap_iterator bi;
2491   mem_ref_p ref;
2492 
2493   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2494     {
2495       ref = memory_accesses.refs_list[i];
2496       if (can_sm_ref_p (loop, ref))
2497 	bitmap_set_bit (refs_to_sm, i);
2498     }
2499 }
2500 
2501 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2502    for a store motion optimization (i.e. whether we can insert statements
2503    on its exits).  */
2504 
2505 static bool
2506 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2507 		      vec<edge> exits)
2508 {
2509   unsigned i;
2510   edge ex;
2511 
2512   FOR_EACH_VEC_ELT (exits, i, ex)
2513     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2514       return false;
2515 
2516   return true;
2517 }
2518 
2519 /* Try to perform store motion for all memory references modified inside
2520    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
2521    store motion was executed in one of the outer loops.  */
2522 
2523 static void
2524 store_motion_loop (struct loop *loop, bitmap sm_executed)
2525 {
2526   vec<edge> exits = get_loop_exit_edges (loop);
2527   struct loop *subloop;
2528   bitmap sm_in_loop = BITMAP_ALLOC (NULL);
2529 
2530   if (loop_suitable_for_sm (loop, exits))
2531     {
2532       find_refs_for_sm (loop, sm_executed, sm_in_loop);
2533       hoist_memory_references (loop, sm_in_loop, exits);
2534     }
2535   exits.release ();
2536 
2537   bitmap_ior_into (sm_executed, sm_in_loop);
2538   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2539     store_motion_loop (subloop, sm_executed);
2540   bitmap_and_compl_into (sm_executed, sm_in_loop);
2541   BITMAP_FREE (sm_in_loop);
2542 }
2543 
2544 /* Try to perform store motion for all memory references modified inside
2545    loops.  */
2546 
2547 static void
2548 store_motion (void)
2549 {
2550   struct loop *loop;
2551   bitmap sm_executed = BITMAP_ALLOC (NULL);
2552 
2553   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2554     store_motion_loop (loop, sm_executed);
2555 
2556   BITMAP_FREE (sm_executed);
2557   gsi_commit_edge_inserts ();
2558 }
2559 
2560 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2561    for each such basic block bb records the outermost loop for which execution
2562    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
2563    blocks that contain a nonpure call.  */
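
/* As an informal example: in

     while (1)
       {
         if (cond)
           break;
         a[i] = 0;
       }

   only the header block is known to be always executed whenever the loop
   is entered; the block containing the store runs only on iterations
   where COND is false.  */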
2564 
2565 static void
2566 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
2567 {
2568   basic_block bb = NULL, *bbs, last = NULL;
2569   unsigned i;
2570   edge e;
2571   struct loop *inn_loop = loop;
2572 
2573   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2574     {
2575       bbs = get_loop_body_in_dom_order (loop);
2576 
2577       for (i = 0; i < loop->num_nodes; i++)
2578 	{
2579 	  edge_iterator ei;
2580 	  bb = bbs[i];
2581 
2582 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2583 	    last = bb;
2584 
2585 	  if (bitmap_bit_p (contains_call, bb->index))
2586 	    break;
2587 
2588 	  FOR_EACH_EDGE (e, ei, bb->succs)
2589 	    if (!flow_bb_inside_loop_p (loop, e->dest))
2590 	      break;
2591 	  if (e)
2592 	    break;
2593 
2594 	  /* A loop might be infinite (TODO use simple loop analysis
2595 	     to disprove this if possible).  */
2596 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
2597 	    break;
2598 
2599 	  if (!flow_bb_inside_loop_p (inn_loop, bb))
2600 	    break;
2601 
2602 	  if (bb->loop_father->header == bb)
2603 	    {
2604 	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2605 		break;
2606 
2607 	      /* In a loop that is always entered we may proceed anyway.
2608 		 But record that we entered it and stop once we leave it.  */
2609 	      inn_loop = bb->loop_father;
2610 	    }
2611 	}
2612 
2613       while (1)
2614 	{
2615 	  SET_ALWAYS_EXECUTED_IN (last, loop);
2616 	  if (last == loop->header)
2617 	    break;
2618 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
2619 	}
2620 
2621       free (bbs);
2622     }
2623 
2624   for (loop = loop->inner; loop; loop = loop->next)
2625     fill_always_executed_in (loop, contains_call);
2626 }
2627 
2628 /* Compute the global information needed by the loop invariant motion pass.  */
2629 
2630 static void
2631 tree_ssa_lim_initialize (void)
2632 {
2633   sbitmap contains_call = sbitmap_alloc (last_basic_block);
2634   gimple_stmt_iterator bsi;
2635   struct loop *loop;
2636   basic_block bb;
2637 
2638   bitmap_obstack_initialize (&lim_bitmap_obstack);
2639 
2640   bitmap_clear (contains_call);
2641   FOR_EACH_BB (bb)
2642     {
2643       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2644 	{
2645 	  if (nonpure_call_p (gsi_stmt (bsi)))
2646 	    break;
2647 	}
2648 
2649       if (!gsi_end_p (bsi))
2650 	bitmap_set_bit (contains_call, bb->index);
2651     }
2652 
2653   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2654     fill_always_executed_in (loop, contains_call);
2655 
2656   sbitmap_free (contains_call);
2657 
2658   lim_aux_data_map = pointer_map_create ();
2659 
2660   if (flag_tm)
2661     compute_transaction_bits ();
2662 
2663   alloc_aux_for_edges (0);
2664 }
2665 
2666 /* Cleans up after the invariant motion pass.  */
2667 
2668 static void
2669 tree_ssa_lim_finalize (void)
2670 {
2671   basic_block bb;
2672   unsigned i;
2673   mem_ref_p ref;
2674 
2675   free_aux_for_edges ();
2676 
2677   FOR_EACH_BB (bb)
2678     SET_ALWAYS_EXECUTED_IN (bb, NULL);
2679 
2680   bitmap_obstack_release (&lim_bitmap_obstack);
2681   pointer_map_destroy (lim_aux_data_map);
2682 
2683   htab_delete (memory_accesses.refs);
2684 
2685   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2686     memref_free (ref);
2687   memory_accesses.refs_list.release ();
2688 
2689   memory_accesses.refs_in_loop.release ();
2690   memory_accesses.all_refs_in_loop.release ();
2691   memory_accesses.all_refs_stored_in_loop.release ();
2692 
2693   if (memory_accesses.ttae_cache)
2694     free_affine_expand_cache (&memory_accesses.ttae_cache);
2695 }
2696 
2697 /* Moves invariants from loops.  Only "expensive" invariants are moved out --
2698    i.e. those that are likely to be a win regardless of register pressure.  */
2699 
2700 unsigned int
2701 tree_ssa_lim (void)
2702 {
2703   unsigned int todo;
2704 
2705   tree_ssa_lim_initialize ();
2706 
2707   /* Gathers information about memory accesses in the loops.  */
2708   analyze_memory_references ();
2709 
2710   /* For each statement determine the outermost loop in which it is
2711      invariant and the cost of computing the invariant.  */
2712   determine_invariantness ();
2713 
2714   /* Execute store motion.  Force the necessary invariants to be moved
2715      out of the loops as well.  */
2716   store_motion ();
2717 
2718   /* Move the expressions that are expensive enough.  */
2719   todo = move_computations ();
2720 
2721   tree_ssa_lim_finalize ();
2722 
2723   return todo;
2724 }
2725