1 /* Loop invariant motion.
2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "domwalk.h"
41 #include "params.h"
42 #include "tree-affine.h"
43 #include "tree-ssa-propagate.h"
44 #include "trans-mem.h"
45 #include "gimple-fold.h"
46 #include "tree-scalar-evolution.h"
47 #include "tree-ssa-loop-niter.h"
48 
49 /* TODO:  Support for predicated code motion.  I.e.
50 
51    while (1)
52      {
53        if (cond)
54 	 {
55 	   a = inv;
56 	   something;
57 	 }
58      }
59 
60    Where COND and INV are invariants, but evaluating INV may trap or be
61    invalid for some other reason if !COND.  This may be transformed to
62 
63    if (cond)
64      a = inv;
65    while (1)
66      {
67        if (cond)
68 	 something;
69      }  */
70 
71 /* The auxiliary data kept for each statement.  */
72 
73 struct lim_aux_data
74 {
75   struct loop *max_loop;	/* The outermost loop in which the statement
76 				   is invariant.  */
77 
78   struct loop *tgt_loop;	/* The loop out of which we want to move the
79 				   invariant.  */
80 
81   struct loop *always_executed_in;
82 				/* The outermost loop for which we are sure
83 				   the statement is executed if the loop
84 				   is entered.  */
85 
86   unsigned cost;		/* Cost of the computation performed by the
87 				   statement.  */
88 
89   vec<gimple *> depends;		/* Vector of statements that must also be
90 				   hoisted out of the loop when this statement
91 				   is hoisted; i.e. those that define the
92 				   operands of the statement and are inside
93 				   the MAX_LOOP loop.  */
94 };
95 
96 /* Maps statements to their lim_aux_data.  */
97 
98 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
99 
100 /* Description of a memory reference location.  */
101 
102 struct mem_ref_loc
103 {
104   tree *ref;			/* The reference itself.  */
105   gimple *stmt;			/* The statement in which it occurs.  */
106 };
107 
108 
109 /* Description of a memory reference.  */
110 
111 struct im_mem_ref
112 {
113   unsigned id;			/* ID assigned to the memory reference
114 				   (its index in memory_accesses.refs_list)  */
115   hashval_t hash;		/* Its hash value.  */
116 
117   /* The memory access itself and associated caching of alias-oracle
118      query meta-data.  */
119   ao_ref mem;
120 
121   bitmap stored;		/* The set of loops in which this memory location
122 				   is stored to.  */
123   vec<mem_ref_loc>		accesses_in_loop;
124 				/* The locations of the accesses.  Vector
125 				   sorted by the postorder number of the loop
				   each access occurs in.  */
126 
127   /* The following sets are computed on demand.  We keep both set and
128      its complement, so that we know whether the information was
129      already computed or not.  */
130   bitmap_head indep_loop;	/* The set of loops in which the memory
131 				   reference is independent, meaning:
132 				   If it is stored in the loop, this store
133 				     is independent of all other loads and
134 				     stores.
135 				   If it is only loaded, then it is independent
136 				     of all stores in the loop.  */
137   bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
138 };
139 
140 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
141    to record (in)dependence against stores in the loop and its subloops, the
142    second to record (in)dependence against all references in the loop
143    and its subloops.  */
144 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
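
/* For example (an illustration of the encoding only): for loop number 3 the
   pair of bits is LOOP_DEP_BIT (3, false) == 6 and
   LOOP_DEP_BIT (3, true) == 7.  */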
145 
146 /* Mem_ref hashtable helpers.  */
147 
148 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
149 {
150   typedef tree_node *compare_type;
151   static inline hashval_t hash (const im_mem_ref *);
152   static inline bool equal (const im_mem_ref *, const tree_node *);
153 };
154 
155 /* A hash function for struct im_mem_ref object OBJ.  */
156 
157 inline hashval_t
158 mem_ref_hasher::hash (const im_mem_ref *mem)
159 {
160   return mem->hash;
161 }
162 
163 /* An equality function for struct im_mem_ref object MEM1 with
164    memory reference OBJ2.  */
165 
166 inline bool
167 mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
168 {
169   return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
170 }
171 
172 
173 /* Description of memory accesses in loops.  */
174 
175 static struct
176 {
177   /* The hash table of memory references accessed in loops.  */
178   hash_table<mem_ref_hasher> *refs;
179 
180   /* The list of memory references.  */
181   vec<im_mem_ref *> refs_list;
182 
183   /* The set of memory references accessed in each loop.  */
184   vec<bitmap_head> refs_in_loop;
185 
186   /* The set of memory references stored in each loop.  */
187   vec<bitmap_head> refs_stored_in_loop;
188 
189   /* The set of memory references stored in each loop, including subloops.  */
190   vec<bitmap_head> all_refs_stored_in_loop;
191 
192   /* Cache for expanding memory addresses.  */
193   hash_map<tree, name_expansion *> *ttae_cache;
194 } memory_accesses;
195 
196 /* Obstack for the bitmaps in the above data structures.  */
197 static bitmap_obstack lim_bitmap_obstack;
198 static obstack mem_ref_obstack;
199 
200 static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
201 
202 /* Minimum cost of an expensive expression.  */
203 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
204 
205 /* The outermost loop for which execution of the header guarantees that the
206    block will be executed.  */
207 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
208 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
209 
210 /* ID of the shared unanalyzable mem.  */
211 #define UNANALYZABLE_MEM_ID 0
212 
213 /* Whether the reference was analyzable.  */
214 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
215 
216 static struct lim_aux_data *
217 init_lim_data (gimple *stmt)
218 {
219   lim_aux_data *p = XCNEW (struct lim_aux_data);
220   lim_aux_data_map->put (stmt, p);
221 
222   return p;
223 }
224 
225 static struct lim_aux_data *
226 get_lim_data (gimple *stmt)
227 {
228   lim_aux_data **p = lim_aux_data_map->get (stmt);
229   if (!p)
230     return NULL;
231 
232   return *p;
233 }
234 
235 /* Releases the memory occupied by DATA.  */
236 
237 static void
238 free_lim_aux_data (struct lim_aux_data *data)
239 {
240   data->depends.release ();
241   free (data);
242 }
243 
244 static void
245 clear_lim_data (gimple *stmt)
246 {
247   lim_aux_data **p = lim_aux_data_map->get (stmt);
248   if (!p)
249     return;
250 
251   free_lim_aux_data (*p);
252   *p = NULL;
253 }
254 
255 
256 /* The possibilities of statement movement.  */
257 enum move_pos
258   {
259     MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
260     MOVE_PRESERVE_EXECUTION,	/* Must not cause a non-executed statement
261 				   to become executed -- memory accesses, ... */
262     MOVE_POSSIBLE		/* Unlimited movement.  */
263   };
264 
265 
266 /* If it is possible to hoist the statement STMT unconditionally,
267    returns MOVE_POSSIBLE.
268    If it is possible to hoist the statement STMT, but we must avoid making
269    it executed if it would not be executed in the original program (e.g.
270    because it may trap), return MOVE_PRESERVE_EXECUTION.
271    Otherwise return MOVE_IMPOSSIBLE.  */
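
/* A few illustrative (non-exhaustive) classifications: a statement with
   volatile operands or a store (a VDEF) is MOVE_IMPOSSIBLE; an invariant
   division like x = a / b that could trap when b == 0 is
   MOVE_PRESERVE_EXECUTION; a simple non-trapping arithmetic statement with
   an SSA name LHS is MOVE_POSSIBLE.  */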
272 
273 enum move_pos
274 movement_possibility (gimple *stmt)
275 {
276   tree lhs;
277   enum move_pos ret = MOVE_POSSIBLE;
278 
279   if (flag_unswitch_loops
280       && gimple_code (stmt) == GIMPLE_COND)
281     {
282       /* If we perform unswitching, force the operands of the invariant
283 	 condition to be moved out of the loop.  */
284       return MOVE_POSSIBLE;
285     }
286 
287   if (gimple_code (stmt) == GIMPLE_PHI
288       && gimple_phi_num_args (stmt) <= 2
289       && !virtual_operand_p (gimple_phi_result (stmt))
290       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
291     return MOVE_POSSIBLE;
292 
293   if (gimple_get_lhs (stmt) == NULL_TREE)
294     return MOVE_IMPOSSIBLE;
295 
296   if (gimple_vdef (stmt))
297     return MOVE_IMPOSSIBLE;
298 
299   if (stmt_ends_bb_p (stmt)
300       || gimple_has_volatile_ops (stmt)
301       || gimple_has_side_effects (stmt)
302       || stmt_could_throw_p (stmt))
303     return MOVE_IMPOSSIBLE;
304 
305   if (is_gimple_call (stmt))
306     {
307       /* While a pure or const call is guaranteed to have no side effects, we
308 	 cannot move it arbitrarily.  Consider code like
309 
310 	 char *s = something ();
311 
312 	 while (1)
313 	   {
314 	     if (s)
315 	       t = strlen (s);
316 	     else
317 	       t = 0;
318 	   }
319 
320 	 Here the strlen call cannot be moved out of the loop, even though
321 	 s is invariant.  In addition to possibly creating a call with
322 	 invalid arguments, moving out a function call that is not executed
323 	 may cause performance regressions in case the call is costly and
324 	 not executed at all.  */
325       ret = MOVE_PRESERVE_EXECUTION;
326       lhs = gimple_call_lhs (stmt);
327     }
328   else if (is_gimple_assign (stmt))
329     lhs = gimple_assign_lhs (stmt);
330   else
331     return MOVE_IMPOSSIBLE;
332 
333   if (TREE_CODE (lhs) == SSA_NAME
334       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
335     return MOVE_IMPOSSIBLE;
336 
337   if (TREE_CODE (lhs) != SSA_NAME
338       || gimple_could_trap_p (stmt))
339     return MOVE_PRESERVE_EXECUTION;
340 
341   /* Non local loads in a transaction cannot be hoisted out.  Well,
342      unless the load happens on every path out of the loop, but we
343      don't take this into account yet.  */
344   if (flag_tm
345       && gimple_in_transaction (stmt)
346       && gimple_assign_single_p (stmt))
347     {
348       tree rhs = gimple_assign_rhs1 (stmt);
349       if (DECL_P (rhs) && is_global_var (rhs))
350 	{
351 	  if (dump_file)
352 	    {
353 	      fprintf (dump_file, "Cannot hoist conditional load of ");
354 	      print_generic_expr (dump_file, rhs, TDF_SLIM);
355 	      fprintf (dump_file, " because it is in a transaction.\n");
356 	    }
357 	  return MOVE_IMPOSSIBLE;
358 	}
359     }
360 
361   return ret;
362 }
363 
364 /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
365    loop to which we could move the expression using DEF if it did not have
366    other operands, i.e. the outermost loop enclosing LOOP in which the value
367    of DEF is invariant.  */
368 
369 static struct loop *
370 outermost_invariant_loop (tree def, struct loop *loop)
371 {
372   gimple *def_stmt;
373   basic_block def_bb;
374   struct loop *max_loop;
375   struct lim_aux_data *lim_data;
376 
377   if (!def)
378     return superloop_at_depth (loop, 1);
379 
380   if (TREE_CODE (def) != SSA_NAME)
381     {
382       gcc_assert (is_gimple_min_invariant (def));
383       return superloop_at_depth (loop, 1);
384     }
385 
386   def_stmt = SSA_NAME_DEF_STMT (def);
387   def_bb = gimple_bb (def_stmt);
388   if (!def_bb)
389     return superloop_at_depth (loop, 1);
390 
391   max_loop = find_common_loop (loop, def_bb->loop_father);
392 
393   lim_data = get_lim_data (def_stmt);
394   if (lim_data != NULL && lim_data->max_loop != NULL)
395     max_loop = find_common_loop (max_loop,
396 				 loop_outer (lim_data->max_loop));
397   if (max_loop == loop)
398     return NULL;
399   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
400 
401   return max_loop;
402 }
403 
404 /* DATA is a structure containing information associated with a statement
405    inside LOOP.  DEF is one of the operands of this statement.
406 
407    Find the outermost loop enclosing LOOP in which the value of DEF is
408    invariant and record this in the DATA->max_loop field.  If DEF itself is
409    defined inside this loop as well (i.e. we need to hoist it out of the loop
410    if we want to hoist the statement represented by DATA), add the statement
411    that defines DEF to the DATA->depends list.  Additionally, if ADD_COST is
412    true, add the cost of the computation of DEF to DATA->cost.
413 
414    If DEF is not invariant in LOOP, return false.  Otherwise return true.  */
415 
416 static bool
417 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
418 		bool add_cost)
419 {
420   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
421   basic_block def_bb = gimple_bb (def_stmt);
422   struct loop *max_loop;
423   struct lim_aux_data *def_data;
424 
425   if (!def_bb)
426     return true;
427 
428   max_loop = outermost_invariant_loop (def, loop);
429   if (!max_loop)
430     return false;
431 
432   if (flow_loop_nested_p (data->max_loop, max_loop))
433     data->max_loop = max_loop;
434 
435   def_data = get_lim_data (def_stmt);
436   if (!def_data)
437     return true;
438 
439   if (add_cost
440       /* Only add the cost if the statement defining DEF is inside LOOP,
441 	 i.e. if it is likely that by moving the invariants dependent
442 	 on it, we will be able to avoid creating a new register for
443 	 it (since it will be only used in these dependent invariants).  */
444       && def_bb->loop_father == loop)
445     data->cost += def_data->cost;
446 
447   data->depends.safe_push (def_stmt);
448 
449   return true;
450 }
451 
452 /* Returns an estimate for a cost of statement STMT.  The values here
453    are just ad-hoc constants, similar to costs for inlining.  */
454 
455 static unsigned
456 stmt_cost (gimple *stmt)
457 {
458   /* Always try to create possibilities for unswitching.  */
459   if (gimple_code (stmt) == GIMPLE_COND
460       || gimple_code (stmt) == GIMPLE_PHI)
461     return LIM_EXPENSIVE;
462 
463   /* We should be hoisting calls if possible.  */
464   if (is_gimple_call (stmt))
465     {
466       tree fndecl;
467 
468       /* Unless the call is to builtin_constant_p; this always folds to a
469 	 constant, so moving it is useless.  */
470       fndecl = gimple_call_fndecl (stmt);
471       if (fndecl
472 	  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
473 	  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
474 	return 0;
475 
476       return LIM_EXPENSIVE;
477     }
478 
479   /* Hoisting memory references out should almost surely be a win.  */
480   if (gimple_references_memory_p (stmt))
481     return LIM_EXPENSIVE;
482 
483   if (gimple_code (stmt) != GIMPLE_ASSIGN)
484     return 1;
485 
486   switch (gimple_assign_rhs_code (stmt))
487     {
488     case MULT_EXPR:
489     case WIDEN_MULT_EXPR:
490     case WIDEN_MULT_PLUS_EXPR:
491     case WIDEN_MULT_MINUS_EXPR:
492     case DOT_PROD_EXPR:
493     case FMA_EXPR:
494     case TRUNC_DIV_EXPR:
495     case CEIL_DIV_EXPR:
496     case FLOOR_DIV_EXPR:
497     case ROUND_DIV_EXPR:
498     case EXACT_DIV_EXPR:
499     case CEIL_MOD_EXPR:
500     case FLOOR_MOD_EXPR:
501     case ROUND_MOD_EXPR:
502     case TRUNC_MOD_EXPR:
503     case RDIV_EXPR:
504       /* Division and multiplication are usually expensive.  */
505       return LIM_EXPENSIVE;
506 
507     case LSHIFT_EXPR:
508     case RSHIFT_EXPR:
509     case WIDEN_LSHIFT_EXPR:
510     case LROTATE_EXPR:
511     case RROTATE_EXPR:
512       /* Shifts and rotates are usually expensive.  */
513       return LIM_EXPENSIVE;
514 
515     case CONSTRUCTOR:
516       /* Make vector construction cost proportional to the number
517          of elements.  */
518       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
519 
520     case SSA_NAME:
521     case PAREN_EXPR:
522       /* Whether or not something is wrapped inside a PAREN_EXPR
523          should not change move cost.  Nor should an intermediate
524 	 unpropagated SSA name copy.  */
525       return 0;
526 
527     default:
528       return 1;
529     }
530 }
531 
532 /* Finds the outermost loop between OUTER and LOOP in which the memory
533    reference REF is independent.  If REF is not independent in LOOP, NULL is
534    returned instead.  */
535 
536 static struct loop *
537 outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
538 {
539   struct loop *aloop;
540 
541   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
542     return NULL;
543 
544   for (aloop = outer;
545        aloop != loop;
546        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
547     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
548 	&& ref_indep_loop_p (aloop, ref))
549       return aloop;
550 
551   if (ref_indep_loop_p (loop, ref))
552     return loop;
553   else
554     return NULL;
555 }
556 
557 /* If there is a simple load or store to a memory reference in STMT, returns
558    the location of the memory reference, and sets IS_STORE according to whether
559    it is a store or load.  Otherwise, returns NULL.  */
560 
561 static tree *
562 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
563 {
564   tree *lhs, *rhs;
565 
566   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
567   if (!gimple_assign_single_p (stmt))
568     return NULL;
569 
570   lhs = gimple_assign_lhs_ptr (stmt);
571   rhs = gimple_assign_rhs1_ptr (stmt);
572 
573   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
574     {
575       *is_store = false;
576       return rhs;
577     }
578   else if (gimple_vdef (stmt)
579 	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
580     {
581       *is_store = true;
582       return lhs;
583     }
584   else
585     return NULL;
586 }
587 
588 /* Returns the memory reference contained in STMT.  */
589 
590 static im_mem_ref *
591 mem_ref_in_stmt (gimple *stmt)
592 {
593   bool store;
594   tree *mem = simple_mem_ref_in_stmt (stmt, &store);
595   hashval_t hash;
596   im_mem_ref *ref;
597 
598   if (!mem)
599     return NULL;
600   gcc_assert (!store);
601 
602   hash = iterative_hash_expr (*mem, 0);
603   ref = memory_accesses.refs->find_with_hash (*mem, hash);
604 
605   gcc_assert (ref != NULL);
606   return ref;
607 }
608 
609 /* From a controlling predicate in DOM determine the arguments from
610    the PHI node PHI that are chosen if the predicate evaluates to
611    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
612    they are non-NULL.  Returns true if the arguments can be determined,
613    else return false.  */
614 
615 static bool
616 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
617 				  tree *true_arg_p, tree *false_arg_p)
618 {
619   edge te, fe;
620   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
621 					     &te, &fe))
622     return false;
623 
624   if (true_arg_p)
625     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
626   if (false_arg_p)
627     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
628 
629   return true;
630 }
631 
632 /* Determine the outermost loop to which it is possible to hoist a statement
633    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
634    the outermost loop in which the value computed by STMT is invariant.
635    If MUST_PRESERVE_EXEC is true, additionally choose a loop such that
636    we preserve whether STMT is executed.  The function also fills in other
637    related information in LIM_DATA (STMT).
638 
639    The function returns false if STMT cannot be hoisted outside of the loop it
640    is defined in, and true otherwise.  */
641 
642 static bool
643 determine_max_movement (gimple *stmt, bool must_preserve_exec)
644 {
645   basic_block bb = gimple_bb (stmt);
646   struct loop *loop = bb->loop_father;
647   struct loop *level;
648   struct lim_aux_data *lim_data = get_lim_data (stmt);
649   tree val;
650   ssa_op_iter iter;
651 
652   if (must_preserve_exec)
653     level = ALWAYS_EXECUTED_IN (bb);
654   else
655     level = superloop_at_depth (loop, 1);
656   lim_data->max_loop = level;
657 
658   if (gphi *phi = dyn_cast <gphi *> (stmt))
659     {
660       use_operand_p use_p;
661       unsigned min_cost = UINT_MAX;
662       unsigned total_cost = 0;
663       struct lim_aux_data *def_data;
664 
665       /* We will end up promoting dependencies to be unconditionally
666 	 evaluated.  For this reason the PHI cost (and thus the
667 	 cost we remove from the loop by doing the invariant motion)
668 	 is that of the cheapest PHI argument dependency chain.  */
669       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
670 	{
671 	  val = USE_FROM_PTR (use_p);
672 
673 	  if (TREE_CODE (val) != SSA_NAME)
674 	    {
675 	      /* Assign cost 1 to constants.  */
676 	      min_cost = MIN (min_cost, 1);
677 	      total_cost += 1;
678 	      continue;
679 	    }
680 	  if (!add_dependency (val, lim_data, loop, false))
681 	    return false;
682 
683 	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
684 	  if (gimple_bb (def_stmt)
685 	      && gimple_bb (def_stmt)->loop_father == loop)
686 	    {
687 	      def_data = get_lim_data (def_stmt);
688 	      if (def_data)
689 		{
690 		  min_cost = MIN (min_cost, def_data->cost);
691 		  total_cost += def_data->cost;
692 		}
693 	    }
694 	}
695 
696       min_cost = MIN (min_cost, total_cost);
697       lim_data->cost += min_cost;
698 
699       if (gimple_phi_num_args (phi) > 1)
700 	{
701 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
702 	  gimple *cond;
703 	  if (gsi_end_p (gsi_last_bb (dom)))
704 	    return false;
705 	  cond = gsi_stmt (gsi_last_bb (dom));
706 	  if (gimple_code (cond) != GIMPLE_COND)
707 	    return false;
708 	  /* Verify that this is an extended form of a diamond and
709 	     the PHI arguments are completely controlled by the
710 	     predicate in DOM.  */
711 	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
712 	    return false;
713 
714 	  /* Fold in dependencies and cost of the condition.  */
715 	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
716 	    {
717 	      if (!add_dependency (val, lim_data, loop, false))
718 		return false;
719 	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
720 	      if (def_data)
721 		total_cost += def_data->cost;
722 	    }
723 
724 	  /* We want to avoid unconditionally executing very expensive
725 	     operations.  As costs for our dependencies cannot be
726 	     negative, just claim we are not invariant in this case.
727 	     We also are not sure whether the control flow inside the
728 	     loop will vanish.  */
729 	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
730 	      && !(min_cost != 0
731 		   && total_cost / min_cost <= 2))
732 	    return false;
733 
734 	  /* Assume that the control-flow in the loop will vanish.
735 	     ???  We should verify this and not artificially increase
736 	     the cost if that is not the case.  */
737 	  lim_data->cost += stmt_cost (stmt);
738 	}
739 
740       return true;
741     }
742   else
743     FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
744       if (!add_dependency (val, lim_data, loop, true))
745 	return false;
746 
747   if (gimple_vuse (stmt))
748     {
749       im_mem_ref *ref = mem_ref_in_stmt (stmt);
750 
751       if (ref)
752 	{
753 	  lim_data->max_loop
754 		  = outermost_indep_loop (lim_data->max_loop, loop, ref);
755 	  if (!lim_data->max_loop)
756 	    return false;
757 	}
758       else
759 	{
760 	  if ((val = gimple_vuse (stmt)) != NULL_TREE)
761 	    {
762 	      if (!add_dependency (val, lim_data, loop, false))
763 		return false;
764 	    }
765 	}
766     }
767 
768   lim_data->cost += stmt_cost (stmt);
769 
770   return true;
771 }
772 
773 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
774    and that one of the operands of this statement is computed by STMT.
775    Ensure that STMT (together with all the statements that define its
776    operands) is hoisted at least out of the loop LEVEL.  */
777 
778 static void
779 set_level (gimple *stmt, struct loop *orig_loop, struct loop *level)
780 {
781   struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
782   struct lim_aux_data *lim_data;
783   gimple *dep_stmt;
784   unsigned i;
785 
786   stmt_loop = find_common_loop (orig_loop, stmt_loop);
787   lim_data = get_lim_data (stmt);
788   if (lim_data != NULL && lim_data->tgt_loop != NULL)
789     stmt_loop = find_common_loop (stmt_loop,
790 				  loop_outer (lim_data->tgt_loop));
791   if (flow_loop_nested_p (stmt_loop, level))
792     return;
793 
794   gcc_assert (level == lim_data->max_loop
795 	      || flow_loop_nested_p (lim_data->max_loop, level));
796 
797   lim_data->tgt_loop = level;
798   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
799     set_level (dep_stmt, orig_loop, level);
800 }
801 
802 /* Determines the outermost loop out of which we want to hoist the statement
803    STMT.  For now we choose the outermost possible loop.  TODO -- use profiling
804    information to set it more sanely.  */
805 
806 static void
807 set_profitable_level (gimple *stmt)
808 {
809   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
810 }
811 
812 /* Returns true if STMT is a call that has side effects.  */
813 
814 static bool
815 nonpure_call_p (gimple *stmt)
816 {
817   if (gimple_code (stmt) != GIMPLE_CALL)
818     return false;
819 
820   return gimple_has_side_effects (stmt);
821 }
822 
823 /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
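
/* An illustrative sketch (the temporary name is the "reciptmp" created
   below, shown here only as an example):

     x = a / b;

   becomes

     reciptmp = 1.0 / b;	<-- invariant whenever b is, so hoistable
     x = a * reciptmp;

   and the statement computing reciptmp is returned.  */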
824 
825 static gimple *
826 rewrite_reciprocal (gimple_stmt_iterator *bsi)
827 {
828   gassign *stmt, *stmt1, *stmt2;
829   tree name, lhs, type;
830   tree real_one;
831   gimple_stmt_iterator gsi;
832 
833   stmt = as_a <gassign *> (gsi_stmt (*bsi));
834   lhs = gimple_assign_lhs (stmt);
835   type = TREE_TYPE (lhs);
836 
837   real_one = build_one_cst (type);
838 
839   name = make_temp_ssa_name (type, NULL, "reciptmp");
840   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
841 			       gimple_assign_rhs2 (stmt));
842   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
843 			       gimple_assign_rhs1 (stmt));
844 
845   /* Replace division stmt with reciprocal and multiply stmts.
846      The multiply stmt is not invariant, so update iterator
847      and avoid rescanning.  */
848   gsi = *bsi;
849   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
850   gsi_replace (&gsi, stmt2, true);
851 
852   /* Continue processing with invariant reciprocal statement.  */
853   return stmt1;
854 }
855 
856 /* Check if the pattern at *BSI is a bittest of the form
857    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
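
/* An illustrative sketch, assuming B is loop-invariant while A is not
   (temporary names are for exposition only):

     tmp = A >> B;
     c = tmp & 1;
     if (c != 0) ...

   becomes

     mask = 1 << B;	<-- invariant, so hoistable
     c2 = A & mask;
     if (c2 != 0) ...

   replacing the per-iteration shift by an invariant one.  */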
858 
859 static gimple *
860 rewrite_bittest (gimple_stmt_iterator *bsi)
861 {
862   gassign *stmt;
863   gimple *stmt1;
864   gassign *stmt2;
865   gimple *use_stmt;
866   gcond *cond_stmt;
867   tree lhs, name, t, a, b;
868   use_operand_p use;
869 
870   stmt = as_a <gassign *> (gsi_stmt (*bsi));
871   lhs = gimple_assign_lhs (stmt);
872 
873   /* Verify that the single use of lhs is a comparison against zero.  */
874   if (TREE_CODE (lhs) != SSA_NAME
875       || !single_imm_use (lhs, &use, &use_stmt))
876     return stmt;
877   cond_stmt = dyn_cast <gcond *> (use_stmt);
878   if (!cond_stmt)
879     return stmt;
880   if (gimple_cond_lhs (cond_stmt) != lhs
881       || (gimple_cond_code (cond_stmt) != NE_EXPR
882 	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
883       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
884     return stmt;
885 
886   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
887   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
888   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
889     return stmt;
890 
891   /* There may be a conversion in between, possibly inserted by fold.  */
892   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
893     {
894       t = gimple_assign_rhs1 (stmt1);
895       if (TREE_CODE (t) != SSA_NAME
896 	  || !has_single_use (t))
897 	return stmt;
898       stmt1 = SSA_NAME_DEF_STMT (t);
899       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
900 	return stmt;
901     }
902 
903   /* Verify that B is loop invariant but A is not.  Verify that with
904      all the stmt walking we are still in the same loop.  */
905   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
906       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
907     return stmt;
908 
909   a = gimple_assign_rhs1 (stmt1);
910   b = gimple_assign_rhs2 (stmt1);
911 
912   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
913       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
914     {
915       gimple_stmt_iterator rsi;
916 
917       /* 1 << B */
918       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
919 		       build_int_cst (TREE_TYPE (a), 1), b);
920       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
921       stmt1 = gimple_build_assign (name, t);
922 
923       /* A & (1 << B) */
924       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
925       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
926       stmt2 = gimple_build_assign (name, t);
927 
928       /* Replace the SSA_NAME we compare against zero.  Adjust
929 	 the type of zero accordingly.  */
930       SET_USE (use, name);
931       gimple_cond_set_rhs (cond_stmt,
932 			   build_int_cst_type (TREE_TYPE (name),
933 					       0));
934 
935       /* Don't use gsi_replace here, none of the new assignments sets
936 	 the variable originally set in stmt.  Move bsi to stmt1, and
937 	 then remove the original stmt, so that we get a chance to
938 	 retain debug info for it.  */
939       rsi = *bsi;
940       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
941       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
942       gimple *to_release = gsi_stmt (rsi);
943       gsi_remove (&rsi, true);
944       release_defs (to_release);
945 
946       return stmt1;
947     }
948 
949   return stmt;
950 }
951 
952 /* For each statement determines the outermost loop in which it is invariant,
953    the statements on whose motion it depends and the cost of the computation.
954    This information is stored to the LIM_DATA structure associated with
955    each statement.  */
956 class invariantness_dom_walker : public dom_walker
957 {
958 public:
959   invariantness_dom_walker (cdi_direction direction)
960     : dom_walker (direction) {}
961 
962   virtual edge before_dom_children (basic_block);
963 };
964 
965 /* Determine the outermost loops in which statements in basic block BB are
966    invariant, and record them to the LIM_DATA associated with the statements.
967    Callback for dom_walker.  */
968 
969 edge
970 invariantness_dom_walker::before_dom_children (basic_block bb)
971 {
972   enum move_pos pos;
973   gimple_stmt_iterator bsi;
974   gimple *stmt;
975   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
976   struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
977   struct lim_aux_data *lim_data;
978 
979   if (!loop_outer (bb->loop_father))
980     return NULL;
981 
982   if (dump_file && (dump_flags & TDF_DETAILS))
983     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
984 	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
985 
986   /* Look at PHI nodes, but only if there are at most two.
987      ???  We could relax this further by post-processing the inserted
988      code and transforming adjacent cond-exprs with the same predicate
989      to control flow again.  */
990   bsi = gsi_start_phis (bb);
991   if (!gsi_end_p (bsi)
992       && ((gsi_next (&bsi), gsi_end_p (bsi))
993 	  || (gsi_next (&bsi), gsi_end_p (bsi))))
994     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
995       {
996 	stmt = gsi_stmt (bsi);
997 
998 	pos = movement_possibility (stmt);
999 	if (pos == MOVE_IMPOSSIBLE)
1000 	  continue;
1001 
1002 	lim_data = init_lim_data (stmt);
1003 	lim_data->always_executed_in = outermost;
1004 
1005 	if (!determine_max_movement (stmt, false))
1006 	  {
1007 	    lim_data->max_loop = NULL;
1008 	    continue;
1009 	  }
1010 
1011 	if (dump_file && (dump_flags & TDF_DETAILS))
1012 	  {
1013 	    print_gimple_stmt (dump_file, stmt, 2, 0);
1014 	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1015 		     loop_depth (lim_data->max_loop),
1016 		     lim_data->cost);
1017 	  }
1018 
1019 	if (lim_data->cost >= LIM_EXPENSIVE)
1020 	  set_profitable_level (stmt);
1021       }
1022 
1023   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1024     {
1025       stmt = gsi_stmt (bsi);
1026 
1027       pos = movement_possibility (stmt);
1028       if (pos == MOVE_IMPOSSIBLE)
1029 	{
1030 	  if (nonpure_call_p (stmt))
1031 	    {
1032 	      maybe_never = true;
1033 	      outermost = NULL;
1034 	    }
1035 	  /* Make sure to note always_executed_in for stores to make
1036 	     store-motion work.  */
1037 	  else if (stmt_makes_single_store (stmt))
1038 	    {
1039 	      struct lim_aux_data *lim_data = init_lim_data (stmt);
1040 	      lim_data->always_executed_in = outermost;
1041 	    }
1042 	  continue;
1043 	}
1044 
1045       if (is_gimple_assign (stmt)
1046 	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1047 	      == GIMPLE_BINARY_RHS))
1048 	{
1049 	  tree op0 = gimple_assign_rhs1 (stmt);
1050 	  tree op1 = gimple_assign_rhs2 (stmt);
1051 	  struct loop *ol1 = outermost_invariant_loop (op1,
1052 					loop_containing_stmt (stmt));
1053 
1054 	  /* If the divisor is invariant, convert a/b to a*(1/b), allowing the
1055 	     reciprocal to be hoisted out of the loop, saving an expensive divide.  */
1056 	  if (pos == MOVE_POSSIBLE
1057 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1058 	      && flag_unsafe_math_optimizations
1059 	      && !flag_trapping_math
1060 	      && ol1 != NULL
1061 	      && outermost_invariant_loop (op0, ol1) == NULL)
1062 	    stmt = rewrite_reciprocal (&bsi);
1063 
1064 	  /* If the shift count is invariant, convert (A >> B) & 1 to
1065 	     A & (1 << B), allowing the bit mask to be hoisted out of the loop,
1066 	     saving an expensive shift.  */
1067 	  if (pos == MOVE_POSSIBLE
1068 	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1069 	      && integer_onep (op1)
1070 	      && TREE_CODE (op0) == SSA_NAME
1071 	      && has_single_use (op0))
1072 	    stmt = rewrite_bittest (&bsi);
1073 	}
1074 
1075       lim_data = init_lim_data (stmt);
1076       lim_data->always_executed_in = outermost;
1077 
1078       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1079 	continue;
1080 
1081       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1082 	{
1083 	  lim_data->max_loop = NULL;
1084 	  continue;
1085 	}
1086 
1087       if (dump_file && (dump_flags & TDF_DETAILS))
1088 	{
1089 	  print_gimple_stmt (dump_file, stmt, 2, 0);
1090 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1091 		   loop_depth (lim_data->max_loop),
1092 		   lim_data->cost);
1093 	}
1094 
1095       if (lim_data->cost >= LIM_EXPENSIVE)
1096 	set_profitable_level (stmt);
1097     }
1098   return NULL;
1099 }
1100 
1101 class move_computations_dom_walker : public dom_walker
1102 {
1103 public:
1104   move_computations_dom_walker (cdi_direction direction)
1105     : dom_walker (direction), todo_ (0) {}
1106 
1107   virtual edge before_dom_children (basic_block);
1108 
1109   unsigned int todo_;
1110 };
1111 
1112 /* Hoist the statements in basic block BB out of the loops prescribed by
1113    data stored in LIM_DATA structures associated with each statement.  Callback
1114    for walk_dominator_tree.  */
1115 
1116 unsigned int
1117 move_computations_worker (basic_block bb)
1118 {
1119   struct loop *level;
1120   unsigned cost = 0;
1121   struct lim_aux_data *lim_data;
1122   unsigned int todo = 0;
1123 
1124   if (!loop_outer (bb->loop_father))
1125     return todo;
1126 
1127   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1128     {
1129       gassign *new_stmt;
1130       gphi *stmt = bsi.phi ();
1131 
1132       lim_data = get_lim_data (stmt);
1133       if (lim_data == NULL)
1134 	{
1135 	  gsi_next (&bsi);
1136 	  continue;
1137 	}
1138 
1139       cost = lim_data->cost;
1140       level = lim_data->tgt_loop;
1141       clear_lim_data (stmt);
1142 
1143       if (!level)
1144 	{
1145 	  gsi_next (&bsi);
1146 	  continue;
1147 	}
1148 
1149       if (dump_file && (dump_flags & TDF_DETAILS))
1150 	{
1151 	  fprintf (dump_file, "Moving PHI node\n");
1152 	  print_gimple_stmt (dump_file, stmt, 0, 0);
1153 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1154 		   cost, level->num);
1155 	}
1156 
1157       if (gimple_phi_num_args (stmt) == 1)
1158 	{
1159 	  tree arg = PHI_ARG_DEF (stmt, 0);
1160 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1161 					  TREE_CODE (arg), arg);
1162 	}
1163       else
1164 	{
1165 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1166 	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
1167 	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1168 	  /* Get the PHI arguments corresponding to the true and false
1169 	     edges of COND.  */
1170 	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1171 	  gcc_assert (arg0 && arg1);
1172 	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1173 		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1174 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1175 					  COND_EXPR, t, arg0, arg1);
1176 	  todo |= TODO_cleanup_cfg;
1177 	}
1178       if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1179 	  && (!ALWAYS_EXECUTED_IN (bb)
1180 	      || (ALWAYS_EXECUTED_IN (bb) != level
1181 		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1182 	{
1183 	  tree lhs = gimple_assign_lhs (new_stmt);
1184 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1185 	}
1186       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1187       remove_phi_node (&bsi, false);
1188     }
1189 
1190   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1191     {
1192       edge e;
1193 
1194       gimple *stmt = gsi_stmt (bsi);
1195 
1196       lim_data = get_lim_data (stmt);
1197       if (lim_data == NULL)
1198 	{
1199 	  gsi_next (&bsi);
1200 	  continue;
1201 	}
1202 
1203       cost = lim_data->cost;
1204       level = lim_data->tgt_loop;
1205       clear_lim_data (stmt);
1206 
1207       if (!level)
1208 	{
1209 	  gsi_next (&bsi);
1210 	  continue;
1211 	}
1212 
1213       /* We do not really want to move conditionals out of the loop; we just
1214 	 placed them here to force their operands to be moved if necessary.  */
1215       if (gimple_code (stmt) == GIMPLE_COND)
1216 	continue;
1217 
1218       if (dump_file && (dump_flags & TDF_DETAILS))
1219 	{
1220 	  fprintf (dump_file, "Moving statement\n");
1221 	  print_gimple_stmt (dump_file, stmt, 0, 0);
1222 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1223 		   cost, level->num);
1224 	}
1225 
1226       e = loop_preheader_edge (level);
1227       gcc_assert (!gimple_vdef (stmt));
1228       if (gimple_vuse (stmt))
1229 	{
1230 	  /* The new VUSE is the one from the virtual PHI in the loop
1231 	     header or the one already present.  */
1232 	  gphi_iterator gsi2;
1233 	  for (gsi2 = gsi_start_phis (e->dest);
1234 	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1235 	    {
1236 	      gphi *phi = gsi2.phi ();
1237 	      if (virtual_operand_p (gimple_phi_result (phi)))
1238 		{
1239 		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1240 		  break;
1241 		}
1242 	    }
1243 	}
1244       gsi_remove (&bsi, false);
1245       if (gimple_has_lhs (stmt)
1246 	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1247 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1248 	  && (!ALWAYS_EXECUTED_IN (bb)
1249 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1250 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1251 	{
1252 	  tree lhs = gimple_get_lhs (stmt);
1253 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1254 	}
1255       /* In case this is a stmt that is not unconditionally executed
1256          when the target loop header is executed and the stmt may
1257 	 invoke undefined integer or pointer overflow, rewrite it to
1258 	 unsigned arithmetic.  */
1259       if (is_gimple_assign (stmt)
1260 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1261 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1262 	  && arith_code_with_undefined_signed_overflow
1263 	       (gimple_assign_rhs_code (stmt))
1264 	  && (!ALWAYS_EXECUTED_IN (bb)
1265 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1266 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1267 	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1268       else
1269 	gsi_insert_on_edge (e, stmt);
1270     }
1271 
1272   return todo;
1273 }
1274 
1275 /* Hoist the statements out of the loops prescribed by data stored in
1276    LIM_DATA structures associated with each statement.  */
1277 
1278 static unsigned int
1279 move_computations (void)
1280 {
1281   int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
1282   int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
1283   unsigned todo = 0;
1284 
1285   for (int i = 0; i < n; ++i)
1286     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));
1287 
1288   free (rpo);
1289 
1290   gsi_commit_edge_inserts ();
1291   if (need_ssa_update_p (cfun))
1292     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1293 
1294   return todo;
1295 }
1296 
1297 /* Checks whether the statement defining variable *INDEX can be hoisted
1298    out of the loop passed in DATA.  Callback for for_each_index.  */
1299 
1300 static bool
1301 may_move_till (tree ref, tree *index, void *data)
1302 {
1303   struct loop *loop = (struct loop *) data, *max_loop;
1304 
1305   /* If REF is an array reference, check also that the step and the lower
1306      bound are invariant in LOOP.  */
1307   if (TREE_CODE (ref) == ARRAY_REF)
1308     {
1309       tree step = TREE_OPERAND (ref, 3);
1310       tree lbound = TREE_OPERAND (ref, 2);
1311 
1312       max_loop = outermost_invariant_loop (step, loop);
1313       if (!max_loop)
1314 	return false;
1315 
1316       max_loop = outermost_invariant_loop (lbound, loop);
1317       if (!max_loop)
1318 	return false;
1319     }
1320 
1321   max_loop = outermost_invariant_loop (*index, loop);
1322   if (!max_loop)
1323     return false;
1324 
1325   return true;
1326 }
1327 
1328 /* If OP is an SSA name, force the statement that defines it to be
1329    moved out of LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1330 
1331 static void
1332 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1333 {
1334   gimple *stmt;
1335 
1336   if (!op
1337       || is_gimple_min_invariant (op))
1338     return;
1339 
1340   gcc_assert (TREE_CODE (op) == SSA_NAME);
1341 
1342   stmt = SSA_NAME_DEF_STMT (op);
1343   if (gimple_nop_p (stmt))
1344     return;
1345 
1346   set_level (stmt, orig_loop, loop);
1347 }
1348 
1349 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1350    LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
1351    for_each_index.  */
1352 
1353 struct fmt_data
1354 {
1355   struct loop *loop;
1356   struct loop *orig_loop;
1357 };
1358 
1359 static bool
1360 force_move_till (tree ref, tree *index, void *data)
1361 {
1362   struct fmt_data *fmt_data = (struct fmt_data *) data;
1363 
1364   if (TREE_CODE (ref) == ARRAY_REF)
1365     {
1366       tree step = TREE_OPERAND (ref, 3);
1367       tree lbound = TREE_OPERAND (ref, 2);
1368 
1369       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1370       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1371     }
1372 
1373   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1374 
1375   return true;
1376 }
1377 
1378 /* A function to free the mem_ref object MEM.  */
1379 
1380 static void
1381 memref_free (struct im_mem_ref *mem)
1382 {
1383   mem->accesses_in_loop.release ();
1384 }
1385 
1386 /* Allocates and returns a memory reference description for MEM whose hash
1387    value is HASH and id is ID.  */
1388 
1389 static im_mem_ref *
1390 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1391 {
1392   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1393   ao_ref_init (&ref->mem, mem);
1394   ref->id = id;
1395   ref->hash = hash;
1396   ref->stored = NULL;
1397   bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1398   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1399   ref->accesses_in_loop.create (1);
1400 
1401   return ref;
1402 }
1403 
1404 /* Records the memory reference location *LOC to the memory reference
1405    description REF.  The reference occurs in statement STMT.  */
1406 
1407 static void
1408 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1409 {
1410   mem_ref_loc aref;
1411   aref.stmt = stmt;
1412   aref.ref = loc;
1413   ref->accesses_in_loop.safe_push (aref);
1414 }
1415 
1416 /* Set the bit for LOOP in REF's stored bitmap, allocating the bitmap if
1417    necessary.  Return whether the bit was changed.  */
1418 
1419 static bool
1420 set_ref_stored_in_loop (im_mem_ref *ref, struct loop *loop)
1421 {
1422   if (!ref->stored)
1423     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1424   return bitmap_set_bit (ref->stored, loop->num);
1425 }
1426 
1427 /* Marks reference REF as stored in LOOP.  */
1428 
1429 static void
1430 mark_ref_stored (im_mem_ref *ref, struct loop *loop)
1431 {
1432   while (loop != current_loops->tree_root
1433 	 && set_ref_stored_in_loop (ref, loop))
1434     loop = loop_outer (loop);
1435 }
1436 
1437 /* Gathers memory references in statement STMT in LOOP, storing the
1438    information about them in the memory_accesses structure.  Marks
1439    the vops accessed through unrecognized statements there as
1440    well.  */
1441 
1442 static void
1443 gather_mem_refs_stmt (struct loop *loop, gimple *stmt)
1444 {
1445   tree *mem = NULL;
1446   hashval_t hash;
1447   im_mem_ref **slot;
1448   im_mem_ref *ref;
1449   bool is_stored;
1450   unsigned id;
1451 
1452   if (!gimple_vuse (stmt))
1453     return;
1454 
1455   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1456   if (!mem)
1457     {
1458       /* We use the shared mem_ref for all unanalyzable refs.  */
1459       id = UNANALYZABLE_MEM_ID;
1460       ref = memory_accesses.refs_list[id];
1461       if (dump_file && (dump_flags & TDF_DETAILS))
1462 	{
1463 	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1464 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1465 	}
1466       is_stored = gimple_vdef (stmt);
1467     }
1468   else
1469     {
1470       hash = iterative_hash_expr (*mem, 0);
1471       slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1472       if (*slot)
1473 	{
1474 	  ref = *slot;
1475 	  id = ref->id;
1476 	}
1477       else
1478 	{
1479 	  id = memory_accesses.refs_list.length ();
1480 	  ref = mem_ref_alloc (*mem, hash, id);
1481 	  memory_accesses.refs_list.safe_push (ref);
1482 	  *slot = ref;
1483 
1484 	  if (dump_file && (dump_flags & TDF_DETAILS))
1485 	    {
1486 	      fprintf (dump_file, "Memory reference %u: ", id);
1487 	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1488 	      fprintf (dump_file, "\n");
1489 	    }
1490 	}
1491 
1492       record_mem_ref_loc (ref, stmt, mem);
1493     }
1494   bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1495   if (is_stored)
1496     {
1497       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1498       mark_ref_stored (ref, loop);
1499     }
1500   return;
1501 }
1502 
1503 static unsigned *bb_loop_postorder;
1504 
1505 /* qsort comparison function to sort blocks by their loop father's postorder.  */
1506 
1507 static int
1508 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1509 {
1510   basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1511   basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1512   struct loop *loop1 = bb1->loop_father;
1513   struct loop *loop2 = bb2->loop_father;
1514   if (loop1->num == loop2->num)
1515     return 0;
1516   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1517 }
1518 
1519 /* qsort comparison function to sort ref locs by their loop father's postorder.  */
1520 
1521 static int
1522 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1523 {
1524   mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1525   mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1526   struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1527   struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1528   if (loop1->num == loop2->num)
1529     return 0;
1530   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1531 }
1532 
1533 /* Gathers memory references in loops.  */
1534 
1535 static void
1536 analyze_memory_references (void)
1537 {
1538   gimple_stmt_iterator bsi;
1539   basic_block bb, *bbs;
1540   struct loop *loop, *outer;
1541   unsigned i, n;
1542 
1543   /* Collect all basic-blocks in loops and sort them by their
1544      loop's postorder.  */
1545   i = 0;
1546   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1547   FOR_EACH_BB_FN (bb, cfun)
1548     if (bb->loop_father != current_loops->tree_root)
1549       bbs[i++] = bb;
1550   n = i;
1551   qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1552 
1553   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1554      That results in better locality for all the bitmaps.  */
1555   for (i = 0; i < n; ++i)
1556     {
1557       basic_block bb = bbs[i];
1558       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1559         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1560     }
1561 
1562   /* Sort the location list of gathered memory references by their
1563      loop postorder number.  */
1564   im_mem_ref *ref;
1565   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1566     ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1567 
1568   free (bbs);
1569 //  free (bb_loop_postorder);
1570 
1571   /* Propagate the information about accessed memory references up
1572      the loop hierarchy.  */
1573   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1574     {
1575       /* Finalize the overall touched references (including subloops).  */
1576       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1577 		       &memory_accesses.refs_stored_in_loop[loop->num]);
1578 
1579       /* Propagate the information about accessed memory references up
1580 	 the loop hierarchy.  */
1581       outer = loop_outer (loop);
1582       if (outer == current_loops->tree_root)
1583 	continue;
1584 
1585       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1586 		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
1587     }
1588 }
1589 
1590 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1591    tree_to_aff_combination_expand.  */
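
/* For example (illustrative only): accesses a[i] and a[i + 1] share a base,
   and their affine offsets differ by exactly one element size, so with equal
   access sizes the offset analysis below can prove the locations cannot
   overlap and the function returns false.  */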
1592 
1593 static bool
1594 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1595 		      hash_map<tree, name_expansion *> **ttae_cache)
1596 {
1597   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1598      object and their offsets differ in such a way that the locations cannot
1599      overlap, then they cannot alias.  */
1600   widest_int size1, size2;
1601   aff_tree off1, off2;
1602 
1603   /* Perform basic offset and type-based disambiguation.  */
1604   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1605     return false;
1606 
1607   /* The expansion of addresses may be a bit expensive, thus we only do
1608      the check at -O2 and higher optimization levels.  */
1609   if (optimize < 2)
1610     return true;
1611 
1612   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1613   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1614   aff_combination_expand (&off1, ttae_cache);
1615   aff_combination_expand (&off2, ttae_cache);
1616   aff_combination_scale (&off1, -1);
1617   aff_combination_add (&off2, &off1);
1618 
1619   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1620     return false;
1621 
1622   return true;
1623 }
1624 
1625 /* Compare function for bsearch searching for reference locations
1626    in a loop.  */
1627 
1628 static int
1629 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1630 {
1631   struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1632   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1633   struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1634   if (loop->num  == loc_loop->num
1635       || flow_loop_nested_p (loop, loc_loop))
1636     return 0;
1637   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1638 	  ? -1 : 1);
1639 }
1640 
1641 /* Iterates over all locations of REF in LOOP and its subloops calling
1642    fn.operator() with the location as argument.  When that operator
1643    returns true the iteration is stopped and true is returned.
1644    Otherwise false is returned.  */
1645 
1646 template <typename FN>
1647 static bool
1648 for_all_locs_in_loop (struct loop *loop, im_mem_ref *ref, FN fn)
1649 {
1650   unsigned i;
1651   mem_ref_loc *loc;
1652 
1653   /* Search for the cluster of locs in the accesses_in_loop vector
1654      which is sorted by the postorder index of the loop father.  */
1655   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1656   if (!loc)
1657     return false;
1658 
1659   /* We have found one location inside loop or its sub-loops.  Iterate
1660      both forward and backward to cover the whole cluster.  */
1661   i = loc - ref->accesses_in_loop.address ();
1662   while (i > 0)
1663     {
1664       --i;
1665       mem_ref_loc *l = &ref->accesses_in_loop[i];
1666       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1667 	break;
1668       if (fn (l))
1669 	return true;
1670     }
1671   for (i = loc - ref->accesses_in_loop.address ();
1672        i < ref->accesses_in_loop.length (); ++i)
1673     {
1674       mem_ref_loc *l = &ref->accesses_in_loop[i];
1675       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1676 	break;
1677       if (fn (l))
1678 	return true;
1679     }
1680 
1681   return false;
1682 }
1683 
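/* Illustration, added for exposition only: the clustering that
   for_all_locs_in_loop relies on.  accesses_in_loop is sorted by the
   postorder index of each location's loop father, and the postorder
   numbers all loops of a subtree before the subtree's root, so e.g. for
   sibling loops L1 and L2 nested in L0 the vector looks like

     [ locs in L1 | locs in L2 | locs in L0 proper ]

   The bsearch with find_ref_loc_in_loop_cmp hits some location inside the
   queried loop (or its sub-loops); scanning backward and forward from that
   hit and stopping at the first location outside the loop then visits the
   whole contiguous cluster.  */
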
1684 /* Rewrites location LOC by TMP_VAR.  */
1685 
1686 struct rewrite_mem_ref_loc
1687 {
1688   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1689   bool operator () (mem_ref_loc *loc);
1690   tree tmp_var;
1691 };
1692 
1693 bool
1694 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1695 {
1696   *loc->ref = tmp_var;
1697   update_stmt (loc->stmt);
1698   return false;
1699 }
1700 
1701 /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1702 
1703 static void
1704 rewrite_mem_refs (struct loop *loop, im_mem_ref *ref, tree tmp_var)
1705 {
1706   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1707 }
1708 
1709 /* Stores the first reference location in LOCP.  */
1710 
1711 struct first_mem_ref_loc_1
1712 {
1713   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1714   bool operator () (mem_ref_loc *loc);
1715   mem_ref_loc **locp;
1716 };
1717 
1718 bool
1719 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1720 {
1721   *locp = loc;
1722   return true;
1723 }
1724 
1725 /* Returns the first reference location to REF in LOOP.  */
1726 
1727 static mem_ref_loc *
1728 first_mem_ref_loc (struct loop *loop, im_mem_ref *ref)
1729 {
1730   mem_ref_loc *locp = NULL;
1731   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1732   return locp;
1733 }
1734 
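/* Illustration, added for exposition only: any small functor with a
   "bool operator () (mem_ref_loc *)" member can be passed to
   for_all_locs_in_loop; returning false continues the iteration and
   returning true stops it early.  A hypothetical location counter,
   not used anywhere in this file, could look like

     struct count_mem_ref_locs
     {
       count_mem_ref_locs (unsigned *count_) : count (count_) {}
       bool operator () (mem_ref_loc *) { ++*count; return false; }
       unsigned *count;
     };

     unsigned n = 0;
     for_all_locs_in_loop (loop, ref, count_mem_ref_locs (&n));  */
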
1735 struct prev_flag_edges {
1736   /* Edge to insert new flag comparison code.  */
1737   edge append_cond_position;
1738 
1739   /* Edge for fall through from previous flag comparison.  */
1740   edge last_cond_fallthru;
1741 };
1742 
1743 /* Helper function for execute_sm.  Emit code to store TMP_VAR into
1744    MEM along edge EX.
1745 
1746    The store is only done if MEM has changed.  We do this so no
1747    changes to MEM occur on code paths that did not originally store
1748    into it.
1749 
1750    The common case for execute_sm will transform:
1751 
1752      for (...) {
1753        if (foo)
1754          stuff;
1755        else
1756          MEM = TMP_VAR;
1757      }
1758 
1759    into:
1760 
1761      lsm = MEM;
1762      for (...) {
1763        if (foo)
1764          stuff;
1765        else
1766          lsm = TMP_VAR;
1767      }
1768      MEM = lsm;
1769 
1770   This function will generate:
1771 
1772      lsm = MEM;
1773 
1774      lsm_flag = false;
1775      ...
1776      for (...) {
1777        if (foo)
1778          stuff;
1779        else {
1780          lsm = TMP_VAR;
1781          lsm_flag = true;
1782        }
1783      }
1784      if (lsm_flag)	<--
1785        MEM = lsm;	<--
1786 */
1787 
1788 static void
1789 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1790 {
1791   basic_block new_bb, then_bb, old_dest;
1792   bool loop_has_only_one_exit;
1793   edge then_old_edge, orig_ex = ex;
1794   gimple_stmt_iterator gsi;
1795   gimple *stmt;
1796   struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1797   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1798 
1799   /* ?? Insert store after previous store if applicable.  See note
1800      below.  */
1801   if (prev_edges)
1802     ex = prev_edges->append_cond_position;
1803 
1804   loop_has_only_one_exit = single_pred_p (ex->dest);
1805 
1806   if (loop_has_only_one_exit)
1807     ex = split_block_after_labels (ex->dest);
1808   else
1809     {
1810       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1811 	   !gsi_end_p (gpi); gsi_next (&gpi))
1812 	{
1813 	  gphi *phi = gpi.phi ();
1814 	  if (virtual_operand_p (gimple_phi_result (phi)))
1815 	    continue;
1816 
1817 	  /* When the destination has a non-virtual PHI node with multiple
1818 	     predecessors make sure we preserve the PHI structure by
1819 	     forcing a forwarder block so that hoisting of that PHI will
1820 	     still work.  */
1821 	  split_edge (ex);
1822 	  break;
1823 	}
1824     }
1825 
1826   old_dest = ex->dest;
1827   new_bb = split_edge (ex);
1828   then_bb = create_empty_bb (new_bb);
1829   if (irr)
1830     then_bb->flags = BB_IRREDUCIBLE_LOOP;
1831   add_bb_to_loop (then_bb, new_bb->loop_father);
1832 
1833   gsi = gsi_start_bb (new_bb);
1834   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1835 			    NULL_TREE, NULL_TREE);
1836   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1837 
1838   gsi = gsi_start_bb (then_bb);
1839   /* Insert actual store.  */
1840   stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1841   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1842 
1843   make_edge (new_bb, then_bb,
1844 	     EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1845   make_edge (new_bb, old_dest,
1846 	     EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1847   then_old_edge = make_edge (then_bb, old_dest,
1848 			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1849 
1850   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1851 
1852   if (prev_edges)
1853     {
1854       basic_block prevbb = prev_edges->last_cond_fallthru->src;
1855       redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1856       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1857       set_immediate_dominator (CDI_DOMINATORS, old_dest,
1858 			       recompute_dominator (CDI_DOMINATORS, old_dest));
1859     }
1860 
1861   /* ?? Because stores may alias, they must happen in the exact
1862      sequence they originally happened.  Save the position right after
1863      the (_lsm) store we just created so we can continue appending after
1864      it and maintain the original order.  */
1865   {
1866     struct prev_flag_edges *p;
1867 
1868     if (orig_ex->aux)
1869       orig_ex->aux = NULL;
1870     alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1871     p = (struct prev_flag_edges *) orig_ex->aux;
1872     p->append_cond_position = then_old_edge;
1873     p->last_cond_fallthru = find_edge (new_bb, old_dest);
1874     orig_ex->aux = (void *) p;
1875   }
1876 
1877   if (!loop_has_only_one_exit)
1878     for (gphi_iterator gpi = gsi_start_phis (old_dest);
1879 	 !gsi_end_p (gpi); gsi_next (&gpi))
1880       {
1881 	gphi *phi = gpi.phi ();
1882 	unsigned i;
1883 
1884 	for (i = 0; i < gimple_phi_num_args (phi); i++)
1885 	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1886 	    {
1887 	      tree arg = gimple_phi_arg_def (phi, i);
1888 	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1889 	      update_stmt (phi);
1890 	    }
1891       }
1892   /* Remove the original fall through edge.  This was the
1893      single_succ_edge (new_bb).  */
1894   EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
1895 }
1896 
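/* Illustration, added for exposition only: when several _lsm variables
   are flushed on the same exit edge, the prev_flag_edges structure stored
   in the edge's aux field makes each later call append its conditional
   store after the previously emitted one, so the exit path ends up as

     if (lsm_flag_1)
       MEM1 = lsm_1;
     if (lsm_flag_2)
       MEM2 = lsm_2;
     ...

   preserving the original order of the stores, which matters because the
   stored locations may alias each other.  */
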
1897 /* If the location is a store to REF, set the flag indicating the store.  */
1898 
1899 struct sm_set_flag_if_changed
1900 {
1901   sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1902   bool operator () (mem_ref_loc *loc);
1903   tree flag;
1904 };
1905 
1906 bool
1907 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
1908 {
1909   /* Only set the flag for writes.  */
1910   if (is_gimple_assign (loc->stmt)
1911       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1912     {
1913       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1914       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
1915       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1916     }
1917   return false;
1918 }
1919 
1920 /* Helper function for execute_sm.  On every location where REF is
1921    set, set an appropriate flag indicating the store.  */
1922 
1923 static tree
1924 execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref)
1925 {
1926   tree flag;
1927   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1928   flag = create_tmp_reg (boolean_type_node, str);
1929   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1930   return flag;
1931 }
1932 
1933 /* Executes store motion of memory reference REF from LOOP.
1934    Exits from the LOOP are stored in EXITS.  The initialization of the
1935    temporary variable is put into the preheader of the loop, and assignments
1936    to the reference from the temporary variable are emitted to exits.  */
1937 
1938 static void
1939 execute_sm (struct loop *loop, vec<edge> exits, im_mem_ref *ref)
1940 {
1941   tree tmp_var, store_flag = NULL_TREE;
1942   unsigned i;
1943   gassign *load;
1944   struct fmt_data fmt_data;
1945   edge ex;
1946   struct lim_aux_data *lim_data;
1947   bool multi_threaded_model_p = false;
1948   gimple_stmt_iterator gsi;
1949 
1950   if (dump_file && (dump_flags & TDF_DETAILS))
1951     {
1952       fprintf (dump_file, "Executing store motion of ");
1953       print_generic_expr (dump_file, ref->mem.ref, 0);
1954       fprintf (dump_file, " from loop %d\n", loop->num);
1955     }
1956 
1957   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1958 			    get_lsm_tmp_name (ref->mem.ref, ~0));
1959 
1960   fmt_data.loop = loop;
1961   fmt_data.orig_loop = loop;
1962   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
1963 
1964   if (bb_in_transaction (loop_preheader_edge (loop)->src)
1965       || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
1966     multi_threaded_model_p = true;
1967 
1968   if (multi_threaded_model_p)
1969     store_flag = execute_sm_if_changed_flag_set (loop, ref);
1970 
1971   rewrite_mem_refs (loop, ref, tmp_var);
1972 
1973   /* Emit the load code on a random exit edge or into the latch if
1974      the loop does not exit, so that we are sure it will be processed
1975      by move_computations after all dependencies.  */
1976   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
1977 
1978   /* FIXME/TODO: For the multi-threaded variant, we could avoid this
1979      load altogether, since the store is predicated by a flag.  We
1980      could do the load only if it was originally in the loop.  */
1981   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
1982   lim_data = init_lim_data (load);
1983   lim_data->max_loop = loop;
1984   lim_data->tgt_loop = loop;
1985   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
1986 
1987   if (multi_threaded_model_p)
1988     {
1989       load = gimple_build_assign (store_flag, boolean_false_node);
1990       lim_data = init_lim_data (load);
1991       lim_data->max_loop = loop;
1992       lim_data->tgt_loop = loop;
1993       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
1994     }
1995 
1996   /* Sink the store to every exit from the loop.  */
1997   FOR_EACH_VEC_ELT (exits, i, ex)
1998     if (!multi_threaded_model_p)
1999       {
2000 	gassign *store;
2001 	store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2002 	gsi_insert_on_edge (ex, store);
2003       }
2004     else
2005       execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
2006 }
2007 
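/* Illustration, added for exposition only: when the flag-based model is
   not needed (the preheader is not inside a transaction and
   PARAM_ALLOW_STORE_DATA_RACES is nonzero), execute_sm transforms roughly

     for (i = 0; i < n; i++)
       g = g + a[i];

   into

     g_lsm = g;
     for (i = 0; i < n; i++)
       g_lsm = g_lsm + a[i];
     g = g_lsm;            <- emitted on every exit edge

   The load of g is created with lim_aux_data so that move_computations
   later hoists it out of the loop; in the flag-based model the stores on
   the exits are emitted by execute_sm_if_changed instead.  */
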
2008 /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2009    edges of the LOOP.  */
2010 
2011 static void
2012 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2013 			 vec<edge> exits)
2014 {
2015   im_mem_ref *ref;
2016   unsigned  i;
2017   bitmap_iterator bi;
2018 
2019   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2020     {
2021       ref = memory_accesses.refs_list[i];
2022       execute_sm (loop, exits, ref);
2023     }
2024 }
2025 
2026 struct ref_always_accessed
2027 {
2028   ref_always_accessed (struct loop *loop_, bool stored_p_)
2029       : loop (loop_), stored_p (stored_p_) {}
2030   bool operator () (mem_ref_loc *loc);
2031   struct loop *loop;
2032   bool stored_p;
2033 };
2034 
2035 bool
2036 ref_always_accessed::operator () (mem_ref_loc *loc)
2037 {
2038   struct loop *must_exec;
2039 
2040   if (!get_lim_data (loc->stmt))
2041     return false;
2042 
2043   /* If we require an always executed store, make sure the statement
2044      stores to the reference.  */
2045   if (stored_p)
2046     {
2047       tree lhs = gimple_get_lhs (loc->stmt);
2048       if (!lhs
2049 	  || lhs != *loc->ref)
2050 	return false;
2051     }
2052 
2053   must_exec = get_lim_data (loc->stmt)->always_executed_in;
2054   if (!must_exec)
2055     return false;
2056 
2057   if (must_exec == loop
2058       || flow_loop_nested_p (must_exec, loop))
2059     return true;
2060 
2061   return false;
2062 }
2063 
2064 /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
2065    make sure REF is always stored to in LOOP.  */
2066 
2067 static bool
2068 ref_always_accessed_p (struct loop *loop, im_mem_ref *ref, bool stored_p)
2069 {
2070   return for_all_locs_in_loop (loop, ref,
2071 			       ref_always_accessed (loop, stored_p));
2072 }
2073 
2074 /* Returns true if REF1 and REF2 are independent.  */
2075 
2076 static bool
2077 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
2078 {
2079   if (ref1 == ref2)
2080     return true;
2081 
2082   if (dump_file && (dump_flags & TDF_DETAILS))
2083     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2084 	     ref1->id, ref2->id);
2085 
2086   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2087     {
2088       if (dump_file && (dump_flags & TDF_DETAILS))
2089 	fprintf (dump_file, "dependent.\n");
2090       return false;
2091     }
2092   else
2093     {
2094       if (dump_file && (dump_flags & TDF_DETAILS))
2095 	fprintf (dump_file, "independent.\n");
2096       return true;
2097     }
2098 }
2099 
2100 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2101    and its super-loops.  */
2102 
2103 static void
2104 record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
2105 {
2106   /* We can propagate dependent-in-loop bits up the loop
2107      hierarchy to all outer loops.  */
2108   while (loop != current_loops->tree_root
2109 	 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2110     loop = loop_outer (loop);
2111 }
2112 
2113 /* Returns true if REF is independent of all other memory references in
2114    LOOP.  */
2115 
2116 static bool
2117 ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
2118 {
2119   bitmap refs_to_check;
2120   unsigned i;
2121   bitmap_iterator bi;
2122   im_mem_ref *aref;
2123 
2124   if (stored_p)
2125     refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2126   else
2127     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2128 
2129   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2130     return false;
2131 
2132   EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2133     {
2134       aref = memory_accesses.refs_list[i];
2135       if (!refs_independent_p (ref, aref))
2136 	return false;
2137     }
2138 
2139   return true;
2140 }
2141 
2142 /* Returns true if REF is independent of all other memory references in
2143    LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */
2144 
2145 static bool
2146 ref_indep_loop_p_2 (struct loop *loop, im_mem_ref *ref, bool stored_p)
2147 {
2148   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2149 
2150   if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2151     return true;
2152   if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2153     return false;
2154 
2155   struct loop *inner = loop->inner;
2156   while (inner)
2157     {
2158       if (!ref_indep_loop_p_2 (inner, ref, stored_p))
2159 	return false;
2160       inner = inner->next;
2161     }
2162 
2163   bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
2164 
2165   if (dump_file && (dump_flags & TDF_DETAILS))
2166     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2167 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
2168 
2169   /* Record the computed result in the cache.  */
2170   if (indep_p)
2171     {
2172       if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2173 	  && stored_p)
2174 	{
2175 	  /* If it's independent of all refs then it's independent
2176 	     of stores, too.  */
2177 	  bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2178 	}
2179     }
2180   else
2181     {
2182       record_dep_loop (loop, ref, stored_p);
2183       if (!stored_p)
2184 	{
2185 	  /* If it's dependent on stores it's dependent on
2186 	     all refs, too.  */
2187 	  record_dep_loop (loop, ref, true);
2188 	}
2189     }
2190 
2191   return indep_p;
2192 }
2193 
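/* Illustration, added for exposition only: the caching scheme used above
   keeps two bitmaps per reference, indep_loop and dep_loop, with one bit
   per (loop, kind-of-check) pair selected by LOOP_DEP_BIT.  The extra
   bits set when recording a result follow from the subset relation
   between the checked sets of references:

     independent of all refs in LOOP   =>  independent of the stores in LOOP
     dependent on the stores in LOOP   =>  dependent on some ref in LOOP

   and record_dep_loop propagates dependence bits to all enclosing loops,
   since a conflict inside LOOP is also a conflict inside every loop that
   contains it.  */
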
2194 /* Returns true if REF is independent of all other memory references in
2195    LOOP.  */
2196 
2197 static bool
2198 ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
2199 {
2200   gcc_checking_assert (MEM_ANALYZABLE (ref));
2201 
2202   return ref_indep_loop_p_2 (loop, ref, false);
2203 }
2204 
2205 /* Returns true if we can perform store motion of REF from LOOP.  */
2206 
2207 static bool
2208 can_sm_ref_p (struct loop *loop, im_mem_ref *ref)
2209 {
2210   tree base;
2211 
2212   /* Can't hoist unanalyzable refs.  */
2213   if (!MEM_ANALYZABLE (ref))
2214     return false;
2215 
2216   /* It should be movable.  */
2217   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2218       || TREE_THIS_VOLATILE (ref->mem.ref)
2219       || !for_each_index (&ref->mem.ref, may_move_till, loop))
2220     return false;
2221 
2222   /* If it can throw, fail; we do not properly update EH info.  */
2223   if (tree_could_throw_p (ref->mem.ref))
2224     return false;
2225 
2226   /* If it can trap, it must be always executed in LOOP.
2227      Readonly memory locations may trap when storing to them, but
2228      tree_could_trap_p is a predicate for rvalues, so check that
2229      explicitly.  */
2230   base = get_base_address (ref->mem.ref);
2231   if ((tree_could_trap_p (ref->mem.ref)
2232        || (DECL_P (base) && TREE_READONLY (base)))
2233       && !ref_always_accessed_p (loop, ref, true))
2234     return false;
2235 
2236   /* And it must be independent of all other memory references
2237      in LOOP.  */
2238   if (!ref_indep_loop_p (loop, ref))
2239     return false;
2240 
2241   return true;
2242 }
2243 
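/* Illustration, added for exposition only: an example of a reference that
   the trap check above rejects.  Assuming p may be null whenever cond (i)
   is never true,

     for (i = 0; i < n; i++)
       if (cond (i))
         *p = i;

   *p could trap and is not always executed in the loop, so creating a
   load of *p in front of the loop could introduce a fault the original
   program never had; store motion is therefore not performed for it.  */
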
2244 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2245    should be performed.  SM_EXECUTED is the set of references for which
2246    store motion was performed in one of the outer loops.  */
2247 
2248 static void
2249 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2250 {
2251   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2252   unsigned i;
2253   bitmap_iterator bi;
2254   im_mem_ref *ref;
2255 
2256   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2257     {
2258       ref = memory_accesses.refs_list[i];
2259       if (can_sm_ref_p (loop, ref))
2260 	bitmap_set_bit (refs_to_sm, i);
2261     }
2262 }
2263 
2264 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2265    for a store motion optimization (i.e. whether we can insert statements
2266    on its exits).  */
2267 
2268 static bool
2269 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2270 		      vec<edge> exits)
2271 {
2272   unsigned i;
2273   edge ex;
2274 
2275   FOR_EACH_VEC_ELT (exits, i, ex)
2276     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2277       return false;
2278 
2279   return true;
2280 }
2281 
2282 /* Try to perform store motion for all memory references modified inside
2283    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
2284    store motion was executed in one of the outer loops.  */
2285 
2286 static void
2287 store_motion_loop (struct loop *loop, bitmap sm_executed)
2288 {
2289   vec<edge> exits = get_loop_exit_edges (loop);
2290   struct loop *subloop;
2291   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2292 
2293   if (loop_suitable_for_sm (loop, exits))
2294     {
2295       find_refs_for_sm (loop, sm_executed, sm_in_loop);
2296       hoist_memory_references (loop, sm_in_loop, exits);
2297     }
2298   exits.release ();
2299 
2300   bitmap_ior_into (sm_executed, sm_in_loop);
2301   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2302     store_motion_loop (subloop, sm_executed);
2303   bitmap_and_compl_into (sm_executed, sm_in_loop);
2304   BITMAP_FREE (sm_in_loop);
2305 }
2306 
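/* Illustration, added for exposition only: store_motion_loop processes a
   loop before its subloops, so a reference moved out of the outer loop is
   added to SM_EXECUTED and skipped by find_refs_for_sm in the subloops:

     for (...)          <- g moved out at this level
       for (...)
         g = ...;       <- not considered again for the inner loop

   The bits of the current loop are removed from SM_EXECUTED once its
   subloops have been processed, so sibling loops are not affected.  */
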
2307 /* Try to perform store motion for all memory references modified inside
2308    loops.  */
2309 
2310 static void
2311 store_motion (void)
2312 {
2313   struct loop *loop;
2314   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2315 
2316   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2317     store_motion_loop (loop, sm_executed);
2318 
2319   BITMAP_FREE (sm_executed);
2320   gsi_commit_edge_inserts ();
2321 }
2322 
2323 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2324    for each such basic block bb records the outermost loop for which execution
2325    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
2326    blocks that contain a nonpure call.  */
2327 
2328 static void
2329 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2330 {
2331   basic_block bb = NULL, *bbs, last = NULL;
2332   unsigned i;
2333   edge e;
2334   struct loop *inn_loop = loop;
2335 
2336   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2337     {
2338       bbs = get_loop_body_in_dom_order (loop);
2339 
2340       for (i = 0; i < loop->num_nodes; i++)
2341 	{
2342 	  edge_iterator ei;
2343 	  bb = bbs[i];
2344 
2345 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2346 	    last = bb;
2347 
2348 	  if (bitmap_bit_p (contains_call, bb->index))
2349 	    break;
2350 
2351 	  FOR_EACH_EDGE (e, ei, bb->succs)
2352 	    {
2353 	      /* If there is an exit from this BB.  */
2354 	      if (!flow_bb_inside_loop_p (loop, e->dest))
2355 		break;
2356 	      /* Or we enter a possibly non-finite loop.  */
2357 	      if (flow_loop_nested_p (bb->loop_father,
2358 				      e->dest->loop_father)
2359 		  && ! finite_loop_p (e->dest->loop_father))
2360 		break;
2361 	    }
2362 	  if (e)
2363 	    break;
2364 
2365 	  /* A loop might be infinite (TODO use simple loop analysis
2366 	     to disprove this if possible).  */
2367 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
2368 	    break;
2369 
2370 	  if (!flow_bb_inside_loop_p (inn_loop, bb))
2371 	    break;
2372 
2373 	  if (bb->loop_father->header == bb)
2374 	    {
2375 	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2376 		break;
2377 
2378 	      /* In a loop that is always entered we may proceed anyway.
2379 		 But record that we entered it and stop once we leave it.  */
2380 	      inn_loop = bb->loop_father;
2381 	    }
2382 	}
2383 
2384       while (1)
2385 	{
2386 	  SET_ALWAYS_EXECUTED_IN (last, loop);
2387 	  if (last == loop->header)
2388 	    break;
2389 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
2390 	}
2391 
2392       free (bbs);
2393     }
2394 
2395   for (loop = loop->inner; loop; loop = loop->next)
2396     fill_always_executed_in_1 (loop, contains_call);
2397 }
2398 
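/* Illustration, added for exposition only: for a loop with the exit test
   at the bottom,

     do
       {
         b = b + 1;      <- in the header: ALWAYS_EXECUTED_IN = the loop
         if (p)
           a = x;        <- only conditionally executed: not marked
       }
     while (cond);       <- exit test: also always executed

   only blocks reached on every iteration before any possible exit, call,
   or entry into a possibly infinite inner loop are marked.  For a loop
   with the exit test at the top the walk already stops at the header,
   because the header has an exit edge.  */
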
2399 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2400    for each such basic block bb records the outermost loop for which execution
2401    of its header implies execution of bb.  */
2402 
2403 static void
2404 fill_always_executed_in (void)
2405 {
2406   sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
2407   basic_block bb;
2408   struct loop *loop;
2409 
2410   bitmap_clear (contains_call);
2411   FOR_EACH_BB_FN (bb, cfun)
2412     {
2413       gimple_stmt_iterator gsi;
2414       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2415 	{
2416 	  if (nonpure_call_p (gsi_stmt (gsi)))
2417 	    break;
2418 	}
2419 
2420       if (!gsi_end_p (gsi))
2421 	bitmap_set_bit (contains_call, bb->index);
2422     }
2423 
2424   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2425     fill_always_executed_in_1 (loop, contains_call);
2426 
2427   sbitmap_free (contains_call);
2428 }
2429 
2430 
2431 /* Compute the global information needed by the loop invariant motion pass.  */
2432 
2433 static void
2434 tree_ssa_lim_initialize (void)
2435 {
2436   struct loop *loop;
2437   unsigned i;
2438 
2439   bitmap_obstack_initialize (&lim_bitmap_obstack);
2440   gcc_obstack_init (&mem_ref_obstack);
2441   lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
2442 
2443   if (flag_tm)
2444     compute_transaction_bits ();
2445 
2446   alloc_aux_for_edges (0);
2447 
2448   memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2449   memory_accesses.refs_list.create (100);
2450   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
2451   memory_accesses.refs_list.quick_push
2452     (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2453 
2454   memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2455   memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2456   memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2457   memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2458   memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2459   memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2460 
2461   for (i = 0; i < number_of_loops (cfun); i++)
2462     {
2463       bitmap_initialize (&memory_accesses.refs_in_loop[i],
2464 			 &lim_bitmap_obstack);
2465       bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2466 			 &lim_bitmap_obstack);
2467       bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2468 			 &lim_bitmap_obstack);
2469     }
2470 
2471   memory_accesses.ttae_cache = NULL;
2472 
2473   /* Initialize bb_loop_postorder with a mapping from loop->num to
2474      its postorder index.  */
2475   i = 0;
2476   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2477   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2478     bb_loop_postorder[loop->num] = i++;
2479 }
2480 
2481 /* Cleans up after the invariant motion pass.  */
2482 
2483 static void
2484 tree_ssa_lim_finalize (void)
2485 {
2486   basic_block bb;
2487   unsigned i;
2488   im_mem_ref *ref;
2489 
2490   free_aux_for_edges ();
2491 
2492   FOR_EACH_BB_FN (bb, cfun)
2493     SET_ALWAYS_EXECUTED_IN (bb, NULL);
2494 
2495   bitmap_obstack_release (&lim_bitmap_obstack);
2496   delete lim_aux_data_map;
2497 
2498   delete memory_accesses.refs;
2499   memory_accesses.refs = NULL;
2500 
2501   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2502     memref_free (ref);
2503   memory_accesses.refs_list.release ();
2504   obstack_free (&mem_ref_obstack, NULL);
2505 
2506   memory_accesses.refs_in_loop.release ();
2507   memory_accesses.refs_stored_in_loop.release ();
2508   memory_accesses.all_refs_stored_in_loop.release ();
2509 
2510   if (memory_accesses.ttae_cache)
2511     free_affine_expand_cache (&memory_accesses.ttae_cache);
2512 
2513   free (bb_loop_postorder);
2514 }
2515 
2516 /* Moves invariants from loops.  Only "expensive" invariants are moved out --
2517    i.e. those that are likely to be a win regardless of the register pressure.  */
2518 
2519 static unsigned int
2520 tree_ssa_lim (void)
2521 {
2522   unsigned int todo;
2523 
2524   tree_ssa_lim_initialize ();
2525 
2526   /* Gathers information about memory accesses in the loops.  */
2527   analyze_memory_references ();
2528 
2529   /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
2530   fill_always_executed_in ();
2531 
2532   /* For each statement determine the outermost loop in which it is
2533      invariant and the cost of computing the invariant.  */
2534   invariantness_dom_walker (CDI_DOMINATORS)
2535     .walk (cfun->cfg->x_entry_block_ptr);
2536 
2537   /* Execute store motion.  Force the necessary invariants to be moved
2538      out of the loops as well.  */
2539   store_motion ();
2540 
2541   /* Move the expressions that are expensive enough.  */
2542   todo = move_computations ();
2543 
2544   tree_ssa_lim_finalize ();
2545 
2546   return todo;
2547 }
2548 
2549 /* Loop invariant motion pass.  */
2550 
2551 namespace {
2552 
2553 const pass_data pass_data_lim =
2554 {
2555   GIMPLE_PASS, /* type */
2556   "lim", /* name */
2557   OPTGROUP_LOOP, /* optinfo_flags */
2558   TV_LIM, /* tv_id */
2559   PROP_cfg, /* properties_required */
2560   0, /* properties_provided */
2561   0, /* properties_destroyed */
2562   0, /* todo_flags_start */
2563   0, /* todo_flags_finish */
2564 };
2565 
2566 class pass_lim : public gimple_opt_pass
2567 {
2568 public:
2569   pass_lim (gcc::context *ctxt)
2570     : gimple_opt_pass (pass_data_lim, ctxt)
2571   {}
2572 
2573   /* opt_pass methods: */
2574   opt_pass * clone () { return new pass_lim (m_ctxt); }
2575   virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2576   virtual unsigned int execute (function *);
2577 
2578 }; // class pass_lim
2579 
2580 unsigned int
2581 pass_lim::execute (function *fun)
2582 {
2583   bool in_loop_pipeline = scev_initialized_p ();
2584   if (!in_loop_pipeline)
2585     loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2586 
2587   if (number_of_loops (fun) <= 1)
2588     return 0;
2589   unsigned int todo = tree_ssa_lim ();
2590 
2591   if (!in_loop_pipeline)
2592     loop_optimizer_finalize ();
2593   return todo;
2594 }
2595 
2596 } // anon namespace
2597 
2598 gimple_opt_pass *
2599 make_pass_lim (gcc::context *ctxt)
2600 {
2601   return new pass_lim (ctxt);
2602 }
2603 
2604 
2605