1 /* Loop distribution.
2    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3    Free Software Foundation, Inc.
4    Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5    and Sebastian Pop <sebastian.pop@amd.com>.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 /* This pass performs loop distribution: for example, the loop
24 
25    |DO I = 2, N
26    |    A(I) = B(I) + C
27    |    D(I) = A(I-1)*E
28    |ENDDO
29 
30    is transformed to
31 
32    |DOALL I = 2, N
33    |   A(I) = B(I) + C
34    |ENDDO
35    |
36    |DOALL I = 2, N
37    |   D(I) = A(I-1)*E
38    |ENDDO
39 
40    This pass uses an RDG, Reduced Dependence Graph built on top of the
41    data dependence relations.  The RDG is then topologically sorted to
42    obtain a map of information producers/consumers based on which it
43    generates the new loops.  */
44 
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tree-flow.h"
49 #include "cfgloop.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
54 
55 /* If bit I is not set, it means that this node represents an
56    operation that has already been performed, and that should not be
57    performed again.  This is the subgraph of remaining important
58    computations that is passed to the DFS algorithm for avoiding to
59    include several times the same stores in different loops.  */
60 static bitmap remaining_stmts;
61 
62 /* A node of the RDG is marked in this bitmap when it has as a
63    predecessor a node that writes to memory.  */
64 static bitmap upstream_mem_writes;
65 
66 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
67    the LOOP.  */
68 
69 static bool
ssa_name_has_uses_outside_loop_p(tree def,loop_p loop)70 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
71 {
72   imm_use_iterator imm_iter;
73   use_operand_p use_p;
74 
75   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
76     if (loop != loop_containing_stmt (USE_STMT (use_p)))
77       return true;
78 
79   return false;
80 }
81 
82 /* Returns true when STMT defines a scalar variable used after the
83    loop.  */
84 
85 static bool
stmt_has_scalar_dependences_outside_loop(gimple stmt)86 stmt_has_scalar_dependences_outside_loop (gimple stmt)
87 {
88   tree name;
89 
90   switch (gimple_code (stmt))
91     {
92     case GIMPLE_CALL:
93     case GIMPLE_ASSIGN:
94       name = gimple_get_lhs (stmt);
95       break;
96 
97     case GIMPLE_PHI:
98       name = gimple_phi_result (stmt);
99       break;
100 
101     default:
102       return false;
103     }
104 
105   return (name
106 	  && TREE_CODE (name) == SSA_NAME
107 	  && ssa_name_has_uses_outside_loop_p (name,
108 					       loop_containing_stmt (stmt)));
109 }
110 
111 /* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
112    ORIG_LOOP.  */
113 
114 static void
update_phis_for_loop_copy(struct loop * orig_loop,struct loop * new_loop)115 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
116 {
117   tree new_ssa_name;
118   gimple_stmt_iterator si_new, si_orig;
119   edge orig_loop_latch = loop_latch_edge (orig_loop);
120   edge orig_entry_e = loop_preheader_edge (orig_loop);
121   edge new_loop_entry_e = loop_preheader_edge (new_loop);
122 
123   /* Scan the phis in the headers of the old and new loops
124      (they are organized in exactly the same order).  */
125   for (si_new = gsi_start_phis (new_loop->header),
126        si_orig = gsi_start_phis (orig_loop->header);
127        !gsi_end_p (si_new) && !gsi_end_p (si_orig);
128        gsi_next (&si_new), gsi_next (&si_orig))
129     {
130       tree def;
131       source_location locus;
132       gimple phi_new = gsi_stmt (si_new);
133       gimple phi_orig = gsi_stmt (si_orig);
134 
135       /* Add the first phi argument for the phi in NEW_LOOP (the one
136 	 associated with the entry of NEW_LOOP)  */
137       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
138       locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
139       add_phi_arg (phi_new, def, new_loop_entry_e, locus);
140 
141       /* Add the second phi argument for the phi in NEW_LOOP (the one
142 	 associated with the latch of NEW_LOOP)  */
143       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
144       locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
145 
146       if (TREE_CODE (def) == SSA_NAME)
147 	{
148 	  new_ssa_name = get_current_def (def);
149 
150 	  if (!new_ssa_name)
151 	    /* This only happens if there are no definitions inside the
152 	       loop.  Use the the invariant in the new loop as is.  */
153 	    new_ssa_name = def;
154 	}
155       else
156 	/* Could be an integer.  */
157 	new_ssa_name = def;
158 
159       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
160     }
161 }
162 
163 /* Return a copy of LOOP placed before LOOP.  */
164 
165 static struct loop *
copy_loop_before(struct loop * loop)166 copy_loop_before (struct loop *loop)
167 {
168   struct loop *res;
169   edge preheader = loop_preheader_edge (loop);
170 
171   if (!single_exit (loop))
172     return NULL;
173 
174   initialize_original_copy_tables ();
175   res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
176   free_original_copy_tables ();
177 
178   if (!res)
179     return NULL;
180 
181   update_phis_for_loop_copy (loop, res);
182   rename_variables_in_loop (res);
183 
184   return res;
185 }
186 
187 /* Creates an empty basic block after LOOP.  */
188 
189 static void
create_bb_after_loop(struct loop * loop)190 create_bb_after_loop (struct loop *loop)
191 {
192   edge exit = single_exit (loop);
193 
194   if (!exit)
195     return;
196 
197   split_edge (exit);
198 }
199 
200 /* Generate code for PARTITION from the code in LOOP.  The loop is
201    copied when COPY_P is true.  All the statements not flagged in the
202    PARTITION bitmap are removed from the loop or from its copy.  The
203    statements are indexed in sequence inside a basic block, and the
204    basic blocks of a loop are taken in dom order.  Returns true when
205    the code gen succeeded. */
206 
207 static bool
generate_loops_for_partition(struct loop * loop,bitmap partition,bool copy_p)208 generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
209 {
210   unsigned i, x;
211   gimple_stmt_iterator bsi;
212   basic_block *bbs;
213 
214   if (copy_p)
215     {
216       loop = copy_loop_before (loop);
217       create_preheader (loop, CP_SIMPLE_PREHEADERS);
218       create_bb_after_loop (loop);
219     }
220 
221   if (loop == NULL)
222     return false;
223 
224   /* Remove stmts not in the PARTITION bitmap.  The order in which we
225      visit the phi nodes and the statements is exactly as in
226      stmts_from_loop.  */
227   bbs = get_loop_body_in_dom_order (loop);
228 
229   if (MAY_HAVE_DEBUG_STMTS)
230     for (x = 0, i = 0; i < loop->num_nodes; i++)
231       {
232 	basic_block bb = bbs[i];
233 
234 	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
235 	  if (!bitmap_bit_p (partition, x++))
236 	    reset_debug_uses (gsi_stmt (bsi));
237 
238 	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
239 	  {
240 	    gimple stmt = gsi_stmt (bsi);
241 	    if (gimple_code (stmt) != GIMPLE_LABEL
242 		&& !is_gimple_debug (stmt)
243 		&& !bitmap_bit_p (partition, x++))
244 	      reset_debug_uses (stmt);
245 	  }
246       }
247 
248   for (x = 0, i = 0; i < loop->num_nodes; i++)
249     {
250       basic_block bb = bbs[i];
251 
252       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
253 	if (!bitmap_bit_p (partition, x++))
254 	  {
255 	    gimple phi = gsi_stmt (bsi);
256 	    if (!is_gimple_reg (gimple_phi_result (phi)))
257 	      mark_virtual_phi_result_for_renaming (phi);
258 	    remove_phi_node (&bsi, true);
259 	  }
260 	else
261 	  gsi_next (&bsi);
262 
263       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
264 	{
265 	  gimple stmt = gsi_stmt (bsi);
266 	  if (gimple_code (stmt) != GIMPLE_LABEL
267 	      && !is_gimple_debug (stmt)
268 	      && !bitmap_bit_p (partition, x++))
269 	    {
270 	      unlink_stmt_vdef (stmt);
271 	      gsi_remove (&bsi, true);
272 	      release_defs (stmt);
273 	    }
274 	  else
275 	    gsi_next (&bsi);
276 	}
277     }
278 
279   free (bbs);
280   return true;
281 }
282 
283 /* Build the size argument for a memset call.  */
284 
285 static inline tree
build_size_arg_loc(location_t loc,tree nb_iter,tree op,gimple_seq * stmt_list)286 build_size_arg_loc (location_t loc, tree nb_iter, tree op,
287 		    gimple_seq *stmt_list)
288 {
289   gimple_seq stmts;
290   tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
291 			    fold_convert_loc (loc, size_type_node, nb_iter),
292 			    fold_convert_loc (loc, size_type_node,
293 					      TYPE_SIZE_UNIT (TREE_TYPE (op))));
294   x = force_gimple_operand (x, &stmts, true, NULL);
295   gimple_seq_add_seq (stmt_list, stmts);
296 
297   return x;
298 }
299 
300 /* Generate a call to memset.  Return true when the operation succeeded.  */
301 
302 static void
generate_memset_zero(gimple stmt,tree op0,tree nb_iter,gimple_stmt_iterator bsi)303 generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
304 		      gimple_stmt_iterator bsi)
305 {
306   tree addr_base, nb_bytes;
307   bool res = false;
308   gimple_seq stmt_list = NULL, stmts;
309   gimple fn_call;
310   tree mem, fn;
311   struct data_reference *dr = XCNEW (struct data_reference);
312   location_t loc = gimple_location (stmt);
313 
314   DR_STMT (dr) = stmt;
315   DR_REF (dr) = op0;
316   res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
317   gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
318 
319   nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
320   addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
321   addr_base = fold_convert_loc (loc, sizetype, addr_base);
322 
323   /* Test for a negative stride, iterating over every element.  */
324   if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
325     {
326       addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
327 				  fold_convert_loc (loc, sizetype, nb_bytes));
328       addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
329 				  TYPE_SIZE_UNIT (TREE_TYPE (op0)));
330     }
331 
332   addr_base = fold_build_pointer_plus_loc (loc,
333 					   DR_BASE_ADDRESS (dr), addr_base);
334   mem = force_gimple_operand (addr_base, &stmts, true, NULL);
335   gimple_seq_add_seq (&stmt_list, stmts);
336 
337   fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
338   fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
339   gimple_seq_add_stmt (&stmt_list, fn_call);
340   gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
341 
342   if (dump_file && (dump_flags & TDF_DETAILS))
343     fprintf (dump_file, "generated memset zero\n");
344 
345   free_data_ref (dr);
346 }
347 
348 /* Tries to generate a builtin function for the instructions of LOOP
349    pointed to by the bits set in PARTITION.  Returns true when the
350    operation succeeded.  */
351 
352 static bool
generate_builtin(struct loop * loop,bitmap partition,bool copy_p)353 generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
354 {
355   bool res = false;
356   unsigned i, x = 0;
357   basic_block *bbs;
358   gimple write = NULL;
359   gimple_stmt_iterator bsi;
360   tree nb_iter = number_of_exit_cond_executions (loop);
361 
362   if (!nb_iter || nb_iter == chrec_dont_know)
363     return false;
364 
365   bbs = get_loop_body_in_dom_order (loop);
366 
367   for (i = 0; i < loop->num_nodes; i++)
368     {
369       basic_block bb = bbs[i];
370 
371       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
372 	x++;
373 
374       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
375 	{
376 	  gimple stmt = gsi_stmt (bsi);
377 
378 	  if (gimple_code (stmt) == GIMPLE_LABEL
379 	      || is_gimple_debug (stmt))
380 	    continue;
381 
382 	  if (!bitmap_bit_p (partition, x++))
383 	    continue;
384 
385 	  /* If the stmt has uses outside of the loop fail.  */
386 	  if (stmt_has_scalar_dependences_outside_loop (stmt))
387 	    goto end;
388 
389 	  if (is_gimple_assign (stmt)
390 	      && !is_gimple_reg (gimple_assign_lhs (stmt)))
391 	    {
392 	      /* Don't generate the builtins when there are more than
393 		 one memory write.  */
394 	      if (write != NULL)
395 		goto end;
396 
397 	      write = stmt;
398 	      if (bb == loop->latch)
399 		nb_iter = number_of_latch_executions (loop);
400 	    }
401 	}
402     }
403 
404   if (!stmt_with_adjacent_zero_store_dr_p (write))
405     goto end;
406 
407   /* The new statements will be placed before LOOP.  */
408   bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
409   generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
410   res = true;
411 
412   /* If this is the last partition for which we generate code, we have
413      to destroy the loop.  */
414   if (!copy_p)
415     {
416       unsigned nbbs = loop->num_nodes;
417       edge exit = single_exit (loop);
418       basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
419       redirect_edge_pred (exit, src);
420       exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
421       exit->flags |= EDGE_FALLTHRU;
422       cancel_loop_tree (loop);
423       rescan_loop_exit (exit, false, true);
424 
425       for (i = 0; i < nbbs; i++)
426 	delete_basic_block (bbs[i]);
427 
428       set_immediate_dominator (CDI_DOMINATORS, dest,
429 			       recompute_dominator (CDI_DOMINATORS, dest));
430     }
431 
432  end:
433   free (bbs);
434   return res;
435 }
436 
437 /* Generates code for PARTITION.  For simple loops, this function can
438    generate a built-in.  */
439 
440 static bool
generate_code_for_partition(struct loop * loop,bitmap partition,bool copy_p)441 generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
442 {
443   if (generate_builtin (loop, partition, copy_p))
444     return true;
445 
446   return generate_loops_for_partition (loop, partition, copy_p);
447 }
448 
449 
450 /* Returns true if the node V of RDG cannot be recomputed.  */
451 
452 static bool
rdg_cannot_recompute_vertex_p(struct graph * rdg,int v)453 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
454 {
455   if (RDG_MEM_WRITE_STMT (rdg, v))
456     return true;
457 
458   return false;
459 }
460 
461 /* Returns true when the vertex V has already been generated in the
462    current partition (V is in PROCESSED), or when V belongs to another
463    partition and cannot be recomputed (V is not in REMAINING_STMTS).  */
464 
465 static inline bool
already_processed_vertex_p(bitmap processed,int v)466 already_processed_vertex_p (bitmap processed, int v)
467 {
468   return (bitmap_bit_p (processed, v)
469 	  || !bitmap_bit_p (remaining_stmts, v));
470 }
471 
472 /* Returns NULL when there is no anti-dependence among the successors
473    of vertex V, otherwise returns the edge with the anti-dep.  */
474 
475 static struct graph_edge *
has_anti_dependence(struct vertex * v)476 has_anti_dependence (struct vertex *v)
477 {
478   struct graph_edge *e;
479 
480   if (v->succ)
481     for (e = v->succ; e; e = e->succ_next)
482       if (RDGE_TYPE (e) == anti_dd)
483 	return e;
484 
485   return NULL;
486 }
487 
488 /* Returns true when V has an anti-dependence edge among its successors.  */
489 
490 static bool
predecessor_has_mem_write(struct graph * rdg,struct vertex * v)491 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
492 {
493   struct graph_edge *e;
494 
495   if (v->pred)
496     for (e = v->pred; e; e = e->pred_next)
497       if (bitmap_bit_p (upstream_mem_writes, e->src)
498 	  /* Don't consider flow channels: a write to memory followed
499 	     by a read from memory.  These channels allow the split of
500 	     the RDG in different partitions.  */
501 	  && !RDG_MEM_WRITE_STMT (rdg, e->src))
502 	return true;
503 
504   return false;
505 }
506 
507 /* Initializes the upstream_mem_writes bitmap following the
508    information from RDG.  */
509 
510 static void
mark_nodes_having_upstream_mem_writes(struct graph * rdg)511 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
512 {
513   int v, x;
514   bitmap seen = BITMAP_ALLOC (NULL);
515 
516   for (v = rdg->n_vertices - 1; v >= 0; v--)
517     if (!bitmap_bit_p (seen, v))
518       {
519 	unsigned i;
520 	VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
521 
522 	graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
523 
524 	FOR_EACH_VEC_ELT (int, nodes, i, x)
525 	  {
526 	    if (!bitmap_set_bit (seen, x))
527 	      continue;
528 
529 	    if (RDG_MEM_WRITE_STMT (rdg, x)
530 		|| predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
531 		/* In anti dependences the read should occur before
532 		   the write, this is why both the read and the write
533 		   should be placed in the same partition.  */
534 		|| has_anti_dependence (&(rdg->vertices[x])))
535 	      {
536 		bitmap_set_bit (upstream_mem_writes, x);
537 	      }
538 	  }
539 
540 	VEC_free (int, heap, nodes);
541       }
542 }
543 
544 /* Returns true when vertex u has a memory write node as a predecessor
545    in RDG.  */
546 
547 static bool
has_upstream_mem_writes(int u)548 has_upstream_mem_writes (int u)
549 {
550   return bitmap_bit_p (upstream_mem_writes, u);
551 }
552 
553 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
554 					   bitmap, bool *);
555 
556 /* Flag the uses of U stopping following the information from
557    upstream_mem_writes.  */
558 
559 static void
rdg_flag_uses(struct graph * rdg,int u,bitmap partition,bitmap loops,bitmap processed,bool * part_has_writes)560 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
561 	       bitmap processed, bool *part_has_writes)
562 {
563   use_operand_p use_p;
564   struct vertex *x = &(rdg->vertices[u]);
565   gimple stmt = RDGV_STMT (x);
566   struct graph_edge *anti_dep = has_anti_dependence (x);
567 
568   /* Keep in the same partition the destination of an antidependence,
569      because this is a store to the exact same location.  Putting this
570      in another partition is bad for cache locality.  */
571   if (anti_dep)
572     {
573       int v = anti_dep->dest;
574 
575       if (!already_processed_vertex_p (processed, v))
576 	rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
577 				       processed, part_has_writes);
578     }
579 
580   if (gimple_code (stmt) != GIMPLE_PHI)
581     {
582       if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
583 	{
584 	  tree use = USE_FROM_PTR (use_p);
585 
586 	  if (TREE_CODE (use) == SSA_NAME)
587 	    {
588 	      gimple def_stmt = SSA_NAME_DEF_STMT (use);
589 	      int v = rdg_vertex_for_stmt (rdg, def_stmt);
590 
591 	      if (v >= 0
592 		  && !already_processed_vertex_p (processed, v))
593 		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
594 					       processed, part_has_writes);
595 	    }
596 	}
597     }
598 
599   if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
600     {
601       tree op0 = gimple_assign_lhs (stmt);
602 
603       /* Scalar channels don't have enough space for transmitting data
604 	 between tasks, unless we add more storage by privatizing.  */
605       if (is_gimple_reg (op0))
606 	{
607 	  use_operand_p use_p;
608 	  imm_use_iterator iter;
609 
610 	  FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
611 	    {
612 	      int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
613 
614 	      if (!already_processed_vertex_p (processed, v))
615 		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
616 					       processed, part_has_writes);
617 	    }
618 	}
619     }
620 }
621 
622 /* Flag V from RDG as part of PARTITION, and also flag its loop number
623    in LOOPS.  */
624 
625 static void
rdg_flag_vertex(struct graph * rdg,int v,bitmap partition,bitmap loops,bool * part_has_writes)626 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
627 		 bool *part_has_writes)
628 {
629   struct loop *loop;
630 
631   if (!bitmap_set_bit (partition, v))
632     return;
633 
634   loop = loop_containing_stmt (RDG_STMT (rdg, v));
635   bitmap_set_bit (loops, loop->num);
636 
637   if (rdg_cannot_recompute_vertex_p (rdg, v))
638     {
639       *part_has_writes = true;
640       bitmap_clear_bit (remaining_stmts, v);
641     }
642 }
643 
644 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
645    Also flag their loop number in LOOPS.  */
646 
647 static void
rdg_flag_vertex_and_dependent(struct graph * rdg,int v,bitmap partition,bitmap loops,bitmap processed,bool * part_has_writes)648 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
649 			       bitmap loops, bitmap processed,
650 			       bool *part_has_writes)
651 {
652   unsigned i;
653   VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
654   int x;
655 
656   bitmap_set_bit (processed, v);
657   rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
658   graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
659   rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
660 
661   FOR_EACH_VEC_ELT (int, nodes, i, x)
662     if (!already_processed_vertex_p (processed, x))
663       rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
664 				     part_has_writes);
665 
666   VEC_free (int, heap, nodes);
667 }
668 
669 /* Initialize CONDS with all the condition statements from the basic
670    blocks of LOOP.  */
671 
672 static void
collect_condition_stmts(struct loop * loop,VEC (gimple,heap)** conds)673 collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
674 {
675   unsigned i;
676   edge e;
677   VEC (edge, heap) *exits = get_loop_exit_edges (loop);
678 
679   FOR_EACH_VEC_ELT (edge, exits, i, e)
680     {
681       gimple cond = last_stmt (e->src);
682 
683       if (cond)
684 	VEC_safe_push (gimple, heap, *conds, cond);
685     }
686 
687   VEC_free (edge, heap, exits);
688 }
689 
690 /* Add to PARTITION all the exit condition statements for LOOPS
691    together with all their dependent statements determined from
692    RDG.  */
693 
694 static void
rdg_flag_loop_exits(struct graph * rdg,bitmap loops,bitmap partition,bitmap processed,bool * part_has_writes)695 rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
696 		     bitmap processed, bool *part_has_writes)
697 {
698   unsigned i;
699   bitmap_iterator bi;
700   VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
701 
702   EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
703     collect_condition_stmts (get_loop (i), &conds);
704 
705   while (!VEC_empty (gimple, conds))
706     {
707       gimple cond = VEC_pop (gimple, conds);
708       int v = rdg_vertex_for_stmt (rdg, cond);
709       bitmap new_loops = BITMAP_ALLOC (NULL);
710 
711       if (!already_processed_vertex_p (processed, v))
712 	rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
713 				       part_has_writes);
714 
715       EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
716 	if (bitmap_set_bit (loops, i))
717 	  collect_condition_stmts (get_loop (i), &conds);
718 
719       BITMAP_FREE (new_loops);
720     }
721 
722   VEC_free (gimple, heap, conds);
723 }
724 
725 /* Returns a bitmap in which all the statements needed for computing
726    the strongly connected component C of the RDG are flagged, also
727    including the loop exit conditions.  */
728 
729 static bitmap
build_rdg_partition_for_component(struct graph * rdg,rdgc c,bool * part_has_writes)730 build_rdg_partition_for_component (struct graph *rdg, rdgc c,
731 				   bool *part_has_writes)
732 {
733   int i, v;
734   bitmap partition = BITMAP_ALLOC (NULL);
735   bitmap loops = BITMAP_ALLOC (NULL);
736   bitmap processed = BITMAP_ALLOC (NULL);
737 
738   FOR_EACH_VEC_ELT (int, c->vertices, i, v)
739     if (!already_processed_vertex_p (processed, v))
740       rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
741 				     part_has_writes);
742 
743   rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
744 
745   BITMAP_FREE (processed);
746   BITMAP_FREE (loops);
747   return partition;
748 }
749 
750 /* Free memory for COMPONENTS.  */
751 
752 static void
free_rdg_components(VEC (rdgc,heap)* components)753 free_rdg_components (VEC (rdgc, heap) *components)
754 {
755   int i;
756   rdgc x;
757 
758   FOR_EACH_VEC_ELT (rdgc, components, i, x)
759     {
760       VEC_free (int, heap, x->vertices);
761       free (x);
762     }
763 
764   VEC_free (rdgc, heap, components);
765 }
766 
767 /* Build the COMPONENTS vector with the strongly connected components
768    of RDG in which the STARTING_VERTICES occur.  */
769 
770 static void
rdg_build_components(struct graph * rdg,VEC (int,heap)* starting_vertices,VEC (rdgc,heap)** components)771 rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
772 		      VEC (rdgc, heap) **components)
773 {
774   int i, v;
775   bitmap saved_components = BITMAP_ALLOC (NULL);
776   int n_components = graphds_scc (rdg, NULL);
777   VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
778 
779   for (i = 0; i < n_components; i++)
780     all_components[i] = VEC_alloc (int, heap, 3);
781 
782   for (i = 0; i < rdg->n_vertices; i++)
783     VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
784 
785   FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
786     {
787       int c = rdg->vertices[v].component;
788 
789       if (bitmap_set_bit (saved_components, c))
790 	{
791 	  rdgc x = XCNEW (struct rdg_component);
792 	  x->num = c;
793 	  x->vertices = all_components[c];
794 
795 	  VEC_safe_push (rdgc, heap, *components, x);
796 	}
797     }
798 
799   for (i = 0; i < n_components; i++)
800     if (!bitmap_bit_p (saved_components, i))
801       VEC_free (int, heap, all_components[i]);
802 
803   free (all_components);
804   BITMAP_FREE (saved_components);
805 }
806 
807 /* Returns true when it is possible to generate a builtin pattern for
808    the PARTITION of RDG.  For the moment we detect only the memset
809    zero pattern.  */
810 
811 static bool
can_generate_builtin(struct graph * rdg,bitmap partition)812 can_generate_builtin (struct graph *rdg, bitmap partition)
813 {
814   unsigned i;
815   bitmap_iterator bi;
816   int nb_reads = 0;
817   int nb_writes = 0;
818   int stores_zero = 0;
819 
820   EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
821     if (RDG_MEM_READS_STMT (rdg, i))
822       nb_reads++;
823     else if (RDG_MEM_WRITE_STMT (rdg, i))
824       {
825 	nb_writes++;
826 	if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
827 	  stores_zero++;
828       }
829 
830   return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
831 }
832 
833 /* Returns true when PARTITION1 and PARTITION2 have similar memory
834    accesses in RDG.  */
835 
836 static bool
similar_memory_accesses(struct graph * rdg,bitmap partition1,bitmap partition2)837 similar_memory_accesses (struct graph *rdg, bitmap partition1,
838 			 bitmap partition2)
839 {
840   unsigned i, j;
841   bitmap_iterator bi, bj;
842 
843   EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
844     if (RDG_MEM_WRITE_STMT (rdg, i)
845 	|| RDG_MEM_READS_STMT (rdg, i))
846       EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
847 	if (RDG_MEM_WRITE_STMT (rdg, j)
848 	    || RDG_MEM_READS_STMT (rdg, j))
849 	  if (rdg_has_similar_memory_accesses (rdg, i, j))
850 	    return true;
851 
852   return false;
853 }
854 
855 /* Fuse all the partitions from PARTITIONS that contain similar memory
856    references, i.e., we're taking care of cache locality.  This
857    function does not fuse those partitions that contain patterns that
858    can be code generated with builtins.  */
859 
860 static void
fuse_partitions_with_similar_memory_accesses(struct graph * rdg,VEC (bitmap,heap)** partitions)861 fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
862 					      VEC (bitmap, heap) **partitions)
863 {
864   int p1, p2;
865   bitmap partition1, partition2;
866 
867   FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
868     if (!can_generate_builtin (rdg, partition1))
869       FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
870 	if (p1 != p2
871 	    && !can_generate_builtin (rdg, partition2)
872 	    && similar_memory_accesses (rdg, partition1, partition2))
873 	  {
874 	    bitmap_ior_into (partition1, partition2);
875 	    VEC_ordered_remove (bitmap, *partitions, p2);
876 	    p2--;
877 	  }
878 }
879 
880 /* Returns true when STMT will be code generated in a partition of RDG
881    different than PART and that will not be code generated as a
882    builtin.  */
883 
884 static bool
stmt_generated_in_another_partition(struct graph * rdg,gimple stmt,int part,VEC (bitmap,heap)* partitions)885 stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
886 				     VEC (bitmap, heap) *partitions)
887 {
888   int p;
889   bitmap pp;
890   unsigned i;
891   bitmap_iterator bi;
892 
893   FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
894     if (p != part
895 	&& !can_generate_builtin (rdg, pp))
896       EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
897 	if (stmt == RDG_STMT (rdg, i))
898 	  return true;
899 
900   return false;
901 }
902 
903 /* For each partition in PARTITIONS that will be code generated using
904    a builtin, add its scalar computations used after the loop to
905    PARTITION.  */
906 
907 static void
add_scalar_computations_to_partition(struct graph * rdg,VEC (bitmap,heap)* partitions,bitmap partition)908 add_scalar_computations_to_partition (struct graph *rdg,
909 				      VEC (bitmap, heap) *partitions,
910 				      bitmap partition)
911 {
912   int p;
913   bitmap pp;
914   unsigned i;
915   bitmap_iterator bi;
916   bitmap l = BITMAP_ALLOC (NULL);
917   bitmap pr = BITMAP_ALLOC (NULL);
918   bool f = false;
919 
920   FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
921     if (can_generate_builtin (rdg, pp))
922       EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
923 	if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
924 	    && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
925 						     partitions))
926 	  rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);
927 
928   rdg_flag_loop_exits (rdg, l, partition, pr, &f);
929 
930   BITMAP_FREE (pr);
931   BITMAP_FREE (l);
932 }
933 
934 /* Aggregate several components into a useful partition that is
935    registered in the PARTITIONS vector.  Partitions will be
936    distributed in different loops.  */
937 
938 static void
rdg_build_partitions(struct graph * rdg,VEC (rdgc,heap)* components,VEC (int,heap)** other_stores,VEC (bitmap,heap)** partitions,bitmap processed)939 rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
940 		      VEC (int, heap) **other_stores,
941 		      VEC (bitmap, heap) **partitions, bitmap processed)
942 {
943   int i;
944   rdgc x;
945   bitmap partition = BITMAP_ALLOC (NULL);
946 
947   FOR_EACH_VEC_ELT (rdgc, components, i, x)
948     {
949       bitmap np;
950       bool part_has_writes = false;
951       int v = VEC_index (int, x->vertices, 0);
952 
953       if (bitmap_bit_p (processed, v))
954 	continue;
955 
956       np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
957       bitmap_ior_into (partition, np);
958       bitmap_ior_into (processed, np);
959       BITMAP_FREE (np);
960 
961       if (part_has_writes)
962 	{
963 	  if (dump_file && (dump_flags & TDF_DETAILS))
964 	    {
965 	      fprintf (dump_file, "ldist useful partition:\n");
966 	      dump_bitmap (dump_file, partition);
967 	    }
968 
969 	  VEC_safe_push (bitmap, heap, *partitions, partition);
970 	  partition = BITMAP_ALLOC (NULL);
971 	}
972     }
973 
974   /* Add the nodes from the RDG that were not marked as processed, and
975      that are used outside the current loop.  These are scalar
976      computations that are not yet part of previous partitions.  */
977   for (i = 0; i < rdg->n_vertices; i++)
978     if (!bitmap_bit_p (processed, i)
979 	&& rdg_defs_used_in_other_loops_p (rdg, i))
980       VEC_safe_push (int, heap, *other_stores, i);
981 
982   /* If there are still statements left in the OTHER_STORES array,
983      create other components and partitions with these stores and
984      their dependences.  */
985   if (VEC_length (int, *other_stores) > 0)
986     {
987       VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
988       VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
989 
990       rdg_build_components (rdg, *other_stores, &comps);
991       rdg_build_partitions (rdg, comps, &foo, partitions, processed);
992 
993       VEC_free (int, heap, foo);
994       free_rdg_components (comps);
995     }
996 
997   add_scalar_computations_to_partition (rdg, *partitions, partition);
998 
999   /* If there is something left in the last partition, save it.  */
1000   if (bitmap_count_bits (partition) > 0)
1001     VEC_safe_push (bitmap, heap, *partitions, partition);
1002   else
1003     BITMAP_FREE (partition);
1004 
1005   fuse_partitions_with_similar_memory_accesses (rdg, partitions);
1006 }
1007 
1008 /* Dump to FILE the PARTITIONS.  */
1009 
1010 static void
dump_rdg_partitions(FILE * file,VEC (bitmap,heap)* partitions)1011 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
1012 {
1013   int i;
1014   bitmap partition;
1015 
1016   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1017     debug_bitmap_file (file, partition);
1018 }
1019 
1020 /* Debug PARTITIONS.  */
1021 extern void debug_rdg_partitions (VEC (bitmap, heap) *);
1022 
1023 DEBUG_FUNCTION void
debug_rdg_partitions(VEC (bitmap,heap)* partitions)1024 debug_rdg_partitions (VEC (bitmap, heap) *partitions)
1025 {
1026   dump_rdg_partitions (stderr, partitions);
1027 }
1028 
1029 /* Returns the number of read and write operations in the RDG.  */
1030 
1031 static int
number_of_rw_in_rdg(struct graph * rdg)1032 number_of_rw_in_rdg (struct graph *rdg)
1033 {
1034   int i, res = 0;
1035 
1036   for (i = 0; i < rdg->n_vertices; i++)
1037     {
1038       if (RDG_MEM_WRITE_STMT (rdg, i))
1039 	++res;
1040 
1041       if (RDG_MEM_READS_STMT (rdg, i))
1042 	++res;
1043     }
1044 
1045   return res;
1046 }
1047 
1048 /* Returns the number of read and write operations in a PARTITION of
1049    the RDG.  */
1050 
1051 static int
number_of_rw_in_partition(struct graph * rdg,bitmap partition)1052 number_of_rw_in_partition (struct graph *rdg, bitmap partition)
1053 {
1054   int res = 0;
1055   unsigned i;
1056   bitmap_iterator ii;
1057 
1058   EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
1059     {
1060       if (RDG_MEM_WRITE_STMT (rdg, i))
1061 	++res;
1062 
1063       if (RDG_MEM_READS_STMT (rdg, i))
1064 	++res;
1065     }
1066 
1067   return res;
1068 }
1069 
1070 /* Returns true when one of the PARTITIONS contains all the read or
1071    write operations of RDG.  */
1072 
1073 static bool
partition_contains_all_rw(struct graph * rdg,VEC (bitmap,heap)* partitions)1074 partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
1075 {
1076   int i;
1077   bitmap partition;
1078   int nrw = number_of_rw_in_rdg (rdg);
1079 
1080   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1081     if (nrw == number_of_rw_in_partition (rdg, partition))
1082       return true;
1083 
1084   return false;
1085 }
1086 
1087 /* Generate code from STARTING_VERTICES in RDG.  Returns the number of
1088    distributed loops.  */
1089 
1090 static int
ldist_gen(struct loop * loop,struct graph * rdg,VEC (int,heap)* starting_vertices)1091 ldist_gen (struct loop *loop, struct graph *rdg,
1092 	   VEC (int, heap) *starting_vertices)
1093 {
1094   int i, nbp;
1095   VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
1096   VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
1097   VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
1098   bitmap partition, processed = BITMAP_ALLOC (NULL);
1099 
1100   remaining_stmts = BITMAP_ALLOC (NULL);
1101   upstream_mem_writes = BITMAP_ALLOC (NULL);
1102 
1103   for (i = 0; i < rdg->n_vertices; i++)
1104     {
1105       bitmap_set_bit (remaining_stmts, i);
1106 
1107       /* Save in OTHER_STORES all the memory writes that are not in
1108 	 STARTING_VERTICES.  */
1109       if (RDG_MEM_WRITE_STMT (rdg, i))
1110 	{
1111 	  int v;
1112 	  unsigned j;
1113 	  bool found = false;
1114 
1115 	  FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1116 	    if (i == v)
1117 	      {
1118 		found = true;
1119 		break;
1120 	      }
1121 
1122 	  if (!found)
1123 	    VEC_safe_push (int, heap, other_stores, i);
1124 	}
1125     }
1126 
1127   mark_nodes_having_upstream_mem_writes (rdg);
1128   rdg_build_components (rdg, starting_vertices, &components);
1129   rdg_build_partitions (rdg, components, &other_stores, &partitions,
1130 			processed);
1131   BITMAP_FREE (processed);
1132   nbp = VEC_length (bitmap, partitions);
1133 
1134   if (nbp <= 1
1135       || partition_contains_all_rw (rdg, partitions))
1136     goto ldist_done;
1137 
1138   if (dump_file && (dump_flags & TDF_DETAILS))
1139     dump_rdg_partitions (dump_file, partitions);
1140 
1141   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1142     if (!generate_code_for_partition (loop, partition, i < nbp - 1))
1143       goto ldist_done;
1144 
1145   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1146   mark_sym_for_renaming (gimple_vop (cfun));
1147   update_ssa (TODO_update_ssa_only_virtuals);
1148 
1149  ldist_done:
1150 
1151   BITMAP_FREE (remaining_stmts);
1152   BITMAP_FREE (upstream_mem_writes);
1153 
1154   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1155     BITMAP_FREE (partition);
1156 
1157   VEC_free (int, heap, other_stores);
1158   VEC_free (bitmap, heap, partitions);
1159   free_rdg_components (components);
1160   return nbp;
1161 }
1162 
1163 /* Distributes the code from LOOP in such a way that producer
1164    statements are placed before consumer statements.  When STMTS is
1165    NULL, performs the maximal distribution, if STMTS is not NULL,
1166    tries to separate only these statements from the LOOP's body.
1167    Returns the number of distributed loops.  */
1168 
1169 static int
distribute_loop(struct loop * loop,VEC (gimple,heap)* stmts)1170 distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
1171 {
1172   int res = 0;
1173   struct graph *rdg;
1174   gimple s;
1175   unsigned i;
1176   VEC (int, heap) *vertices;
1177   VEC (ddr_p, heap) *dependence_relations;
1178   VEC (data_reference_p, heap) *datarefs;
1179   VEC (loop_p, heap) *loop_nest;
1180 
1181   if (loop->num_nodes > 2)
1182     {
1183       if (dump_file && (dump_flags & TDF_DETAILS))
1184 	fprintf (dump_file,
1185 		 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
1186 		 loop->num);
1187 
1188       return res;
1189     }
1190 
1191   datarefs = VEC_alloc (data_reference_p, heap, 10);
1192   dependence_relations = VEC_alloc (ddr_p, heap, 100);
1193   loop_nest = VEC_alloc (loop_p, heap, 3);
1194   rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
1195 
1196   if (!rdg)
1197     {
1198       if (dump_file && (dump_flags & TDF_DETAILS))
1199 	fprintf (dump_file,
1200 		 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1201 		 loop->num);
1202 
1203       free_dependence_relations (dependence_relations);
1204       free_data_refs (datarefs);
1205       VEC_free (loop_p, heap, loop_nest);
1206       return res;
1207     }
1208 
1209   vertices = VEC_alloc (int, heap, 3);
1210 
1211   if (dump_file && (dump_flags & TDF_DETAILS))
1212     dump_rdg (dump_file, rdg);
1213 
1214   FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1215     {
1216       int v = rdg_vertex_for_stmt (rdg, s);
1217 
1218       if (v >= 0)
1219 	{
1220 	  VEC_safe_push (int, heap, vertices, v);
1221 
1222 	  if (dump_file && (dump_flags & TDF_DETAILS))
1223 	    fprintf (dump_file,
1224 		     "ldist asked to generate code for vertex %d\n", v);
1225 	}
1226     }
1227 
1228   res = ldist_gen (loop, rdg, vertices);
1229   VEC_free (int, heap, vertices);
1230   free_rdg (rdg);
1231   free_dependence_relations (dependence_relations);
1232   free_data_refs (datarefs);
1233   VEC_free (loop_p, heap, loop_nest);
1234   return res;
1235 }
1236 
1237 /* Distribute all loops in the current function.  */
1238 
1239 static unsigned int
tree_loop_distribution(void)1240 tree_loop_distribution (void)
1241 {
1242   struct loop *loop;
1243   loop_iterator li;
1244   int nb_generated_loops = 0;
1245 
1246   FOR_EACH_LOOP (li, loop, 0)
1247     {
1248       VEC (gimple, heap) *work_list = NULL;
1249       int num = loop->num;
1250 
1251       /* If the loop doesn't have a single exit we will fail anyway,
1252 	 so do that early.  */
1253       if (!single_exit (loop))
1254 	continue;
1255 
1256       /* If both flag_tree_loop_distribute_patterns and
1257 	 flag_tree_loop_distribution are set, then only
1258 	 distribute_patterns is executed.  */
1259       if (flag_tree_loop_distribute_patterns)
1260 	{
1261 	  /* With the following working list, we're asking
1262 	     distribute_loop to separate from the rest of the loop the
1263 	     stores of the form "A[i] = 0".  */
1264 	  stores_zero_from_loop (loop, &work_list);
1265 
1266 	  /* Do nothing if there are no patterns to be distributed.  */
1267 	  if (VEC_length (gimple, work_list) > 0)
1268 	    nb_generated_loops = distribute_loop (loop, work_list);
1269 	}
1270       else if (flag_tree_loop_distribution)
1271 	{
1272 	  /* With the following working list, we're asking
1273 	     distribute_loop to separate the stores of the loop: when
1274 	     dependences allow, it will end on having one store per
1275 	     loop.  */
1276 	  stores_from_loop (loop, &work_list);
1277 
1278 	  /* A simple heuristic for cache locality is to not split
1279 	     stores to the same array.  Without this call, an unrolled
1280 	     loop would be split into as many loops as unroll factor,
1281 	     each loop storing in the same array.  */
1282 	  remove_similar_memory_refs (&work_list);
1283 
1284 	  nb_generated_loops = distribute_loop (loop, work_list);
1285 	}
1286 
1287       if (dump_file && (dump_flags & TDF_DETAILS))
1288 	{
1289 	  if (nb_generated_loops > 1)
1290 	    fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1291 		     num, nb_generated_loops);
1292 	  else
1293 	    fprintf (dump_file, "Loop %d is the same.\n", num);
1294 	}
1295 
1296       verify_loop_structure ();
1297 
1298       VEC_free (gimple, heap, work_list);
1299     }
1300 
1301   return 0;
1302 }
1303 
1304 static bool
gate_tree_loop_distribution(void)1305 gate_tree_loop_distribution (void)
1306 {
1307   return flag_tree_loop_distribution
1308     || flag_tree_loop_distribute_patterns;
1309 }
1310 
1311 struct gimple_opt_pass pass_loop_distribution =
1312 {
1313  {
1314   GIMPLE_PASS,
1315   "ldist",			/* name */
1316   gate_tree_loop_distribution,  /* gate */
1317   tree_loop_distribution,       /* execute */
1318   NULL,				/* sub */
1319   NULL,				/* next */
1320   0,				/* static_pass_number */
1321   TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
1322   PROP_cfg | PROP_ssa,		/* properties_required */
1323   0,				/* properties_provided */
1324   0,				/* properties_destroyed */
1325   0,				/* todo_flags_start */
1326   TODO_ggc_collect
1327   | TODO_verify_ssa             /* todo_flags_finish */
1328  }
1329 };
1330