1*e4b17023SJohn Marino /* Loop distribution.
2*e4b17023SJohn Marino    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3*e4b17023SJohn Marino    Free Software Foundation, Inc.
4*e4b17023SJohn Marino    Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5*e4b17023SJohn Marino    and Sebastian Pop <sebastian.pop@amd.com>.
6*e4b17023SJohn Marino 
7*e4b17023SJohn Marino This file is part of GCC.
8*e4b17023SJohn Marino 
9*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it
10*e4b17023SJohn Marino under the terms of the GNU General Public License as published by the
11*e4b17023SJohn Marino Free Software Foundation; either version 3, or (at your option) any
12*e4b17023SJohn Marino later version.
13*e4b17023SJohn Marino 
14*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT
15*e4b17023SJohn Marino ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17*e4b17023SJohn Marino for more details.
18*e4b17023SJohn Marino 
19*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20*e4b17023SJohn Marino along with GCC; see the file COPYING3.  If not see
21*e4b17023SJohn Marino <http://www.gnu.org/licenses/>.  */
22*e4b17023SJohn Marino 
23*e4b17023SJohn Marino /* This pass performs loop distribution: for example, the loop
24*e4b17023SJohn Marino 
25*e4b17023SJohn Marino    |DO I = 2, N
26*e4b17023SJohn Marino    |    A(I) = B(I) + C
27*e4b17023SJohn Marino    |    D(I) = A(I-1)*E
28*e4b17023SJohn Marino    |ENDDO
29*e4b17023SJohn Marino 
30*e4b17023SJohn Marino    is transformed to
31*e4b17023SJohn Marino 
32*e4b17023SJohn Marino    |DOALL I = 2, N
33*e4b17023SJohn Marino    |   A(I) = B(I) + C
34*e4b17023SJohn Marino    |ENDDO
35*e4b17023SJohn Marino    |
36*e4b17023SJohn Marino    |DOALL I = 2, N
37*e4b17023SJohn Marino    |   D(I) = A(I-1)*E
38*e4b17023SJohn Marino    |ENDDO
39*e4b17023SJohn Marino 
40*e4b17023SJohn Marino    This pass uses an RDG, Reduced Dependence Graph built on top of the
41*e4b17023SJohn Marino    data dependence relations.  The RDG is then topologically sorted to
42*e4b17023SJohn Marino    obtain a map of information producers/consumers based on which it
43*e4b17023SJohn Marino    generates the new loops.  */
44*e4b17023SJohn Marino 
45*e4b17023SJohn Marino #include "config.h"
46*e4b17023SJohn Marino #include "system.h"
47*e4b17023SJohn Marino #include "coretypes.h"
48*e4b17023SJohn Marino #include "tree-flow.h"
49*e4b17023SJohn Marino #include "cfgloop.h"
50*e4b17023SJohn Marino #include "tree-chrec.h"
51*e4b17023SJohn Marino #include "tree-data-ref.h"
52*e4b17023SJohn Marino #include "tree-scalar-evolution.h"
53*e4b17023SJohn Marino #include "tree-pass.h"
54*e4b17023SJohn Marino 
55*e4b17023SJohn Marino /* If bit I is not set, it means that this node represents an
56*e4b17023SJohn Marino    operation that has already been performed, and that should not be
57*e4b17023SJohn Marino    performed again.  This is the subgraph of remaining important
58*e4b17023SJohn Marino    computations that is passed to the DFS algorithm for avoiding to
59*e4b17023SJohn Marino    include several times the same stores in different loops.  */
60*e4b17023SJohn Marino static bitmap remaining_stmts;
61*e4b17023SJohn Marino 
62*e4b17023SJohn Marino /* A node of the RDG is marked in this bitmap when it has as a
63*e4b17023SJohn Marino    predecessor a node that writes to memory.  */
64*e4b17023SJohn Marino static bitmap upstream_mem_writes;
65*e4b17023SJohn Marino 
66*e4b17023SJohn Marino /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
67*e4b17023SJohn Marino    the LOOP.  */
68*e4b17023SJohn Marino 
69*e4b17023SJohn Marino static bool
ssa_name_has_uses_outside_loop_p(tree def,loop_p loop)70*e4b17023SJohn Marino ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
71*e4b17023SJohn Marino {
72*e4b17023SJohn Marino   imm_use_iterator imm_iter;
73*e4b17023SJohn Marino   use_operand_p use_p;
74*e4b17023SJohn Marino 
75*e4b17023SJohn Marino   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
76*e4b17023SJohn Marino     if (loop != loop_containing_stmt (USE_STMT (use_p)))
77*e4b17023SJohn Marino       return true;
78*e4b17023SJohn Marino 
79*e4b17023SJohn Marino   return false;
80*e4b17023SJohn Marino }
81*e4b17023SJohn Marino 
82*e4b17023SJohn Marino /* Returns true when STMT defines a scalar variable used after the
83*e4b17023SJohn Marino    loop.  */
84*e4b17023SJohn Marino 
85*e4b17023SJohn Marino static bool
stmt_has_scalar_dependences_outside_loop(gimple stmt)86*e4b17023SJohn Marino stmt_has_scalar_dependences_outside_loop (gimple stmt)
87*e4b17023SJohn Marino {
88*e4b17023SJohn Marino   tree name;
89*e4b17023SJohn Marino 
90*e4b17023SJohn Marino   switch (gimple_code (stmt))
91*e4b17023SJohn Marino     {
92*e4b17023SJohn Marino     case GIMPLE_CALL:
93*e4b17023SJohn Marino     case GIMPLE_ASSIGN:
94*e4b17023SJohn Marino       name = gimple_get_lhs (stmt);
95*e4b17023SJohn Marino       break;
96*e4b17023SJohn Marino 
97*e4b17023SJohn Marino     case GIMPLE_PHI:
98*e4b17023SJohn Marino       name = gimple_phi_result (stmt);
99*e4b17023SJohn Marino       break;
100*e4b17023SJohn Marino 
101*e4b17023SJohn Marino     default:
102*e4b17023SJohn Marino       return false;
103*e4b17023SJohn Marino     }
104*e4b17023SJohn Marino 
105*e4b17023SJohn Marino   return (name
106*e4b17023SJohn Marino 	  && TREE_CODE (name) == SSA_NAME
107*e4b17023SJohn Marino 	  && ssa_name_has_uses_outside_loop_p (name,
108*e4b17023SJohn Marino 					       loop_containing_stmt (stmt)));
109*e4b17023SJohn Marino }
110*e4b17023SJohn Marino 
111*e4b17023SJohn Marino /* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
112*e4b17023SJohn Marino    ORIG_LOOP.  */
113*e4b17023SJohn Marino 
114*e4b17023SJohn Marino static void
update_phis_for_loop_copy(struct loop * orig_loop,struct loop * new_loop)115*e4b17023SJohn Marino update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
116*e4b17023SJohn Marino {
117*e4b17023SJohn Marino   tree new_ssa_name;
118*e4b17023SJohn Marino   gimple_stmt_iterator si_new, si_orig;
119*e4b17023SJohn Marino   edge orig_loop_latch = loop_latch_edge (orig_loop);
120*e4b17023SJohn Marino   edge orig_entry_e = loop_preheader_edge (orig_loop);
121*e4b17023SJohn Marino   edge new_loop_entry_e = loop_preheader_edge (new_loop);
122*e4b17023SJohn Marino 
123*e4b17023SJohn Marino   /* Scan the phis in the headers of the old and new loops
124*e4b17023SJohn Marino      (they are organized in exactly the same order).  */
125*e4b17023SJohn Marino   for (si_new = gsi_start_phis (new_loop->header),
126*e4b17023SJohn Marino        si_orig = gsi_start_phis (orig_loop->header);
127*e4b17023SJohn Marino        !gsi_end_p (si_new) && !gsi_end_p (si_orig);
128*e4b17023SJohn Marino        gsi_next (&si_new), gsi_next (&si_orig))
129*e4b17023SJohn Marino     {
130*e4b17023SJohn Marino       tree def;
131*e4b17023SJohn Marino       source_location locus;
132*e4b17023SJohn Marino       gimple phi_new = gsi_stmt (si_new);
133*e4b17023SJohn Marino       gimple phi_orig = gsi_stmt (si_orig);
134*e4b17023SJohn Marino 
135*e4b17023SJohn Marino       /* Add the first phi argument for the phi in NEW_LOOP (the one
136*e4b17023SJohn Marino 	 associated with the entry of NEW_LOOP)  */
137*e4b17023SJohn Marino       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
138*e4b17023SJohn Marino       locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
139*e4b17023SJohn Marino       add_phi_arg (phi_new, def, new_loop_entry_e, locus);
140*e4b17023SJohn Marino 
141*e4b17023SJohn Marino       /* Add the second phi argument for the phi in NEW_LOOP (the one
142*e4b17023SJohn Marino 	 associated with the latch of NEW_LOOP)  */
143*e4b17023SJohn Marino       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
144*e4b17023SJohn Marino       locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
145*e4b17023SJohn Marino 
146*e4b17023SJohn Marino       if (TREE_CODE (def) == SSA_NAME)
147*e4b17023SJohn Marino 	{
148*e4b17023SJohn Marino 	  new_ssa_name = get_current_def (def);
149*e4b17023SJohn Marino 
150*e4b17023SJohn Marino 	  if (!new_ssa_name)
151*e4b17023SJohn Marino 	    /* This only happens if there are no definitions inside the
152*e4b17023SJohn Marino 	       loop.  Use the the invariant in the new loop as is.  */
153*e4b17023SJohn Marino 	    new_ssa_name = def;
154*e4b17023SJohn Marino 	}
155*e4b17023SJohn Marino       else
156*e4b17023SJohn Marino 	/* Could be an integer.  */
157*e4b17023SJohn Marino 	new_ssa_name = def;
158*e4b17023SJohn Marino 
159*e4b17023SJohn Marino       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
160*e4b17023SJohn Marino     }
161*e4b17023SJohn Marino }
162*e4b17023SJohn Marino 
163*e4b17023SJohn Marino /* Return a copy of LOOP placed before LOOP.  */
164*e4b17023SJohn Marino 
165*e4b17023SJohn Marino static struct loop *
copy_loop_before(struct loop * loop)166*e4b17023SJohn Marino copy_loop_before (struct loop *loop)
167*e4b17023SJohn Marino {
168*e4b17023SJohn Marino   struct loop *res;
169*e4b17023SJohn Marino   edge preheader = loop_preheader_edge (loop);
170*e4b17023SJohn Marino 
171*e4b17023SJohn Marino   if (!single_exit (loop))
172*e4b17023SJohn Marino     return NULL;
173*e4b17023SJohn Marino 
174*e4b17023SJohn Marino   initialize_original_copy_tables ();
175*e4b17023SJohn Marino   res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
176*e4b17023SJohn Marino   free_original_copy_tables ();
177*e4b17023SJohn Marino 
178*e4b17023SJohn Marino   if (!res)
179*e4b17023SJohn Marino     return NULL;
180*e4b17023SJohn Marino 
181*e4b17023SJohn Marino   update_phis_for_loop_copy (loop, res);
182*e4b17023SJohn Marino   rename_variables_in_loop (res);
183*e4b17023SJohn Marino 
184*e4b17023SJohn Marino   return res;
185*e4b17023SJohn Marino }
186*e4b17023SJohn Marino 
187*e4b17023SJohn Marino /* Creates an empty basic block after LOOP.  */
188*e4b17023SJohn Marino 
189*e4b17023SJohn Marino static void
create_bb_after_loop(struct loop * loop)190*e4b17023SJohn Marino create_bb_after_loop (struct loop *loop)
191*e4b17023SJohn Marino {
192*e4b17023SJohn Marino   edge exit = single_exit (loop);
193*e4b17023SJohn Marino 
194*e4b17023SJohn Marino   if (!exit)
195*e4b17023SJohn Marino     return;
196*e4b17023SJohn Marino 
197*e4b17023SJohn Marino   split_edge (exit);
198*e4b17023SJohn Marino }
199*e4b17023SJohn Marino 
200*e4b17023SJohn Marino /* Generate code for PARTITION from the code in LOOP.  The loop is
201*e4b17023SJohn Marino    copied when COPY_P is true.  All the statements not flagged in the
202*e4b17023SJohn Marino    PARTITION bitmap are removed from the loop or from its copy.  The
203*e4b17023SJohn Marino    statements are indexed in sequence inside a basic block, and the
204*e4b17023SJohn Marino    basic blocks of a loop are taken in dom order.  Returns true when
205*e4b17023SJohn Marino    the code gen succeeded. */
206*e4b17023SJohn Marino 
207*e4b17023SJohn Marino static bool
generate_loops_for_partition(struct loop * loop,bitmap partition,bool copy_p)208*e4b17023SJohn Marino generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
209*e4b17023SJohn Marino {
210*e4b17023SJohn Marino   unsigned i, x;
211*e4b17023SJohn Marino   gimple_stmt_iterator bsi;
212*e4b17023SJohn Marino   basic_block *bbs;
213*e4b17023SJohn Marino 
214*e4b17023SJohn Marino   if (copy_p)
215*e4b17023SJohn Marino     {
216*e4b17023SJohn Marino       loop = copy_loop_before (loop);
217*e4b17023SJohn Marino       create_preheader (loop, CP_SIMPLE_PREHEADERS);
218*e4b17023SJohn Marino       create_bb_after_loop (loop);
219*e4b17023SJohn Marino     }
220*e4b17023SJohn Marino 
221*e4b17023SJohn Marino   if (loop == NULL)
222*e4b17023SJohn Marino     return false;
223*e4b17023SJohn Marino 
224*e4b17023SJohn Marino   /* Remove stmts not in the PARTITION bitmap.  The order in which we
225*e4b17023SJohn Marino      visit the phi nodes and the statements is exactly as in
226*e4b17023SJohn Marino      stmts_from_loop.  */
227*e4b17023SJohn Marino   bbs = get_loop_body_in_dom_order (loop);
228*e4b17023SJohn Marino 
229*e4b17023SJohn Marino   if (MAY_HAVE_DEBUG_STMTS)
230*e4b17023SJohn Marino     for (x = 0, i = 0; i < loop->num_nodes; i++)
231*e4b17023SJohn Marino       {
232*e4b17023SJohn Marino 	basic_block bb = bbs[i];
233*e4b17023SJohn Marino 
234*e4b17023SJohn Marino 	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
235*e4b17023SJohn Marino 	  if (!bitmap_bit_p (partition, x++))
236*e4b17023SJohn Marino 	    reset_debug_uses (gsi_stmt (bsi));
237*e4b17023SJohn Marino 
238*e4b17023SJohn Marino 	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
239*e4b17023SJohn Marino 	  {
240*e4b17023SJohn Marino 	    gimple stmt = gsi_stmt (bsi);
241*e4b17023SJohn Marino 	    if (gimple_code (stmt) != GIMPLE_LABEL
242*e4b17023SJohn Marino 		&& !is_gimple_debug (stmt)
243*e4b17023SJohn Marino 		&& !bitmap_bit_p (partition, x++))
244*e4b17023SJohn Marino 	      reset_debug_uses (stmt);
245*e4b17023SJohn Marino 	  }
246*e4b17023SJohn Marino       }
247*e4b17023SJohn Marino 
248*e4b17023SJohn Marino   for (x = 0, i = 0; i < loop->num_nodes; i++)
249*e4b17023SJohn Marino     {
250*e4b17023SJohn Marino       basic_block bb = bbs[i];
251*e4b17023SJohn Marino 
252*e4b17023SJohn Marino       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
253*e4b17023SJohn Marino 	if (!bitmap_bit_p (partition, x++))
254*e4b17023SJohn Marino 	  {
255*e4b17023SJohn Marino 	    gimple phi = gsi_stmt (bsi);
256*e4b17023SJohn Marino 	    if (!is_gimple_reg (gimple_phi_result (phi)))
257*e4b17023SJohn Marino 	      mark_virtual_phi_result_for_renaming (phi);
258*e4b17023SJohn Marino 	    remove_phi_node (&bsi, true);
259*e4b17023SJohn Marino 	  }
260*e4b17023SJohn Marino 	else
261*e4b17023SJohn Marino 	  gsi_next (&bsi);
262*e4b17023SJohn Marino 
263*e4b17023SJohn Marino       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
264*e4b17023SJohn Marino 	{
265*e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (bsi);
266*e4b17023SJohn Marino 	  if (gimple_code (stmt) != GIMPLE_LABEL
267*e4b17023SJohn Marino 	      && !is_gimple_debug (stmt)
268*e4b17023SJohn Marino 	      && !bitmap_bit_p (partition, x++))
269*e4b17023SJohn Marino 	    {
270*e4b17023SJohn Marino 	      unlink_stmt_vdef (stmt);
271*e4b17023SJohn Marino 	      gsi_remove (&bsi, true);
272*e4b17023SJohn Marino 	      release_defs (stmt);
273*e4b17023SJohn Marino 	    }
274*e4b17023SJohn Marino 	  else
275*e4b17023SJohn Marino 	    gsi_next (&bsi);
276*e4b17023SJohn Marino 	}
277*e4b17023SJohn Marino     }
278*e4b17023SJohn Marino 
279*e4b17023SJohn Marino   free (bbs);
280*e4b17023SJohn Marino   return true;
281*e4b17023SJohn Marino }
282*e4b17023SJohn Marino 
283*e4b17023SJohn Marino /* Build the size argument for a memset call.  */
284*e4b17023SJohn Marino 
285*e4b17023SJohn Marino static inline tree
build_size_arg_loc(location_t loc,tree nb_iter,tree op,gimple_seq * stmt_list)286*e4b17023SJohn Marino build_size_arg_loc (location_t loc, tree nb_iter, tree op,
287*e4b17023SJohn Marino 		    gimple_seq *stmt_list)
288*e4b17023SJohn Marino {
289*e4b17023SJohn Marino   gimple_seq stmts;
290*e4b17023SJohn Marino   tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
291*e4b17023SJohn Marino 			    fold_convert_loc (loc, size_type_node, nb_iter),
292*e4b17023SJohn Marino 			    fold_convert_loc (loc, size_type_node,
293*e4b17023SJohn Marino 					      TYPE_SIZE_UNIT (TREE_TYPE (op))));
294*e4b17023SJohn Marino   x = force_gimple_operand (x, &stmts, true, NULL);
295*e4b17023SJohn Marino   gimple_seq_add_seq (stmt_list, stmts);
296*e4b17023SJohn Marino 
297*e4b17023SJohn Marino   return x;
298*e4b17023SJohn Marino }
299*e4b17023SJohn Marino 
300*e4b17023SJohn Marino /* Generate a call to memset.  Return true when the operation succeeded.  */
301*e4b17023SJohn Marino 
302*e4b17023SJohn Marino static void
generate_memset_zero(gimple stmt,tree op0,tree nb_iter,gimple_stmt_iterator bsi)303*e4b17023SJohn Marino generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
304*e4b17023SJohn Marino 		      gimple_stmt_iterator bsi)
305*e4b17023SJohn Marino {
306*e4b17023SJohn Marino   tree addr_base, nb_bytes;
307*e4b17023SJohn Marino   bool res = false;
308*e4b17023SJohn Marino   gimple_seq stmt_list = NULL, stmts;
309*e4b17023SJohn Marino   gimple fn_call;
310*e4b17023SJohn Marino   tree mem, fn;
311*e4b17023SJohn Marino   struct data_reference *dr = XCNEW (struct data_reference);
312*e4b17023SJohn Marino   location_t loc = gimple_location (stmt);
313*e4b17023SJohn Marino 
314*e4b17023SJohn Marino   DR_STMT (dr) = stmt;
315*e4b17023SJohn Marino   DR_REF (dr) = op0;
316*e4b17023SJohn Marino   res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
317*e4b17023SJohn Marino   gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
318*e4b17023SJohn Marino 
319*e4b17023SJohn Marino   nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
320*e4b17023SJohn Marino   addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
321*e4b17023SJohn Marino   addr_base = fold_convert_loc (loc, sizetype, addr_base);
322*e4b17023SJohn Marino 
323*e4b17023SJohn Marino   /* Test for a negative stride, iterating over every element.  */
324*e4b17023SJohn Marino   if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
325*e4b17023SJohn Marino     {
326*e4b17023SJohn Marino       addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
327*e4b17023SJohn Marino 				  fold_convert_loc (loc, sizetype, nb_bytes));
328*e4b17023SJohn Marino       addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
329*e4b17023SJohn Marino 				  TYPE_SIZE_UNIT (TREE_TYPE (op0)));
330*e4b17023SJohn Marino     }
331*e4b17023SJohn Marino 
332*e4b17023SJohn Marino   addr_base = fold_build_pointer_plus_loc (loc,
333*e4b17023SJohn Marino 					   DR_BASE_ADDRESS (dr), addr_base);
334*e4b17023SJohn Marino   mem = force_gimple_operand (addr_base, &stmts, true, NULL);
335*e4b17023SJohn Marino   gimple_seq_add_seq (&stmt_list, stmts);
336*e4b17023SJohn Marino 
337*e4b17023SJohn Marino   fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
338*e4b17023SJohn Marino   fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
339*e4b17023SJohn Marino   gimple_seq_add_stmt (&stmt_list, fn_call);
340*e4b17023SJohn Marino   gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
341*e4b17023SJohn Marino 
342*e4b17023SJohn Marino   if (dump_file && (dump_flags & TDF_DETAILS))
343*e4b17023SJohn Marino     fprintf (dump_file, "generated memset zero\n");
344*e4b17023SJohn Marino 
345*e4b17023SJohn Marino   free_data_ref (dr);
346*e4b17023SJohn Marino }
347*e4b17023SJohn Marino 
348*e4b17023SJohn Marino /* Tries to generate a builtin function for the instructions of LOOP
349*e4b17023SJohn Marino    pointed to by the bits set in PARTITION.  Returns true when the
350*e4b17023SJohn Marino    operation succeeded.  */
351*e4b17023SJohn Marino 
352*e4b17023SJohn Marino static bool
generate_builtin(struct loop * loop,bitmap partition,bool copy_p)353*e4b17023SJohn Marino generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
354*e4b17023SJohn Marino {
355*e4b17023SJohn Marino   bool res = false;
356*e4b17023SJohn Marino   unsigned i, x = 0;
357*e4b17023SJohn Marino   basic_block *bbs;
358*e4b17023SJohn Marino   gimple write = NULL;
359*e4b17023SJohn Marino   gimple_stmt_iterator bsi;
360*e4b17023SJohn Marino   tree nb_iter = number_of_exit_cond_executions (loop);
361*e4b17023SJohn Marino 
362*e4b17023SJohn Marino   if (!nb_iter || nb_iter == chrec_dont_know)
363*e4b17023SJohn Marino     return false;
364*e4b17023SJohn Marino 
365*e4b17023SJohn Marino   bbs = get_loop_body_in_dom_order (loop);
366*e4b17023SJohn Marino 
367*e4b17023SJohn Marino   for (i = 0; i < loop->num_nodes; i++)
368*e4b17023SJohn Marino     {
369*e4b17023SJohn Marino       basic_block bb = bbs[i];
370*e4b17023SJohn Marino 
371*e4b17023SJohn Marino       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
372*e4b17023SJohn Marino 	x++;
373*e4b17023SJohn Marino 
374*e4b17023SJohn Marino       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
375*e4b17023SJohn Marino 	{
376*e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (bsi);
377*e4b17023SJohn Marino 
378*e4b17023SJohn Marino 	  if (gimple_code (stmt) == GIMPLE_LABEL
379*e4b17023SJohn Marino 	      || is_gimple_debug (stmt))
380*e4b17023SJohn Marino 	    continue;
381*e4b17023SJohn Marino 
382*e4b17023SJohn Marino 	  if (!bitmap_bit_p (partition, x++))
383*e4b17023SJohn Marino 	    continue;
384*e4b17023SJohn Marino 
385*e4b17023SJohn Marino 	  /* If the stmt has uses outside of the loop fail.  */
386*e4b17023SJohn Marino 	  if (stmt_has_scalar_dependences_outside_loop (stmt))
387*e4b17023SJohn Marino 	    goto end;
388*e4b17023SJohn Marino 
389*e4b17023SJohn Marino 	  if (is_gimple_assign (stmt)
390*e4b17023SJohn Marino 	      && !is_gimple_reg (gimple_assign_lhs (stmt)))
391*e4b17023SJohn Marino 	    {
392*e4b17023SJohn Marino 	      /* Don't generate the builtins when there are more than
393*e4b17023SJohn Marino 		 one memory write.  */
394*e4b17023SJohn Marino 	      if (write != NULL)
395*e4b17023SJohn Marino 		goto end;
396*e4b17023SJohn Marino 
397*e4b17023SJohn Marino 	      write = stmt;
398*e4b17023SJohn Marino 	      if (bb == loop->latch)
399*e4b17023SJohn Marino 		nb_iter = number_of_latch_executions (loop);
400*e4b17023SJohn Marino 	    }
401*e4b17023SJohn Marino 	}
402*e4b17023SJohn Marino     }
403*e4b17023SJohn Marino 
404*e4b17023SJohn Marino   if (!stmt_with_adjacent_zero_store_dr_p (write))
405*e4b17023SJohn Marino     goto end;
406*e4b17023SJohn Marino 
407*e4b17023SJohn Marino   /* The new statements will be placed before LOOP.  */
408*e4b17023SJohn Marino   bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
409*e4b17023SJohn Marino   generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
410*e4b17023SJohn Marino   res = true;
411*e4b17023SJohn Marino 
412*e4b17023SJohn Marino   /* If this is the last partition for which we generate code, we have
413*e4b17023SJohn Marino      to destroy the loop.  */
414*e4b17023SJohn Marino   if (!copy_p)
415*e4b17023SJohn Marino     {
416*e4b17023SJohn Marino       unsigned nbbs = loop->num_nodes;
417*e4b17023SJohn Marino       edge exit = single_exit (loop);
418*e4b17023SJohn Marino       basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
419*e4b17023SJohn Marino       redirect_edge_pred (exit, src);
420*e4b17023SJohn Marino       exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
421*e4b17023SJohn Marino       exit->flags |= EDGE_FALLTHRU;
422*e4b17023SJohn Marino       cancel_loop_tree (loop);
423*e4b17023SJohn Marino       rescan_loop_exit (exit, false, true);
424*e4b17023SJohn Marino 
425*e4b17023SJohn Marino       for (i = 0; i < nbbs; i++)
426*e4b17023SJohn Marino 	delete_basic_block (bbs[i]);
427*e4b17023SJohn Marino 
428*e4b17023SJohn Marino       set_immediate_dominator (CDI_DOMINATORS, dest,
429*e4b17023SJohn Marino 			       recompute_dominator (CDI_DOMINATORS, dest));
430*e4b17023SJohn Marino     }
431*e4b17023SJohn Marino 
432*e4b17023SJohn Marino  end:
433*e4b17023SJohn Marino   free (bbs);
434*e4b17023SJohn Marino   return res;
435*e4b17023SJohn Marino }
436*e4b17023SJohn Marino 
437*e4b17023SJohn Marino /* Generates code for PARTITION.  For simple loops, this function can
438*e4b17023SJohn Marino    generate a built-in.  */
439*e4b17023SJohn Marino 
440*e4b17023SJohn Marino static bool
generate_code_for_partition(struct loop * loop,bitmap partition,bool copy_p)441*e4b17023SJohn Marino generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
442*e4b17023SJohn Marino {
443*e4b17023SJohn Marino   if (generate_builtin (loop, partition, copy_p))
444*e4b17023SJohn Marino     return true;
445*e4b17023SJohn Marino 
446*e4b17023SJohn Marino   return generate_loops_for_partition (loop, partition, copy_p);
447*e4b17023SJohn Marino }
448*e4b17023SJohn Marino 
449*e4b17023SJohn Marino 
450*e4b17023SJohn Marino /* Returns true if the node V of RDG cannot be recomputed.  */
451*e4b17023SJohn Marino 
452*e4b17023SJohn Marino static bool
rdg_cannot_recompute_vertex_p(struct graph * rdg,int v)453*e4b17023SJohn Marino rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
454*e4b17023SJohn Marino {
455*e4b17023SJohn Marino   if (RDG_MEM_WRITE_STMT (rdg, v))
456*e4b17023SJohn Marino     return true;
457*e4b17023SJohn Marino 
458*e4b17023SJohn Marino   return false;
459*e4b17023SJohn Marino }
460*e4b17023SJohn Marino 
461*e4b17023SJohn Marino /* Returns true when the vertex V has already been generated in the
462*e4b17023SJohn Marino    current partition (V is in PROCESSED), or when V belongs to another
463*e4b17023SJohn Marino    partition and cannot be recomputed (V is not in REMAINING_STMTS).  */
464*e4b17023SJohn Marino 
465*e4b17023SJohn Marino static inline bool
already_processed_vertex_p(bitmap processed,int v)466*e4b17023SJohn Marino already_processed_vertex_p (bitmap processed, int v)
467*e4b17023SJohn Marino {
468*e4b17023SJohn Marino   return (bitmap_bit_p (processed, v)
469*e4b17023SJohn Marino 	  || !bitmap_bit_p (remaining_stmts, v));
470*e4b17023SJohn Marino }
471*e4b17023SJohn Marino 
472*e4b17023SJohn Marino /* Returns NULL when there is no anti-dependence among the successors
473*e4b17023SJohn Marino    of vertex V, otherwise returns the edge with the anti-dep.  */
474*e4b17023SJohn Marino 
475*e4b17023SJohn Marino static struct graph_edge *
has_anti_dependence(struct vertex * v)476*e4b17023SJohn Marino has_anti_dependence (struct vertex *v)
477*e4b17023SJohn Marino {
478*e4b17023SJohn Marino   struct graph_edge *e;
479*e4b17023SJohn Marino 
480*e4b17023SJohn Marino   if (v->succ)
481*e4b17023SJohn Marino     for (e = v->succ; e; e = e->succ_next)
482*e4b17023SJohn Marino       if (RDGE_TYPE (e) == anti_dd)
483*e4b17023SJohn Marino 	return e;
484*e4b17023SJohn Marino 
485*e4b17023SJohn Marino   return NULL;
486*e4b17023SJohn Marino }
487*e4b17023SJohn Marino 
488*e4b17023SJohn Marino /* Returns true when V has an anti-dependence edge among its successors.  */
489*e4b17023SJohn Marino 
490*e4b17023SJohn Marino static bool
predecessor_has_mem_write(struct graph * rdg,struct vertex * v)491*e4b17023SJohn Marino predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
492*e4b17023SJohn Marino {
493*e4b17023SJohn Marino   struct graph_edge *e;
494*e4b17023SJohn Marino 
495*e4b17023SJohn Marino   if (v->pred)
496*e4b17023SJohn Marino     for (e = v->pred; e; e = e->pred_next)
497*e4b17023SJohn Marino       if (bitmap_bit_p (upstream_mem_writes, e->src)
498*e4b17023SJohn Marino 	  /* Don't consider flow channels: a write to memory followed
499*e4b17023SJohn Marino 	     by a read from memory.  These channels allow the split of
500*e4b17023SJohn Marino 	     the RDG in different partitions.  */
501*e4b17023SJohn Marino 	  && !RDG_MEM_WRITE_STMT (rdg, e->src))
502*e4b17023SJohn Marino 	return true;
503*e4b17023SJohn Marino 
504*e4b17023SJohn Marino   return false;
505*e4b17023SJohn Marino }
506*e4b17023SJohn Marino 
507*e4b17023SJohn Marino /* Initializes the upstream_mem_writes bitmap following the
508*e4b17023SJohn Marino    information from RDG.  */
509*e4b17023SJohn Marino 
510*e4b17023SJohn Marino static void
mark_nodes_having_upstream_mem_writes(struct graph * rdg)511*e4b17023SJohn Marino mark_nodes_having_upstream_mem_writes (struct graph *rdg)
512*e4b17023SJohn Marino {
513*e4b17023SJohn Marino   int v, x;
514*e4b17023SJohn Marino   bitmap seen = BITMAP_ALLOC (NULL);
515*e4b17023SJohn Marino 
516*e4b17023SJohn Marino   for (v = rdg->n_vertices - 1; v >= 0; v--)
517*e4b17023SJohn Marino     if (!bitmap_bit_p (seen, v))
518*e4b17023SJohn Marino       {
519*e4b17023SJohn Marino 	unsigned i;
520*e4b17023SJohn Marino 	VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
521*e4b17023SJohn Marino 
522*e4b17023SJohn Marino 	graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
523*e4b17023SJohn Marino 
524*e4b17023SJohn Marino 	FOR_EACH_VEC_ELT (int, nodes, i, x)
525*e4b17023SJohn Marino 	  {
526*e4b17023SJohn Marino 	    if (!bitmap_set_bit (seen, x))
527*e4b17023SJohn Marino 	      continue;
528*e4b17023SJohn Marino 
529*e4b17023SJohn Marino 	    if (RDG_MEM_WRITE_STMT (rdg, x)
530*e4b17023SJohn Marino 		|| predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
531*e4b17023SJohn Marino 		/* In anti dependences the read should occur before
532*e4b17023SJohn Marino 		   the write, this is why both the read and the write
533*e4b17023SJohn Marino 		   should be placed in the same partition.  */
534*e4b17023SJohn Marino 		|| has_anti_dependence (&(rdg->vertices[x])))
535*e4b17023SJohn Marino 	      {
536*e4b17023SJohn Marino 		bitmap_set_bit (upstream_mem_writes, x);
537*e4b17023SJohn Marino 	      }
538*e4b17023SJohn Marino 	  }
539*e4b17023SJohn Marino 
540*e4b17023SJohn Marino 	VEC_free (int, heap, nodes);
541*e4b17023SJohn Marino       }
542*e4b17023SJohn Marino }
543*e4b17023SJohn Marino 
544*e4b17023SJohn Marino /* Returns true when vertex u has a memory write node as a predecessor
545*e4b17023SJohn Marino    in RDG.  */
546*e4b17023SJohn Marino 
547*e4b17023SJohn Marino static bool
has_upstream_mem_writes(int u)548*e4b17023SJohn Marino has_upstream_mem_writes (int u)
549*e4b17023SJohn Marino {
550*e4b17023SJohn Marino   return bitmap_bit_p (upstream_mem_writes, u);
551*e4b17023SJohn Marino }
552*e4b17023SJohn Marino 
553*e4b17023SJohn Marino static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
554*e4b17023SJohn Marino 					   bitmap, bool *);
555*e4b17023SJohn Marino 
556*e4b17023SJohn Marino /* Flag the uses of U stopping following the information from
557*e4b17023SJohn Marino    upstream_mem_writes.  */
558*e4b17023SJohn Marino 
559*e4b17023SJohn Marino static void
rdg_flag_uses(struct graph * rdg,int u,bitmap partition,bitmap loops,bitmap processed,bool * part_has_writes)560*e4b17023SJohn Marino rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
561*e4b17023SJohn Marino 	       bitmap processed, bool *part_has_writes)
562*e4b17023SJohn Marino {
563*e4b17023SJohn Marino   use_operand_p use_p;
564*e4b17023SJohn Marino   struct vertex *x = &(rdg->vertices[u]);
565*e4b17023SJohn Marino   gimple stmt = RDGV_STMT (x);
566*e4b17023SJohn Marino   struct graph_edge *anti_dep = has_anti_dependence (x);
567*e4b17023SJohn Marino 
568*e4b17023SJohn Marino   /* Keep in the same partition the destination of an antidependence,
569*e4b17023SJohn Marino      because this is a store to the exact same location.  Putting this
570*e4b17023SJohn Marino      in another partition is bad for cache locality.  */
571*e4b17023SJohn Marino   if (anti_dep)
572*e4b17023SJohn Marino     {
573*e4b17023SJohn Marino       int v = anti_dep->dest;
574*e4b17023SJohn Marino 
575*e4b17023SJohn Marino       if (!already_processed_vertex_p (processed, v))
576*e4b17023SJohn Marino 	rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
577*e4b17023SJohn Marino 				       processed, part_has_writes);
578*e4b17023SJohn Marino     }
579*e4b17023SJohn Marino 
580*e4b17023SJohn Marino   if (gimple_code (stmt) != GIMPLE_PHI)
581*e4b17023SJohn Marino     {
582*e4b17023SJohn Marino       if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
583*e4b17023SJohn Marino 	{
584*e4b17023SJohn Marino 	  tree use = USE_FROM_PTR (use_p);
585*e4b17023SJohn Marino 
586*e4b17023SJohn Marino 	  if (TREE_CODE (use) == SSA_NAME)
587*e4b17023SJohn Marino 	    {
588*e4b17023SJohn Marino 	      gimple def_stmt = SSA_NAME_DEF_STMT (use);
589*e4b17023SJohn Marino 	      int v = rdg_vertex_for_stmt (rdg, def_stmt);
590*e4b17023SJohn Marino 
591*e4b17023SJohn Marino 	      if (v >= 0
592*e4b17023SJohn Marino 		  && !already_processed_vertex_p (processed, v))
593*e4b17023SJohn Marino 		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
594*e4b17023SJohn Marino 					       processed, part_has_writes);
595*e4b17023SJohn Marino 	    }
596*e4b17023SJohn Marino 	}
597*e4b17023SJohn Marino     }
598*e4b17023SJohn Marino 
599*e4b17023SJohn Marino   if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
600*e4b17023SJohn Marino     {
601*e4b17023SJohn Marino       tree op0 = gimple_assign_lhs (stmt);
602*e4b17023SJohn Marino 
603*e4b17023SJohn Marino       /* Scalar channels don't have enough space for transmitting data
604*e4b17023SJohn Marino 	 between tasks, unless we add more storage by privatizing.  */
605*e4b17023SJohn Marino       if (is_gimple_reg (op0))
606*e4b17023SJohn Marino 	{
607*e4b17023SJohn Marino 	  use_operand_p use_p;
608*e4b17023SJohn Marino 	  imm_use_iterator iter;
609*e4b17023SJohn Marino 
610*e4b17023SJohn Marino 	  FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
611*e4b17023SJohn Marino 	    {
612*e4b17023SJohn Marino 	      int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
613*e4b17023SJohn Marino 
614*e4b17023SJohn Marino 	      if (!already_processed_vertex_p (processed, v))
615*e4b17023SJohn Marino 		rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
616*e4b17023SJohn Marino 					       processed, part_has_writes);
617*e4b17023SJohn Marino 	    }
618*e4b17023SJohn Marino 	}
619*e4b17023SJohn Marino     }
620*e4b17023SJohn Marino }
621*e4b17023SJohn Marino 
622*e4b17023SJohn Marino /* Flag V from RDG as part of PARTITION, and also flag its loop number
623*e4b17023SJohn Marino    in LOOPS.  */
624*e4b17023SJohn Marino 
625*e4b17023SJohn Marino static void
rdg_flag_vertex(struct graph * rdg,int v,bitmap partition,bitmap loops,bool * part_has_writes)626*e4b17023SJohn Marino rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
627*e4b17023SJohn Marino 		 bool *part_has_writes)
628*e4b17023SJohn Marino {
629*e4b17023SJohn Marino   struct loop *loop;
630*e4b17023SJohn Marino 
631*e4b17023SJohn Marino   if (!bitmap_set_bit (partition, v))
632*e4b17023SJohn Marino     return;
633*e4b17023SJohn Marino 
634*e4b17023SJohn Marino   loop = loop_containing_stmt (RDG_STMT (rdg, v));
635*e4b17023SJohn Marino   bitmap_set_bit (loops, loop->num);
636*e4b17023SJohn Marino 
637*e4b17023SJohn Marino   if (rdg_cannot_recompute_vertex_p (rdg, v))
638*e4b17023SJohn Marino     {
639*e4b17023SJohn Marino       *part_has_writes = true;
640*e4b17023SJohn Marino       bitmap_clear_bit (remaining_stmts, v);
641*e4b17023SJohn Marino     }
642*e4b17023SJohn Marino }
643*e4b17023SJohn Marino 
644*e4b17023SJohn Marino /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
645*e4b17023SJohn Marino    Also flag their loop number in LOOPS.  */
646*e4b17023SJohn Marino 
647*e4b17023SJohn Marino static void
rdg_flag_vertex_and_dependent(struct graph * rdg,int v,bitmap partition,bitmap loops,bitmap processed,bool * part_has_writes)648*e4b17023SJohn Marino rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
649*e4b17023SJohn Marino 			       bitmap loops, bitmap processed,
650*e4b17023SJohn Marino 			       bool *part_has_writes)
651*e4b17023SJohn Marino {
652*e4b17023SJohn Marino   unsigned i;
653*e4b17023SJohn Marino   VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
654*e4b17023SJohn Marino   int x;
655*e4b17023SJohn Marino 
656*e4b17023SJohn Marino   bitmap_set_bit (processed, v);
657*e4b17023SJohn Marino   rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
658*e4b17023SJohn Marino   graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
659*e4b17023SJohn Marino   rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
660*e4b17023SJohn Marino 
661*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (int, nodes, i, x)
662*e4b17023SJohn Marino     if (!already_processed_vertex_p (processed, x))
663*e4b17023SJohn Marino       rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
664*e4b17023SJohn Marino 				     part_has_writes);
665*e4b17023SJohn Marino 
666*e4b17023SJohn Marino   VEC_free (int, heap, nodes);
667*e4b17023SJohn Marino }
668*e4b17023SJohn Marino 
669*e4b17023SJohn Marino /* Initialize CONDS with all the condition statements from the basic
670*e4b17023SJohn Marino    blocks of LOOP.  */
671*e4b17023SJohn Marino 
672*e4b17023SJohn Marino static void
collect_condition_stmts(struct loop * loop,VEC (gimple,heap)** conds)673*e4b17023SJohn Marino collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
674*e4b17023SJohn Marino {
675*e4b17023SJohn Marino   unsigned i;
676*e4b17023SJohn Marino   edge e;
677*e4b17023SJohn Marino   VEC (edge, heap) *exits = get_loop_exit_edges (loop);
678*e4b17023SJohn Marino 
679*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (edge, exits, i, e)
680*e4b17023SJohn Marino     {
681*e4b17023SJohn Marino       gimple cond = last_stmt (e->src);
682*e4b17023SJohn Marino 
683*e4b17023SJohn Marino       if (cond)
684*e4b17023SJohn Marino 	VEC_safe_push (gimple, heap, *conds, cond);
685*e4b17023SJohn Marino     }
686*e4b17023SJohn Marino 
687*e4b17023SJohn Marino   VEC_free (edge, heap, exits);
688*e4b17023SJohn Marino }
689*e4b17023SJohn Marino 
690*e4b17023SJohn Marino /* Add to PARTITION all the exit condition statements for LOOPS
691*e4b17023SJohn Marino    together with all their dependent statements determined from
692*e4b17023SJohn Marino    RDG.  */
693*e4b17023SJohn Marino 
694*e4b17023SJohn Marino static void
rdg_flag_loop_exits(struct graph * rdg,bitmap loops,bitmap partition,bitmap processed,bool * part_has_writes)695*e4b17023SJohn Marino rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
696*e4b17023SJohn Marino 		     bitmap processed, bool *part_has_writes)
697*e4b17023SJohn Marino {
698*e4b17023SJohn Marino   unsigned i;
699*e4b17023SJohn Marino   bitmap_iterator bi;
700*e4b17023SJohn Marino   VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
701*e4b17023SJohn Marino 
702*e4b17023SJohn Marino   EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
703*e4b17023SJohn Marino     collect_condition_stmts (get_loop (i), &conds);
704*e4b17023SJohn Marino 
705*e4b17023SJohn Marino   while (!VEC_empty (gimple, conds))
706*e4b17023SJohn Marino     {
707*e4b17023SJohn Marino       gimple cond = VEC_pop (gimple, conds);
708*e4b17023SJohn Marino       int v = rdg_vertex_for_stmt (rdg, cond);
709*e4b17023SJohn Marino       bitmap new_loops = BITMAP_ALLOC (NULL);
710*e4b17023SJohn Marino 
711*e4b17023SJohn Marino       if (!already_processed_vertex_p (processed, v))
712*e4b17023SJohn Marino 	rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
713*e4b17023SJohn Marino 				       part_has_writes);
714*e4b17023SJohn Marino 
715*e4b17023SJohn Marino       EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
716*e4b17023SJohn Marino 	if (bitmap_set_bit (loops, i))
717*e4b17023SJohn Marino 	  collect_condition_stmts (get_loop (i), &conds);
718*e4b17023SJohn Marino 
719*e4b17023SJohn Marino       BITMAP_FREE (new_loops);
720*e4b17023SJohn Marino     }
721*e4b17023SJohn Marino 
722*e4b17023SJohn Marino   VEC_free (gimple, heap, conds);
723*e4b17023SJohn Marino }
724*e4b17023SJohn Marino 
725*e4b17023SJohn Marino /* Returns a bitmap in which all the statements needed for computing
726*e4b17023SJohn Marino    the strongly connected component C of the RDG are flagged, also
727*e4b17023SJohn Marino    including the loop exit conditions.  */
728*e4b17023SJohn Marino 
729*e4b17023SJohn Marino static bitmap
build_rdg_partition_for_component(struct graph * rdg,rdgc c,bool * part_has_writes)730*e4b17023SJohn Marino build_rdg_partition_for_component (struct graph *rdg, rdgc c,
731*e4b17023SJohn Marino 				   bool *part_has_writes)
732*e4b17023SJohn Marino {
733*e4b17023SJohn Marino   int i, v;
734*e4b17023SJohn Marino   bitmap partition = BITMAP_ALLOC (NULL);
735*e4b17023SJohn Marino   bitmap loops = BITMAP_ALLOC (NULL);
736*e4b17023SJohn Marino   bitmap processed = BITMAP_ALLOC (NULL);
737*e4b17023SJohn Marino 
738*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (int, c->vertices, i, v)
739*e4b17023SJohn Marino     if (!already_processed_vertex_p (processed, v))
740*e4b17023SJohn Marino       rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
741*e4b17023SJohn Marino 				     part_has_writes);
742*e4b17023SJohn Marino 
743*e4b17023SJohn Marino   rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
744*e4b17023SJohn Marino 
745*e4b17023SJohn Marino   BITMAP_FREE (processed);
746*e4b17023SJohn Marino   BITMAP_FREE (loops);
747*e4b17023SJohn Marino   return partition;
748*e4b17023SJohn Marino }
749*e4b17023SJohn Marino 
750*e4b17023SJohn Marino /* Free memory for COMPONENTS.  */
751*e4b17023SJohn Marino 
752*e4b17023SJohn Marino static void
free_rdg_components(VEC (rdgc,heap)* components)753*e4b17023SJohn Marino free_rdg_components (VEC (rdgc, heap) *components)
754*e4b17023SJohn Marino {
755*e4b17023SJohn Marino   int i;
756*e4b17023SJohn Marino   rdgc x;
757*e4b17023SJohn Marino 
758*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (rdgc, components, i, x)
759*e4b17023SJohn Marino     {
760*e4b17023SJohn Marino       VEC_free (int, heap, x->vertices);
761*e4b17023SJohn Marino       free (x);
762*e4b17023SJohn Marino     }
763*e4b17023SJohn Marino 
764*e4b17023SJohn Marino   VEC_free (rdgc, heap, components);
765*e4b17023SJohn Marino }
766*e4b17023SJohn Marino 
767*e4b17023SJohn Marino /* Build the COMPONENTS vector with the strongly connected components
768*e4b17023SJohn Marino    of RDG in which the STARTING_VERTICES occur.  */
769*e4b17023SJohn Marino 
770*e4b17023SJohn Marino static void
rdg_build_components(struct graph * rdg,VEC (int,heap)* starting_vertices,VEC (rdgc,heap)** components)771*e4b17023SJohn Marino rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
772*e4b17023SJohn Marino 		      VEC (rdgc, heap) **components)
773*e4b17023SJohn Marino {
774*e4b17023SJohn Marino   int i, v;
775*e4b17023SJohn Marino   bitmap saved_components = BITMAP_ALLOC (NULL);
776*e4b17023SJohn Marino   int n_components = graphds_scc (rdg, NULL);
777*e4b17023SJohn Marino   VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
778*e4b17023SJohn Marino 
779*e4b17023SJohn Marino   for (i = 0; i < n_components; i++)
780*e4b17023SJohn Marino     all_components[i] = VEC_alloc (int, heap, 3);
781*e4b17023SJohn Marino 
782*e4b17023SJohn Marino   for (i = 0; i < rdg->n_vertices; i++)
783*e4b17023SJohn Marino     VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
784*e4b17023SJohn Marino 
785*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
786*e4b17023SJohn Marino     {
787*e4b17023SJohn Marino       int c = rdg->vertices[v].component;
788*e4b17023SJohn Marino 
789*e4b17023SJohn Marino       if (bitmap_set_bit (saved_components, c))
790*e4b17023SJohn Marino 	{
791*e4b17023SJohn Marino 	  rdgc x = XCNEW (struct rdg_component);
792*e4b17023SJohn Marino 	  x->num = c;
793*e4b17023SJohn Marino 	  x->vertices = all_components[c];
794*e4b17023SJohn Marino 
795*e4b17023SJohn Marino 	  VEC_safe_push (rdgc, heap, *components, x);
796*e4b17023SJohn Marino 	}
797*e4b17023SJohn Marino     }
798*e4b17023SJohn Marino 
799*e4b17023SJohn Marino   for (i = 0; i < n_components; i++)
800*e4b17023SJohn Marino     if (!bitmap_bit_p (saved_components, i))
801*e4b17023SJohn Marino       VEC_free (int, heap, all_components[i]);
802*e4b17023SJohn Marino 
803*e4b17023SJohn Marino   free (all_components);
804*e4b17023SJohn Marino   BITMAP_FREE (saved_components);
805*e4b17023SJohn Marino }
806*e4b17023SJohn Marino 
807*e4b17023SJohn Marino /* Returns true when it is possible to generate a builtin pattern for
808*e4b17023SJohn Marino    the PARTITION of RDG.  For the moment we detect only the memset
809*e4b17023SJohn Marino    zero pattern.  */
810*e4b17023SJohn Marino 
811*e4b17023SJohn Marino static bool
can_generate_builtin(struct graph * rdg,bitmap partition)812*e4b17023SJohn Marino can_generate_builtin (struct graph *rdg, bitmap partition)
813*e4b17023SJohn Marino {
814*e4b17023SJohn Marino   unsigned i;
815*e4b17023SJohn Marino   bitmap_iterator bi;
816*e4b17023SJohn Marino   int nb_reads = 0;
817*e4b17023SJohn Marino   int nb_writes = 0;
818*e4b17023SJohn Marino   int stores_zero = 0;
819*e4b17023SJohn Marino 
820*e4b17023SJohn Marino   EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
821*e4b17023SJohn Marino     if (RDG_MEM_READS_STMT (rdg, i))
822*e4b17023SJohn Marino       nb_reads++;
823*e4b17023SJohn Marino     else if (RDG_MEM_WRITE_STMT (rdg, i))
824*e4b17023SJohn Marino       {
825*e4b17023SJohn Marino 	nb_writes++;
826*e4b17023SJohn Marino 	if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
827*e4b17023SJohn Marino 	  stores_zero++;
828*e4b17023SJohn Marino       }
829*e4b17023SJohn Marino 
830*e4b17023SJohn Marino   return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
831*e4b17023SJohn Marino }
832*e4b17023SJohn Marino 
833*e4b17023SJohn Marino /* Returns true when PARTITION1 and PARTITION2 have similar memory
834*e4b17023SJohn Marino    accesses in RDG.  */
835*e4b17023SJohn Marino 
836*e4b17023SJohn Marino static bool
similar_memory_accesses(struct graph * rdg,bitmap partition1,bitmap partition2)837*e4b17023SJohn Marino similar_memory_accesses (struct graph *rdg, bitmap partition1,
838*e4b17023SJohn Marino 			 bitmap partition2)
839*e4b17023SJohn Marino {
840*e4b17023SJohn Marino   unsigned i, j;
841*e4b17023SJohn Marino   bitmap_iterator bi, bj;
842*e4b17023SJohn Marino 
843*e4b17023SJohn Marino   EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
844*e4b17023SJohn Marino     if (RDG_MEM_WRITE_STMT (rdg, i)
845*e4b17023SJohn Marino 	|| RDG_MEM_READS_STMT (rdg, i))
846*e4b17023SJohn Marino       EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
847*e4b17023SJohn Marino 	if (RDG_MEM_WRITE_STMT (rdg, j)
848*e4b17023SJohn Marino 	    || RDG_MEM_READS_STMT (rdg, j))
849*e4b17023SJohn Marino 	  if (rdg_has_similar_memory_accesses (rdg, i, j))
850*e4b17023SJohn Marino 	    return true;
851*e4b17023SJohn Marino 
852*e4b17023SJohn Marino   return false;
853*e4b17023SJohn Marino }
854*e4b17023SJohn Marino 
855*e4b17023SJohn Marino /* Fuse all the partitions from PARTITIONS that contain similar memory
856*e4b17023SJohn Marino    references, i.e., we're taking care of cache locality.  This
857*e4b17023SJohn Marino    function does not fuse those partitions that contain patterns that
858*e4b17023SJohn Marino    can be code generated with builtins.  */
859*e4b17023SJohn Marino 
860*e4b17023SJohn Marino static void
fuse_partitions_with_similar_memory_accesses(struct graph * rdg,VEC (bitmap,heap)** partitions)861*e4b17023SJohn Marino fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
862*e4b17023SJohn Marino 					      VEC (bitmap, heap) **partitions)
863*e4b17023SJohn Marino {
864*e4b17023SJohn Marino   int p1, p2;
865*e4b17023SJohn Marino   bitmap partition1, partition2;
866*e4b17023SJohn Marino 
867*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
868*e4b17023SJohn Marino     if (!can_generate_builtin (rdg, partition1))
869*e4b17023SJohn Marino       FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
870*e4b17023SJohn Marino 	if (p1 != p2
871*e4b17023SJohn Marino 	    && !can_generate_builtin (rdg, partition2)
872*e4b17023SJohn Marino 	    && similar_memory_accesses (rdg, partition1, partition2))
873*e4b17023SJohn Marino 	  {
874*e4b17023SJohn Marino 	    bitmap_ior_into (partition1, partition2);
875*e4b17023SJohn Marino 	    VEC_ordered_remove (bitmap, *partitions, p2);
876*e4b17023SJohn Marino 	    p2--;
877*e4b17023SJohn Marino 	  }
878*e4b17023SJohn Marino }
879*e4b17023SJohn Marino 
880*e4b17023SJohn Marino /* Returns true when STMT will be code generated in a partition of RDG
881*e4b17023SJohn Marino    different than PART and that will not be code generated as a
882*e4b17023SJohn Marino    builtin.  */
883*e4b17023SJohn Marino 
884*e4b17023SJohn Marino static bool
stmt_generated_in_another_partition(struct graph * rdg,gimple stmt,int part,VEC (bitmap,heap)* partitions)885*e4b17023SJohn Marino stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
886*e4b17023SJohn Marino 				     VEC (bitmap, heap) *partitions)
887*e4b17023SJohn Marino {
888*e4b17023SJohn Marino   int p;
889*e4b17023SJohn Marino   bitmap pp;
890*e4b17023SJohn Marino   unsigned i;
891*e4b17023SJohn Marino   bitmap_iterator bi;
892*e4b17023SJohn Marino 
893*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
894*e4b17023SJohn Marino     if (p != part
895*e4b17023SJohn Marino 	&& !can_generate_builtin (rdg, pp))
896*e4b17023SJohn Marino       EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
897*e4b17023SJohn Marino 	if (stmt == RDG_STMT (rdg, i))
898*e4b17023SJohn Marino 	  return true;
899*e4b17023SJohn Marino 
900*e4b17023SJohn Marino   return false;
901*e4b17023SJohn Marino }
902*e4b17023SJohn Marino 
903*e4b17023SJohn Marino /* For each partition in PARTITIONS that will be code generated using
904*e4b17023SJohn Marino    a builtin, add its scalar computations used after the loop to
905*e4b17023SJohn Marino    PARTITION.  */
906*e4b17023SJohn Marino 
907*e4b17023SJohn Marino static void
add_scalar_computations_to_partition(struct graph * rdg,VEC (bitmap,heap)* partitions,bitmap partition)908*e4b17023SJohn Marino add_scalar_computations_to_partition (struct graph *rdg,
909*e4b17023SJohn Marino 				      VEC (bitmap, heap) *partitions,
910*e4b17023SJohn Marino 				      bitmap partition)
911*e4b17023SJohn Marino {
912*e4b17023SJohn Marino   int p;
913*e4b17023SJohn Marino   bitmap pp;
914*e4b17023SJohn Marino   unsigned i;
915*e4b17023SJohn Marino   bitmap_iterator bi;
916*e4b17023SJohn Marino   bitmap l = BITMAP_ALLOC (NULL);
917*e4b17023SJohn Marino   bitmap pr = BITMAP_ALLOC (NULL);
918*e4b17023SJohn Marino   bool f = false;
919*e4b17023SJohn Marino 
920*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
921*e4b17023SJohn Marino     if (can_generate_builtin (rdg, pp))
922*e4b17023SJohn Marino       EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
923*e4b17023SJohn Marino 	if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
924*e4b17023SJohn Marino 	    && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
925*e4b17023SJohn Marino 						     partitions))
926*e4b17023SJohn Marino 	  rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);
927*e4b17023SJohn Marino 
928*e4b17023SJohn Marino   rdg_flag_loop_exits (rdg, l, partition, pr, &f);
929*e4b17023SJohn Marino 
930*e4b17023SJohn Marino   BITMAP_FREE (pr);
931*e4b17023SJohn Marino   BITMAP_FREE (l);
932*e4b17023SJohn Marino }
933*e4b17023SJohn Marino 
934*e4b17023SJohn Marino /* Aggregate several components into a useful partition that is
935*e4b17023SJohn Marino    registered in the PARTITIONS vector.  Partitions will be
936*e4b17023SJohn Marino    distributed in different loops.  */
937*e4b17023SJohn Marino 
938*e4b17023SJohn Marino static void
rdg_build_partitions(struct graph * rdg,VEC (rdgc,heap)* components,VEC (int,heap)** other_stores,VEC (bitmap,heap)** partitions,bitmap processed)939*e4b17023SJohn Marino rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
940*e4b17023SJohn Marino 		      VEC (int, heap) **other_stores,
941*e4b17023SJohn Marino 		      VEC (bitmap, heap) **partitions, bitmap processed)
942*e4b17023SJohn Marino {
943*e4b17023SJohn Marino   int i;
944*e4b17023SJohn Marino   rdgc x;
945*e4b17023SJohn Marino   bitmap partition = BITMAP_ALLOC (NULL);
946*e4b17023SJohn Marino 
947*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (rdgc, components, i, x)
948*e4b17023SJohn Marino     {
949*e4b17023SJohn Marino       bitmap np;
950*e4b17023SJohn Marino       bool part_has_writes = false;
951*e4b17023SJohn Marino       int v = VEC_index (int, x->vertices, 0);
952*e4b17023SJohn Marino 
953*e4b17023SJohn Marino       if (bitmap_bit_p (processed, v))
954*e4b17023SJohn Marino 	continue;
955*e4b17023SJohn Marino 
956*e4b17023SJohn Marino       np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
957*e4b17023SJohn Marino       bitmap_ior_into (partition, np);
958*e4b17023SJohn Marino       bitmap_ior_into (processed, np);
959*e4b17023SJohn Marino       BITMAP_FREE (np);
960*e4b17023SJohn Marino 
961*e4b17023SJohn Marino       if (part_has_writes)
962*e4b17023SJohn Marino 	{
963*e4b17023SJohn Marino 	  if (dump_file && (dump_flags & TDF_DETAILS))
964*e4b17023SJohn Marino 	    {
965*e4b17023SJohn Marino 	      fprintf (dump_file, "ldist useful partition:\n");
966*e4b17023SJohn Marino 	      dump_bitmap (dump_file, partition);
967*e4b17023SJohn Marino 	    }
968*e4b17023SJohn Marino 
969*e4b17023SJohn Marino 	  VEC_safe_push (bitmap, heap, *partitions, partition);
970*e4b17023SJohn Marino 	  partition = BITMAP_ALLOC (NULL);
971*e4b17023SJohn Marino 	}
972*e4b17023SJohn Marino     }
973*e4b17023SJohn Marino 
974*e4b17023SJohn Marino   /* Add the nodes from the RDG that were not marked as processed, and
975*e4b17023SJohn Marino      that are used outside the current loop.  These are scalar
976*e4b17023SJohn Marino      computations that are not yet part of previous partitions.  */
977*e4b17023SJohn Marino   for (i = 0; i < rdg->n_vertices; i++)
978*e4b17023SJohn Marino     if (!bitmap_bit_p (processed, i)
979*e4b17023SJohn Marino 	&& rdg_defs_used_in_other_loops_p (rdg, i))
980*e4b17023SJohn Marino       VEC_safe_push (int, heap, *other_stores, i);
981*e4b17023SJohn Marino 
982*e4b17023SJohn Marino   /* If there are still statements left in the OTHER_STORES array,
983*e4b17023SJohn Marino      create other components and partitions with these stores and
984*e4b17023SJohn Marino      their dependences.  */
985*e4b17023SJohn Marino   if (VEC_length (int, *other_stores) > 0)
986*e4b17023SJohn Marino     {
987*e4b17023SJohn Marino       VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
988*e4b17023SJohn Marino       VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
989*e4b17023SJohn Marino 
990*e4b17023SJohn Marino       rdg_build_components (rdg, *other_stores, &comps);
991*e4b17023SJohn Marino       rdg_build_partitions (rdg, comps, &foo, partitions, processed);
992*e4b17023SJohn Marino 
993*e4b17023SJohn Marino       VEC_free (int, heap, foo);
994*e4b17023SJohn Marino       free_rdg_components (comps);
995*e4b17023SJohn Marino     }
996*e4b17023SJohn Marino 
997*e4b17023SJohn Marino   add_scalar_computations_to_partition (rdg, *partitions, partition);
998*e4b17023SJohn Marino 
999*e4b17023SJohn Marino   /* If there is something left in the last partition, save it.  */
1000*e4b17023SJohn Marino   if (bitmap_count_bits (partition) > 0)
1001*e4b17023SJohn Marino     VEC_safe_push (bitmap, heap, *partitions, partition);
1002*e4b17023SJohn Marino   else
1003*e4b17023SJohn Marino     BITMAP_FREE (partition);
1004*e4b17023SJohn Marino 
1005*e4b17023SJohn Marino   fuse_partitions_with_similar_memory_accesses (rdg, partitions);
1006*e4b17023SJohn Marino }
1007*e4b17023SJohn Marino 
1008*e4b17023SJohn Marino /* Dump to FILE the PARTITIONS.  */
1009*e4b17023SJohn Marino 
1010*e4b17023SJohn Marino static void
dump_rdg_partitions(FILE * file,VEC (bitmap,heap)* partitions)1011*e4b17023SJohn Marino dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
1012*e4b17023SJohn Marino {
1013*e4b17023SJohn Marino   int i;
1014*e4b17023SJohn Marino   bitmap partition;
1015*e4b17023SJohn Marino 
1016*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1017*e4b17023SJohn Marino     debug_bitmap_file (file, partition);
1018*e4b17023SJohn Marino }
1019*e4b17023SJohn Marino 
1020*e4b17023SJohn Marino /* Debug PARTITIONS.  */
1021*e4b17023SJohn Marino extern void debug_rdg_partitions (VEC (bitmap, heap) *);
1022*e4b17023SJohn Marino 
1023*e4b17023SJohn Marino DEBUG_FUNCTION void
debug_rdg_partitions(VEC (bitmap,heap)* partitions)1024*e4b17023SJohn Marino debug_rdg_partitions (VEC (bitmap, heap) *partitions)
1025*e4b17023SJohn Marino {
1026*e4b17023SJohn Marino   dump_rdg_partitions (stderr, partitions);
1027*e4b17023SJohn Marino }
1028*e4b17023SJohn Marino 
1029*e4b17023SJohn Marino /* Returns the number of read and write operations in the RDG.  */
1030*e4b17023SJohn Marino 
1031*e4b17023SJohn Marino static int
number_of_rw_in_rdg(struct graph * rdg)1032*e4b17023SJohn Marino number_of_rw_in_rdg (struct graph *rdg)
1033*e4b17023SJohn Marino {
1034*e4b17023SJohn Marino   int i, res = 0;
1035*e4b17023SJohn Marino 
1036*e4b17023SJohn Marino   for (i = 0; i < rdg->n_vertices; i++)
1037*e4b17023SJohn Marino     {
1038*e4b17023SJohn Marino       if (RDG_MEM_WRITE_STMT (rdg, i))
1039*e4b17023SJohn Marino 	++res;
1040*e4b17023SJohn Marino 
1041*e4b17023SJohn Marino       if (RDG_MEM_READS_STMT (rdg, i))
1042*e4b17023SJohn Marino 	++res;
1043*e4b17023SJohn Marino     }
1044*e4b17023SJohn Marino 
1045*e4b17023SJohn Marino   return res;
1046*e4b17023SJohn Marino }
1047*e4b17023SJohn Marino 
1048*e4b17023SJohn Marino /* Returns the number of read and write operations in a PARTITION of
1049*e4b17023SJohn Marino    the RDG.  */
1050*e4b17023SJohn Marino 
1051*e4b17023SJohn Marino static int
number_of_rw_in_partition(struct graph * rdg,bitmap partition)1052*e4b17023SJohn Marino number_of_rw_in_partition (struct graph *rdg, bitmap partition)
1053*e4b17023SJohn Marino {
1054*e4b17023SJohn Marino   int res = 0;
1055*e4b17023SJohn Marino   unsigned i;
1056*e4b17023SJohn Marino   bitmap_iterator ii;
1057*e4b17023SJohn Marino 
1058*e4b17023SJohn Marino   EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
1059*e4b17023SJohn Marino     {
1060*e4b17023SJohn Marino       if (RDG_MEM_WRITE_STMT (rdg, i))
1061*e4b17023SJohn Marino 	++res;
1062*e4b17023SJohn Marino 
1063*e4b17023SJohn Marino       if (RDG_MEM_READS_STMT (rdg, i))
1064*e4b17023SJohn Marino 	++res;
1065*e4b17023SJohn Marino     }
1066*e4b17023SJohn Marino 
1067*e4b17023SJohn Marino   return res;
1068*e4b17023SJohn Marino }
1069*e4b17023SJohn Marino 
1070*e4b17023SJohn Marino /* Returns true when one of the PARTITIONS contains all the read or
1071*e4b17023SJohn Marino    write operations of RDG.  */
1072*e4b17023SJohn Marino 
1073*e4b17023SJohn Marino static bool
partition_contains_all_rw(struct graph * rdg,VEC (bitmap,heap)* partitions)1074*e4b17023SJohn Marino partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
1075*e4b17023SJohn Marino {
1076*e4b17023SJohn Marino   int i;
1077*e4b17023SJohn Marino   bitmap partition;
1078*e4b17023SJohn Marino   int nrw = number_of_rw_in_rdg (rdg);
1079*e4b17023SJohn Marino 
1080*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1081*e4b17023SJohn Marino     if (nrw == number_of_rw_in_partition (rdg, partition))
1082*e4b17023SJohn Marino       return true;
1083*e4b17023SJohn Marino 
1084*e4b17023SJohn Marino   return false;
1085*e4b17023SJohn Marino }
1086*e4b17023SJohn Marino 
1087*e4b17023SJohn Marino /* Generate code from STARTING_VERTICES in RDG.  Returns the number of
1088*e4b17023SJohn Marino    distributed loops.  */
1089*e4b17023SJohn Marino 
1090*e4b17023SJohn Marino static int
ldist_gen(struct loop * loop,struct graph * rdg,VEC (int,heap)* starting_vertices)1091*e4b17023SJohn Marino ldist_gen (struct loop *loop, struct graph *rdg,
1092*e4b17023SJohn Marino 	   VEC (int, heap) *starting_vertices)
1093*e4b17023SJohn Marino {
1094*e4b17023SJohn Marino   int i, nbp;
1095*e4b17023SJohn Marino   VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
1096*e4b17023SJohn Marino   VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
1097*e4b17023SJohn Marino   VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
1098*e4b17023SJohn Marino   bitmap partition, processed = BITMAP_ALLOC (NULL);
1099*e4b17023SJohn Marino 
1100*e4b17023SJohn Marino   remaining_stmts = BITMAP_ALLOC (NULL);
1101*e4b17023SJohn Marino   upstream_mem_writes = BITMAP_ALLOC (NULL);
1102*e4b17023SJohn Marino 
1103*e4b17023SJohn Marino   for (i = 0; i < rdg->n_vertices; i++)
1104*e4b17023SJohn Marino     {
1105*e4b17023SJohn Marino       bitmap_set_bit (remaining_stmts, i);
1106*e4b17023SJohn Marino 
1107*e4b17023SJohn Marino       /* Save in OTHER_STORES all the memory writes that are not in
1108*e4b17023SJohn Marino 	 STARTING_VERTICES.  */
1109*e4b17023SJohn Marino       if (RDG_MEM_WRITE_STMT (rdg, i))
1110*e4b17023SJohn Marino 	{
1111*e4b17023SJohn Marino 	  int v;
1112*e4b17023SJohn Marino 	  unsigned j;
1113*e4b17023SJohn Marino 	  bool found = false;
1114*e4b17023SJohn Marino 
1115*e4b17023SJohn Marino 	  FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1116*e4b17023SJohn Marino 	    if (i == v)
1117*e4b17023SJohn Marino 	      {
1118*e4b17023SJohn Marino 		found = true;
1119*e4b17023SJohn Marino 		break;
1120*e4b17023SJohn Marino 	      }
1121*e4b17023SJohn Marino 
1122*e4b17023SJohn Marino 	  if (!found)
1123*e4b17023SJohn Marino 	    VEC_safe_push (int, heap, other_stores, i);
1124*e4b17023SJohn Marino 	}
1125*e4b17023SJohn Marino     }
1126*e4b17023SJohn Marino 
1127*e4b17023SJohn Marino   mark_nodes_having_upstream_mem_writes (rdg);
1128*e4b17023SJohn Marino   rdg_build_components (rdg, starting_vertices, &components);
1129*e4b17023SJohn Marino   rdg_build_partitions (rdg, components, &other_stores, &partitions,
1130*e4b17023SJohn Marino 			processed);
1131*e4b17023SJohn Marino   BITMAP_FREE (processed);
1132*e4b17023SJohn Marino   nbp = VEC_length (bitmap, partitions);
1133*e4b17023SJohn Marino 
1134*e4b17023SJohn Marino   if (nbp <= 1
1135*e4b17023SJohn Marino       || partition_contains_all_rw (rdg, partitions))
1136*e4b17023SJohn Marino     goto ldist_done;
1137*e4b17023SJohn Marino 
1138*e4b17023SJohn Marino   if (dump_file && (dump_flags & TDF_DETAILS))
1139*e4b17023SJohn Marino     dump_rdg_partitions (dump_file, partitions);
1140*e4b17023SJohn Marino 
1141*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1142*e4b17023SJohn Marino     if (!generate_code_for_partition (loop, partition, i < nbp - 1))
1143*e4b17023SJohn Marino       goto ldist_done;
1144*e4b17023SJohn Marino 
1145*e4b17023SJohn Marino   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1146*e4b17023SJohn Marino   mark_sym_for_renaming (gimple_vop (cfun));
1147*e4b17023SJohn Marino   update_ssa (TODO_update_ssa_only_virtuals);
1148*e4b17023SJohn Marino 
1149*e4b17023SJohn Marino  ldist_done:
1150*e4b17023SJohn Marino 
1151*e4b17023SJohn Marino   BITMAP_FREE (remaining_stmts);
1152*e4b17023SJohn Marino   BITMAP_FREE (upstream_mem_writes);
1153*e4b17023SJohn Marino 
1154*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1155*e4b17023SJohn Marino     BITMAP_FREE (partition);
1156*e4b17023SJohn Marino 
1157*e4b17023SJohn Marino   VEC_free (int, heap, other_stores);
1158*e4b17023SJohn Marino   VEC_free (bitmap, heap, partitions);
1159*e4b17023SJohn Marino   free_rdg_components (components);
1160*e4b17023SJohn Marino   return nbp;
1161*e4b17023SJohn Marino }
1162*e4b17023SJohn Marino 
1163*e4b17023SJohn Marino /* Distributes the code from LOOP in such a way that producer
1164*e4b17023SJohn Marino    statements are placed before consumer statements.  When STMTS is
1165*e4b17023SJohn Marino    NULL, performs the maximal distribution, if STMTS is not NULL,
1166*e4b17023SJohn Marino    tries to separate only these statements from the LOOP's body.
1167*e4b17023SJohn Marino    Returns the number of distributed loops.  */
1168*e4b17023SJohn Marino 
1169*e4b17023SJohn Marino static int
distribute_loop(struct loop * loop,VEC (gimple,heap)* stmts)1170*e4b17023SJohn Marino distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
1171*e4b17023SJohn Marino {
1172*e4b17023SJohn Marino   int res = 0;
1173*e4b17023SJohn Marino   struct graph *rdg;
1174*e4b17023SJohn Marino   gimple s;
1175*e4b17023SJohn Marino   unsigned i;
1176*e4b17023SJohn Marino   VEC (int, heap) *vertices;
1177*e4b17023SJohn Marino   VEC (ddr_p, heap) *dependence_relations;
1178*e4b17023SJohn Marino   VEC (data_reference_p, heap) *datarefs;
1179*e4b17023SJohn Marino   VEC (loop_p, heap) *loop_nest;
1180*e4b17023SJohn Marino 
1181*e4b17023SJohn Marino   if (loop->num_nodes > 2)
1182*e4b17023SJohn Marino     {
1183*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
1184*e4b17023SJohn Marino 	fprintf (dump_file,
1185*e4b17023SJohn Marino 		 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
1186*e4b17023SJohn Marino 		 loop->num);
1187*e4b17023SJohn Marino 
1188*e4b17023SJohn Marino       return res;
1189*e4b17023SJohn Marino     }
1190*e4b17023SJohn Marino 
1191*e4b17023SJohn Marino   datarefs = VEC_alloc (data_reference_p, heap, 10);
1192*e4b17023SJohn Marino   dependence_relations = VEC_alloc (ddr_p, heap, 100);
1193*e4b17023SJohn Marino   loop_nest = VEC_alloc (loop_p, heap, 3);
1194*e4b17023SJohn Marino   rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
1195*e4b17023SJohn Marino 
1196*e4b17023SJohn Marino   if (!rdg)
1197*e4b17023SJohn Marino     {
1198*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
1199*e4b17023SJohn Marino 	fprintf (dump_file,
1200*e4b17023SJohn Marino 		 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1201*e4b17023SJohn Marino 		 loop->num);
1202*e4b17023SJohn Marino 
1203*e4b17023SJohn Marino       free_dependence_relations (dependence_relations);
1204*e4b17023SJohn Marino       free_data_refs (datarefs);
1205*e4b17023SJohn Marino       VEC_free (loop_p, heap, loop_nest);
1206*e4b17023SJohn Marino       return res;
1207*e4b17023SJohn Marino     }
1208*e4b17023SJohn Marino 
1209*e4b17023SJohn Marino   vertices = VEC_alloc (int, heap, 3);
1210*e4b17023SJohn Marino 
1211*e4b17023SJohn Marino   if (dump_file && (dump_flags & TDF_DETAILS))
1212*e4b17023SJohn Marino     dump_rdg (dump_file, rdg);
1213*e4b17023SJohn Marino 
1214*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1215*e4b17023SJohn Marino     {
1216*e4b17023SJohn Marino       int v = rdg_vertex_for_stmt (rdg, s);
1217*e4b17023SJohn Marino 
1218*e4b17023SJohn Marino       if (v >= 0)
1219*e4b17023SJohn Marino 	{
1220*e4b17023SJohn Marino 	  VEC_safe_push (int, heap, vertices, v);
1221*e4b17023SJohn Marino 
1222*e4b17023SJohn Marino 	  if (dump_file && (dump_flags & TDF_DETAILS))
1223*e4b17023SJohn Marino 	    fprintf (dump_file,
1224*e4b17023SJohn Marino 		     "ldist asked to generate code for vertex %d\n", v);
1225*e4b17023SJohn Marino 	}
1226*e4b17023SJohn Marino     }
1227*e4b17023SJohn Marino 
1228*e4b17023SJohn Marino   res = ldist_gen (loop, rdg, vertices);
1229*e4b17023SJohn Marino   VEC_free (int, heap, vertices);
1230*e4b17023SJohn Marino   free_rdg (rdg);
1231*e4b17023SJohn Marino   free_dependence_relations (dependence_relations);
1232*e4b17023SJohn Marino   free_data_refs (datarefs);
1233*e4b17023SJohn Marino   VEC_free (loop_p, heap, loop_nest);
1234*e4b17023SJohn Marino   return res;
1235*e4b17023SJohn Marino }
1236*e4b17023SJohn Marino 
1237*e4b17023SJohn Marino /* Distribute all loops in the current function.  */
1238*e4b17023SJohn Marino 
1239*e4b17023SJohn Marino static unsigned int
tree_loop_distribution(void)1240*e4b17023SJohn Marino tree_loop_distribution (void)
1241*e4b17023SJohn Marino {
1242*e4b17023SJohn Marino   struct loop *loop;
1243*e4b17023SJohn Marino   loop_iterator li;
1244*e4b17023SJohn Marino   int nb_generated_loops = 0;
1245*e4b17023SJohn Marino 
1246*e4b17023SJohn Marino   FOR_EACH_LOOP (li, loop, 0)
1247*e4b17023SJohn Marino     {
1248*e4b17023SJohn Marino       VEC (gimple, heap) *work_list = NULL;
1249*e4b17023SJohn Marino       int num = loop->num;
1250*e4b17023SJohn Marino 
1251*e4b17023SJohn Marino       /* If the loop doesn't have a single exit we will fail anyway,
1252*e4b17023SJohn Marino 	 so do that early.  */
1253*e4b17023SJohn Marino       if (!single_exit (loop))
1254*e4b17023SJohn Marino 	continue;
1255*e4b17023SJohn Marino 
1256*e4b17023SJohn Marino       /* If both flag_tree_loop_distribute_patterns and
1257*e4b17023SJohn Marino 	 flag_tree_loop_distribution are set, then only
1258*e4b17023SJohn Marino 	 distribute_patterns is executed.  */
1259*e4b17023SJohn Marino       if (flag_tree_loop_distribute_patterns)
1260*e4b17023SJohn Marino 	{
1261*e4b17023SJohn Marino 	  /* With the following working list, we're asking
1262*e4b17023SJohn Marino 	     distribute_loop to separate from the rest of the loop the
1263*e4b17023SJohn Marino 	     stores of the form "A[i] = 0".  */
1264*e4b17023SJohn Marino 	  stores_zero_from_loop (loop, &work_list);
1265*e4b17023SJohn Marino 
1266*e4b17023SJohn Marino 	  /* Do nothing if there are no patterns to be distributed.  */
1267*e4b17023SJohn Marino 	  if (VEC_length (gimple, work_list) > 0)
1268*e4b17023SJohn Marino 	    nb_generated_loops = distribute_loop (loop, work_list);
1269*e4b17023SJohn Marino 	}
1270*e4b17023SJohn Marino       else if (flag_tree_loop_distribution)
1271*e4b17023SJohn Marino 	{
1272*e4b17023SJohn Marino 	  /* With the following working list, we're asking
1273*e4b17023SJohn Marino 	     distribute_loop to separate the stores of the loop: when
1274*e4b17023SJohn Marino 	     dependences allow, it will end on having one store per
1275*e4b17023SJohn Marino 	     loop.  */
1276*e4b17023SJohn Marino 	  stores_from_loop (loop, &work_list);
1277*e4b17023SJohn Marino 
1278*e4b17023SJohn Marino 	  /* A simple heuristic for cache locality is to not split
1279*e4b17023SJohn Marino 	     stores to the same array.  Without this call, an unrolled
1280*e4b17023SJohn Marino 	     loop would be split into as many loops as unroll factor,
1281*e4b17023SJohn Marino 	     each loop storing in the same array.  */
1282*e4b17023SJohn Marino 	  remove_similar_memory_refs (&work_list);
1283*e4b17023SJohn Marino 
1284*e4b17023SJohn Marino 	  nb_generated_loops = distribute_loop (loop, work_list);
1285*e4b17023SJohn Marino 	}
1286*e4b17023SJohn Marino 
1287*e4b17023SJohn Marino       if (dump_file && (dump_flags & TDF_DETAILS))
1288*e4b17023SJohn Marino 	{
1289*e4b17023SJohn Marino 	  if (nb_generated_loops > 1)
1290*e4b17023SJohn Marino 	    fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1291*e4b17023SJohn Marino 		     num, nb_generated_loops);
1292*e4b17023SJohn Marino 	  else
1293*e4b17023SJohn Marino 	    fprintf (dump_file, "Loop %d is the same.\n", num);
1294*e4b17023SJohn Marino 	}
1295*e4b17023SJohn Marino 
1296*e4b17023SJohn Marino       verify_loop_structure ();
1297*e4b17023SJohn Marino 
1298*e4b17023SJohn Marino       VEC_free (gimple, heap, work_list);
1299*e4b17023SJohn Marino     }
1300*e4b17023SJohn Marino 
1301*e4b17023SJohn Marino   return 0;
1302*e4b17023SJohn Marino }
1303*e4b17023SJohn Marino 
1304*e4b17023SJohn Marino static bool
gate_tree_loop_distribution(void)1305*e4b17023SJohn Marino gate_tree_loop_distribution (void)
1306*e4b17023SJohn Marino {
1307*e4b17023SJohn Marino   return flag_tree_loop_distribution
1308*e4b17023SJohn Marino     || flag_tree_loop_distribute_patterns;
1309*e4b17023SJohn Marino }
1310*e4b17023SJohn Marino 
1311*e4b17023SJohn Marino struct gimple_opt_pass pass_loop_distribution =
1312*e4b17023SJohn Marino {
1313*e4b17023SJohn Marino  {
1314*e4b17023SJohn Marino   GIMPLE_PASS,
1315*e4b17023SJohn Marino   "ldist",			/* name */
1316*e4b17023SJohn Marino   gate_tree_loop_distribution,  /* gate */
1317*e4b17023SJohn Marino   tree_loop_distribution,       /* execute */
1318*e4b17023SJohn Marino   NULL,				/* sub */
1319*e4b17023SJohn Marino   NULL,				/* next */
1320*e4b17023SJohn Marino   0,				/* static_pass_number */
1321*e4b17023SJohn Marino   TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
1322*e4b17023SJohn Marino   PROP_cfg | PROP_ssa,		/* properties_required */
1323*e4b17023SJohn Marino   0,				/* properties_provided */
1324*e4b17023SJohn Marino   0,				/* properties_destroyed */
1325*e4b17023SJohn Marino   0,				/* todo_flags_start */
1326*e4b17023SJohn Marino   TODO_ggc_collect
1327*e4b17023SJohn Marino   | TODO_verify_ssa             /* todo_flags_finish */
1328*e4b17023SJohn Marino  }
1329*e4b17023SJohn Marino };
1330