/* Loop header copying on trees.
   Copyright (C) 2004-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-inline.h"
#include "tree-ssa-scopedtables.h"
#include "tree-ssa-threadedge.h"
#include "tree-ssa-sccvn.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "params.h"

/* Duplicates headers of loops if they are small enough, so that the statements
   in the loop body are always executed when the loop is entered.  This
   increases effectiveness of code motion optimizations, and reduces the need
   for loop preconditioning.  */
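
/* For illustration only (a sketch, not code used by the pass): copying the
   header effectively rewrites

     while (cond)
       body;

   into

     if (cond)
       do
	 body;
       while (cond);

   so that, once the loop is entered, the body is known to execute at least
   once.  */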

/* Check whether we should duplicate HEADER of LOOP.  At most *LIMIT
   instructions should be duplicated, limit is decreased by the actual
   amount.  */

static bool
should_duplicate_loop_header_p (basic_block header, struct loop *loop,
				int *limit)
{
  gimple_stmt_iterator bsi;

  gcc_assert (!header->aux);

  /* Loop header copying usually increases size of the code.  This used not to
     be true, since quite often it is possible to verify that the condition is
     satisfied in the first iteration and therefore to eliminate it.  Jump
     threading handles these cases now.  */
  if (optimize_loop_for_size_p (loop)
      && !loop->force_vectorize)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: optimizing for size.\n",
		 header->index);
      return false;
    }

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (single_succ_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it is single succ.\n",
		 header->index);
      return false;
    }

  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: both successors are in loop.\n",
		 header->index);
      return false;
    }

  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header && !single_pred_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it has multiple predecessors.\n",
		 header->index);
      return false;
    }

  gcond *last = safe_dyn_cast <gcond *> (last_stmt (header));
  if (!last)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it does not end with a conditional.\n",
		 header->index);
      return false;
    }

  for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
       gsi_next (&psi))
    {
      gphi *phi = psi.phi ();
      tree res = gimple_phi_result (phi);
      if (INTEGRAL_TYPE_P (TREE_TYPE (res))
	  || POINTER_TYPE_P (TREE_TYPE (res)))
	gimple_set_uid (phi, 1 /* IV */);
      else
	gimple_set_uid (phi, 0);
    }

  /* Count the number of instructions and punt on calls.
     Populate each stmt's INV/IV flag so that we can later apply heuristics
     to the kind of conditions we want to copy.  */
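  /* Note on the uid encoding used here and below: bit 0 ("IV") marks a stmt
     or PHI whose value depends on an induction variable defined in the loop;
     bit 1 ("INV") marks a stmt whose value is invariant in the loop (all its
     in-loop operands are themselves invariant).  Stmts with memory uses keep
     uid zero, i.e. neither flag.  */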
  for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *last = gsi_stmt (bsi);

      if (gimple_code (last) == GIMPLE_LABEL)
	continue;

      if (is_gimple_debug (last))
	continue;

      if (gimple_code (last) == GIMPLE_CALL
	  && (!gimple_inexpensive_call_p (as_a <gcall *> (last))
	      /* IFN_LOOP_DIST_ALIAS means that the inner loop is distributed
		 at the current loop's header.  Don't copy in this case.  */
	      || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Not duplicating bb %i: it contains a call.\n",
		     header->index);
	  return false;
	}

      *limit -= estimate_num_insns (last, &eni_size_weights);
      if (*limit < 0)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Not duplicating bb %i: it contains too many insns.\n",
		     header->index);
	  return false;
	}

      /* Classify the stmt based on whether its computation is based
         on an IV or whether it is invariant in the loop.  */
      gimple_set_uid (last, 0);
      if (!gimple_vuse (last))
	{
	  bool inv = true;
	  bool iv = false;
	  ssa_op_iter i;
	  tree op;
	  FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
	    if (!SSA_NAME_IS_DEFAULT_DEF (op)
		&& flow_bb_inside_loop_p (loop,
					  gimple_bb (SSA_NAME_DEF_STMT (op))))
	      {
		if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */))
		  inv = false;
		if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */)
		  iv = true;
	      }
	  gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0));
	}
    }

  /* If the condition tests a non-IV loop variant we do not want to rotate
     the loop further, unless this is the original loop header.  */
  tree lhs = gimple_cond_lhs (last);
  tree rhs = gimple_cond_rhs (last);
  if (header != loop->header
      && ((TREE_CODE (lhs) == SSA_NAME
	   && !SSA_NAME_IS_DEFAULT_DEF (lhs)
	   && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs)))
	   && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0)
	  || (TREE_CODE (rhs) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	      && flow_bb_inside_loop_p (loop,
					gimple_bb (SSA_NAME_DEF_STMT (rhs)))
	      && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: condition based on non-IV loop "
		 "variant.\n", header->index);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "    Will duplicate bb %i\n", header->index);
  return true;
}

/* Checks whether LOOP is a do-while style loop.  */
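/* In this pass, a do-while style loop is one whose exit test is at the
   bottom, as in

     do
       body;
     while (cond);

   i.e. the latch is empty, has a single predecessor, and that predecessor
   exits the loop.  */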

static bool
do_while_loop_p (struct loop *loop)
{
  gimple *stmt = last_stmt (loop->latch);

  /* If the latch of the loop is not empty, it is not a do-while loop.  */
  if (stmt
      && gimple_code (stmt) != GIMPLE_LABEL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %i is not do-while loop: latch is not empty.\n",
		 loop->num);
      return false;
    }

  /* If the latch does not have a single predecessor, it is not a
     do-while loop.  */
  if (!single_pred_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %i is not do-while loop: latch has multiple "
		 "predecessors.\n", loop->num);
      return false;
    }

  /* If the latch predecessor doesn't exit the loop, it is not a
     do-while loop.  */
  if (!loop_exits_from_bb_p (loop, single_pred (loop->latch)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %i is not do-while loop: latch predecessor "
		 "does not exit loop.\n", loop->num);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Loop %i is do-while loop\n", loop->num);

  return true;
}

namespace {

/* Common superclass for both header-copying phases.  */
class ch_base : public gimple_opt_pass
{
  protected:
    ch_base (pass_data data, gcc::context *ctxt)
      : gimple_opt_pass (data, ctxt)
    {}

  /* Copies headers of all loops in FUN for which process_loop_p is true.  */
  unsigned int copy_headers (function *fun);

  /* Return true to copy headers of LOOP or false to skip.  */
  virtual bool process_loop_p (struct loop *loop) = 0;
};

const pass_data pass_data_ch =
{
  GIMPLE_PASS, /* type */
  "ch", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ch : public ch_base
{
public:
  pass_ch (gcc::context *ctxt)
    : ch_base (pass_data_ch, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_ch != 0; }

  /* Initialize and finalize loop structures, copying headers in between.  */
  virtual unsigned int execute (function *);

  opt_pass * clone () { return new pass_ch (m_ctxt); }

protected:
  /* ch_base method: */
  virtual bool process_loop_p (struct loop *loop);
}; // class pass_ch

const pass_data pass_data_ch_vect =
{
  GIMPLE_PASS, /* type */
  "ch_vect", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* This is a more aggressive version of the same pass, designed to run just
   before if-conversion and vectorization, to put more loops into the form
   required for those phases.  */
class pass_ch_vect : public ch_base
{
public:
  pass_ch_vect (gcc::context *ctxt)
    : ch_base (pass_data_ch_vect, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
  {
    return flag_tree_ch != 0
	   && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
  }

  /* Just copy headers, no initialization/finalization of loop structures.  */
  virtual unsigned int execute (function *);

protected:
  /* ch_base method: */
  virtual bool process_loop_p (struct loop *loop);
}; // class pass_ch_vect

/* For all loops, copy the condition at the end of the loop body in front
   of the loop.  This is beneficial since it increases efficiency of
   code motion optimizations.  It also saves one jump on entry to the loop.  */

unsigned int
ch_base::copy_headers (function *fun)
{
  struct loop *loop;
  basic_block header;
  edge exit, entry;
  basic_block *bbs, *copied_bbs;
  unsigned n_bbs;
  unsigned bbs_size;
  bool changed = false;

  if (number_of_loops (fun) <= 1)
    return 0;

  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
  copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
  bbs_size = n_basic_blocks_for_fn (fun);

  auto_vec<std::pair<edge, loop_p> > copied;

  FOR_EACH_LOOP (loop, 0)
    {
      int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS);
      int remaining_limit = initial_limit;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Analyzing loop %i\n", loop->num);

      header = loop->header;

      /* If the loop is already a do-while style one (either because it was
	 written as such, or because jump threading transformed it into one),
	 we might in fact be peeling the first iteration of the loop.  This
	 in general is not a good idea.  Also avoid touching infinite loops.  */
      if (!loop_has_exit_edges (loop)
	  || !process_loop_p (loop))
	continue;

      /* Iterate the header copying up to limit; this takes care of the cases
	 like while (a && b) {...}, where we want to have both of the conditions
	 copied.  TODO -- handle while (a || b)-like cases, by not requiring
	 the header to have just a single successor and copying up to
	 postdominator.  */
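      /* For instance, for while (a && b) {...} the header testing "a" is
	 copied first; its in-loop successor tests "b" and is copied next,
	 so both conditions end up guarding entry to the loop.  */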

      exit = NULL;
      n_bbs = 0;
      while (should_duplicate_loop_header_p (header, loop, &remaining_limit))
	{
	  /* Find a successor of header that is inside the loop; i.e. the new
	     header after the condition is copied.  */
	  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
	    exit = EDGE_SUCC (header, 0);
	  else
	    exit = EDGE_SUCC (header, 1);
	  bbs[n_bbs++] = header;
	  gcc_assert (bbs_size > n_bbs);
	  header = exit->dest;
	}

      if (!exit)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Duplicating header of the loop %d up to edge %d->%d,"
		 " %i insns.\n",
		 loop->num, exit->src->index, exit->dest->index,
		 initial_limit - remaining_limit);

      /* Ensure that the header will have just the latch as a predecessor
	 inside the loop.  */
      if (!single_pred_p (exit->dest))
	exit = single_pred_edge (split_edge (exit));

      entry = loop_preheader_edge (loop);

      propagate_threaded_block_debug_into (exit->dest, entry->dest);
      if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs,
					 true))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Duplication failed.\n");
	  continue;
	}
      copied.safe_push (std::make_pair (entry, loop));

      /* If the loop has the form "for (i = j; i < j + 10; i++)" then
	 this copying can introduce a case where we rely on undefined
	 signed overflow to eliminate the preheader condition, because
	 we assume that "j < j + 10" is true.  We don't want to warn
	 about that case for -Wstrict-overflow, because in general we
	 don't warn about overflow involving loops.  Prevent the
	 warning by setting the no_warning flag in the condition.  */
      if (warn_strict_overflow > 0)
	{
	  unsigned int i;

	  for (i = 0; i < n_bbs; ++i)
	    {
	      gimple_stmt_iterator bsi;

	      for (bsi = gsi_start_bb (copied_bbs[i]);
		   !gsi_end_p (bsi);
		   gsi_next (&bsi))
		{
		  gimple *stmt = gsi_stmt (bsi);
		  if (gimple_code (stmt) == GIMPLE_COND)
		    {
		      tree lhs = gimple_cond_lhs (stmt);
		      if (gimple_cond_code (stmt) != EQ_EXPR
			  && gimple_cond_code (stmt) != NE_EXPR
			  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
			  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
			gimple_set_no_warning (stmt, true);
		    }
		  else if (is_gimple_assign (stmt))
		    {
		      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
		      tree rhs1 = gimple_assign_rhs1 (stmt);
		      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
			  && rhs_code != EQ_EXPR
			  && rhs_code != NE_EXPR
			  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
			  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
			gimple_set_no_warning (stmt, true);
		    }
		}
	    }
	}

      /* Ensure that the latch and the preheader are simple (we know that they
	 are not now, since the loop exit condition was there).  */
      split_edge (loop_preheader_edge (loop));
      split_edge (loop_latch_edge (loop));

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (do_while_loop_p (loop))
	    fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num);
	  else
	    fprintf (dump_file, "Loop %d is still not do-while loop.\n",
		     loop->num);
	}

      changed = true;
    }

  if (changed)
    {
      update_ssa (TODO_update_ssa);
      /* After updating SSA form perform CSE on the loop header
	 copies.  This is esp. required for the pass before
	 vectorization since nothing cleans up copied exit tests
	 that can now be simplified.  CSE from the entry of the
	 copied region to all loop exit blocks, but do not enter
	 the loop itself.  */
      for (unsigned i = 0; i < copied.length (); ++i)
	{
	  edge entry = copied[i].first;
	  loop_p loop = copied[i].second;
	  vec<edge> exit_edges = get_loop_exit_edges (loop);
	  bitmap exit_bbs = BITMAP_ALLOC (NULL);
	  for (unsigned j = 0; j < exit_edges.length (); ++j)
	    bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
	  bitmap_set_bit (exit_bbs, loop->header->index);
	  do_rpo_vn (cfun, entry, exit_bbs);
	  BITMAP_FREE (exit_bbs);
	  exit_edges.release ();
	}
    }
  free (bbs);
  free (copied_bbs);

  return changed ? TODO_cleanup_cfg : 0;
}

/* Initialize the loop structures we need, and finalize after.  */

unsigned int
pass_ch::execute (function *fun)
{
  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
		       | LOOPS_HAVE_SIMPLE_LATCHES
		       | LOOPS_HAVE_RECORDED_EXITS);

  unsigned int res = copy_headers (fun);

  loop_optimizer_finalize ();
  return res;
}

/* Assume an earlier phase has already initialized all the loop structures that
   we need here (and perhaps others too), and that these will be finalized by
   a later phase.  */

unsigned int
pass_ch_vect::execute (function *fun)
{
  return copy_headers (fun);
}

/* Apply header copying according to a very simple test of do-while shape.  */

bool
pass_ch::process_loop_p (struct loop *loop)
{
  return !do_while_loop_p (loop);
}

/* Apply header-copying to loops where we might enable vectorization.  */

bool
pass_ch_vect::process_loop_p (struct loop *loop)
{
  if (!flag_tree_loop_vectorize && !loop->force_vectorize)
    return false;

  if (loop->dont_vectorize)
    return false;

  /* The vectorizer won't handle anything with multiple exits, so skip.  */
  edge exit = single_exit (loop);
  if (!exit)
    return false;

  if (!do_while_loop_p (loop))
    return true;

  return false;
}

} // anon namespace

gimple_opt_pass *
make_pass_ch_vect (gcc::context *ctxt)
{
  return new pass_ch_vect (ctxt);
}

gimple_opt_pass *
make_pass_ch (gcc::context *ctxt)
{
  return new pass_ch (ctxt);
}