1 /* Loop optimizer initialization routines and RTL loop optimization passes.
2    Copyright (C) 2002-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "cfghooks.h"
28 #include "df.h"
29 #include "regs.h"
30 #include "cfgcleanup.h"
31 #include "cfgloop.h"
32 #include "tree-pass.h"
33 #include "tree-ssa-loop-niter.h"
34 #include "loop-unroll.h"
35 #include "tree-scalar-evolution.h"
36 
37 
38 /* Apply FLAGS to the loop state.  */
39 
40 static void
apply_loop_flags(unsigned flags)41 apply_loop_flags (unsigned flags)
42 {
43   if (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES)
44     {
45       /* If the loops may have multiple latches, we cannot canonicalize
46 	 them further (and most of the loop manipulation functions will
47 	 not work).  However, we avoid modifying cfg, which some
48 	 passes may want.  */
49       gcc_assert ((flags & ~(LOOPS_MAY_HAVE_MULTIPLE_LATCHES
50 			     | LOOPS_HAVE_RECORDED_EXITS)) == 0);
51       loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
52     }
53   else
54     disambiguate_loops_with_multiple_latches ();
55 
56   /* Create pre-headers.  */
57   if (flags & LOOPS_HAVE_PREHEADERS)
58     {
59       int cp_flags = CP_SIMPLE_PREHEADERS;
60 
61       if (flags & LOOPS_HAVE_FALLTHRU_PREHEADERS)
62         cp_flags |= CP_FALLTHRU_PREHEADERS;
63 
64       create_preheaders (cp_flags);
65     }
66 
67   /* Force all latches to have only single successor.  */
68   if (flags & LOOPS_HAVE_SIMPLE_LATCHES)
69     force_single_succ_latches ();
70 
71   /* Mark irreducible loops.  */
72   if (flags & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
73     mark_irreducible_loops ();
74 
75   if (flags & LOOPS_HAVE_RECORDED_EXITS)
76     record_loop_exits ();
77 }
78 
79 /* Initialize loop structures.  This is used by the tree and RTL loop
80    optimizers.  FLAGS specify what properties to compute and/or ensure for
81    loops.  */
82 
83 void
loop_optimizer_init(unsigned flags)84 loop_optimizer_init (unsigned flags)
85 {
86   timevar_push (TV_LOOP_INIT);
87 
88   if (!current_loops)
89     {
90       gcc_assert (!(cfun->curr_properties & PROP_loops));
91 
92       /* Find the loops.  */
93       current_loops = flow_loops_find (NULL);
94     }
95   else
96     {
97       bool recorded_exits = loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS);
98       bool needs_fixup = loops_state_satisfies_p (LOOPS_NEED_FIXUP);
99 
100       gcc_assert (cfun->curr_properties & PROP_loops);
101 
102       /* Ensure that the dominators are computed, like flow_loops_find does.  */
103       calculate_dominance_info (CDI_DOMINATORS);
104 
105       if (!needs_fixup)
106 	checking_verify_loop_structure ();
107 
108       /* Clear all flags.  */
109       if (recorded_exits)
110 	release_recorded_exits (cfun);
111       loops_state_clear (~0U);
112 
113       if (needs_fixup)
114 	{
115 	  /* Apply LOOPS_MAY_HAVE_MULTIPLE_LATCHES early as fix_loop_structure
116 	     re-applies flags.  */
117 	  loops_state_set (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
118 	  fix_loop_structure (NULL);
119 	}
120     }
121 
122   /* Apply flags to loops.  */
123   apply_loop_flags (flags);
124 
125   /* Dump loops.  */
126   flow_loops_dump (dump_file, NULL, 1);
127 
128   checking_verify_loop_structure ();
129 
130   timevar_pop (TV_LOOP_INIT);
131 }
132 
133 /* Finalize loop structures.  */
134 
135 void
loop_optimizer_finalize(struct function * fn)136 loop_optimizer_finalize (struct function *fn)
137 {
138   struct loop *loop;
139   basic_block bb;
140 
141   timevar_push (TV_LOOP_FINI);
142 
143   if (loops_state_satisfies_p (fn, LOOPS_HAVE_RECORDED_EXITS))
144     release_recorded_exits (fn);
145 
146   free_numbers_of_iterations_estimates (fn);
147 
148   /* If we should preserve loop structure, do not free it but clear
149      flags that advanced properties are there as we are not preserving
150      that in full.  */
151   if (fn->curr_properties & PROP_loops)
152     {
153       loops_state_clear (fn, LOOP_CLOSED_SSA
154 			 | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
155 			 | LOOPS_HAVE_PREHEADERS
156 			 | LOOPS_HAVE_SIMPLE_LATCHES
157 			 | LOOPS_HAVE_FALLTHRU_PREHEADERS);
158       loops_state_set (fn, LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
159       goto loop_fini_done;
160     }
161 
162   FOR_EACH_LOOP_FN (fn, loop, 0)
163     free_simple_loop_desc (loop);
164 
165   /* Clean up.  */
166   flow_loops_free (loops_for_fn (fn));
167   ggc_free (loops_for_fn (fn));
168   set_loops_for_fn (fn, NULL);
169 
170   FOR_ALL_BB_FN (bb, fn)
171     {
172       bb->loop_father = NULL;
173     }
174 
175 loop_fini_done:
176   timevar_pop (TV_LOOP_FINI);
177 }
178 
179 /* The structure of loops might have changed.  Some loops might get removed
180    (and their headers and latches were set to NULL), loop exists might get
181    removed (thus the loop nesting may be wrong), and some blocks and edges
182    were changed (so the information about bb --> loop mapping does not have
183    to be correct).  But still for the remaining loops the header dominates
184    the latch, and loops did not get new subloops (new loops might possibly
185    get created, but we are not interested in them).  Fix up the mess.
186 
187    If CHANGED_BBS is not NULL, basic blocks whose loop depth has changed are
188    marked in it.
189 
190    Returns the number of new discovered loops.  */
191 
192 unsigned
fix_loop_structure(bitmap changed_bbs)193 fix_loop_structure (bitmap changed_bbs)
194 {
195   basic_block bb;
196   int record_exits = 0;
197   struct loop *loop;
198   unsigned old_nloops, i;
199 
200   timevar_push (TV_LOOP_INIT);
201 
202   if (dump_file && (dump_flags & TDF_DETAILS))
203     fprintf (dump_file, "fix_loop_structure: fixing up loops for function\n");
204 
205   /* We need exact and fast dominance info to be available.  */
206   gcc_assert (dom_info_state (CDI_DOMINATORS) == DOM_OK);
207 
208   if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
209     {
210       release_recorded_exits (cfun);
211       record_exits = LOOPS_HAVE_RECORDED_EXITS;
212     }
213 
214   /* Remember the depth of the blocks in the loop hierarchy, so that we can
215      recognize blocks whose loop nesting relationship has changed.  */
216   if (changed_bbs)
217     FOR_EACH_BB_FN (bb, cfun)
218       bb->aux = (void *) (size_t) loop_depth (bb->loop_father);
219 
220   /* Remove the dead loops from structures.  We start from the innermost
221      loops, so that when we remove the loops, we know that the loops inside
222      are preserved, and do not waste time relinking loops that will be
223      removed later.  */
224   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
225     {
226       /* Detect the case that the loop is no longer present even though
227          it wasn't marked for removal.
228 	 ???  If we do that we can get away with not marking loops for
229 	 removal at all.  And possibly avoid some spurious removals.  */
230       if (loop->header
231 	  && bb_loop_header_p (loop->header))
232 	continue;
233 
234       if (dump_file && (dump_flags & TDF_DETAILS))
235 	fprintf (dump_file, "fix_loop_structure: removing loop %d\n",
236 		 loop->num);
237 
238       while (loop->inner)
239 	{
240 	  struct loop *ploop = loop->inner;
241 	  flow_loop_tree_node_remove (ploop);
242 	  flow_loop_tree_node_add (loop_outer (loop), ploop);
243 	}
244 
245       /* Remove the loop.  */
246       if (loop->header)
247 	loop->former_header = loop->header;
248       else
249 	gcc_assert (loop->former_header != NULL);
250       loop->header = NULL;
251       flow_loop_tree_node_remove (loop);
252     }
253 
254   /* Remember the number of loops so we can return how many new loops
255      flow_loops_find discovered.  */
256   old_nloops = number_of_loops (cfun);
257 
258   /* Re-compute loop structure in-place.  */
259   flow_loops_find (current_loops);
260 
261   /* Mark the blocks whose loop has changed.  */
262   if (changed_bbs)
263     {
264       FOR_EACH_BB_FN (bb, cfun)
265 	{
266 	  if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux)
267 	    bitmap_set_bit (changed_bbs, bb->index);
268 
269     	  bb->aux = NULL;
270 	}
271     }
272 
273   /* Finally free deleted loops.  */
274   bool any_deleted = false;
275   FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
276     if (loop && loop->header == NULL)
277       {
278 	if (dump_file
279 	    && ((unsigned) loop->former_header->index
280 		< basic_block_info_for_fn (cfun)->length ()))
281 	  {
282 	    basic_block former_header
283 	      = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index);
284 	    /* If the old header still exists we want to check if the
285 	       original loop is re-discovered or the old header is now
286 	       part of a newly discovered loop.
287 	       In both cases we should have avoided removing the loop.  */
288 	    if (former_header == loop->former_header)
289 	      {
290 		if (former_header->loop_father->header == former_header)
291 		  fprintf (dump_file, "fix_loop_structure: rediscovered "
292 			   "removed loop %d as loop %d with old header %d\n",
293 			   loop->num, former_header->loop_father->num,
294 			   former_header->index);
295 		else if ((unsigned) former_header->loop_father->num
296 			 >= old_nloops)
297 		  fprintf (dump_file, "fix_loop_structure: header %d of "
298 			   "removed loop %d is part of the newly "
299 			   "discovered loop %d with header %d\n",
300 			   former_header->index, loop->num,
301 			   former_header->loop_father->num,
302 			   former_header->loop_father->header->index);
303 	      }
304 	  }
305 	(*get_loops (cfun))[i] = NULL;
306 	flow_loop_free (loop);
307 	any_deleted = true;
308       }
309 
310   /* If we deleted loops then the cached scalar evolutions refering to
311      those loops become invalid.  */
312   if (any_deleted && scev_initialized_p ())
313     scev_reset_htab ();
314 
315   loops_state_clear (LOOPS_NEED_FIXUP);
316 
317   /* Apply flags to loops.  */
318   apply_loop_flags (current_loops->state | record_exits);
319 
320   checking_verify_loop_structure ();
321 
322   timevar_pop (TV_LOOP_INIT);
323 
324   return number_of_loops (cfun) - old_nloops;
325 }
326 
327 /* The RTL loop superpass.  The actual passes are subpasses.  See passes.c for
328    more on that.  */
329 
330 namespace {
331 
332 const pass_data pass_data_loop2 =
333 {
334   RTL_PASS, /* type */
335   "loop2", /* name */
336   OPTGROUP_LOOP, /* optinfo_flags */
337   TV_LOOP, /* tv_id */
338   0, /* properties_required */
339   0, /* properties_provided */
340   0, /* properties_destroyed */
341   0, /* todo_flags_start */
342   0, /* todo_flags_finish */
343 };
344 
345 class pass_loop2 : public rtl_opt_pass
346 {
347 public:
pass_loop2(gcc::context * ctxt)348   pass_loop2 (gcc::context *ctxt)
349     : rtl_opt_pass (pass_data_loop2, ctxt)
350   {}
351 
352   /* opt_pass methods: */
353   virtual bool gate (function *);
354 
355 }; // class pass_loop2
356 
357 bool
gate(function * fun)358 pass_loop2::gate (function *fun)
359 {
360   if (optimize > 0
361       && (flag_move_loop_invariants
362 	  || flag_unswitch_loops
363 	  || flag_unroll_loops
364 	  || (flag_branch_on_count_reg && targetm.have_doloop_end ())
365 	  || cfun->has_unroll))
366     return true;
367   else
368     {
369       /* No longer preserve loops, remove them now.  */
370       fun->curr_properties &= ~PROP_loops;
371       if (current_loops)
372 	loop_optimizer_finalize ();
373       return false;
374     }
375 }
376 
377 } // anon namespace
378 
379 rtl_opt_pass *
make_pass_loop2(gcc::context * ctxt)380 make_pass_loop2 (gcc::context *ctxt)
381 {
382   return new pass_loop2 (ctxt);
383 }
384 
385 
386 /* Initialization of the RTL loop passes.  */
387 static unsigned int
rtl_loop_init(void)388 rtl_loop_init (void)
389 {
390   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
391 
392   if (dump_file)
393     {
394       dump_reg_info (dump_file);
395       dump_flow_info (dump_file, dump_flags);
396     }
397 
398   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
399   return 0;
400 }
401 
402 namespace {
403 
404 const pass_data pass_data_rtl_loop_init =
405 {
406   RTL_PASS, /* type */
407   "loop2_init", /* name */
408   OPTGROUP_LOOP, /* optinfo_flags */
409   TV_LOOP, /* tv_id */
410   0, /* properties_required */
411   0, /* properties_provided */
412   0, /* properties_destroyed */
413   0, /* todo_flags_start */
414   0, /* todo_flags_finish */
415 };
416 
417 class pass_rtl_loop_init : public rtl_opt_pass
418 {
419 public:
pass_rtl_loop_init(gcc::context * ctxt)420   pass_rtl_loop_init (gcc::context *ctxt)
421     : rtl_opt_pass (pass_data_rtl_loop_init, ctxt)
422   {}
423 
424   /* opt_pass methods: */
execute(function *)425   virtual unsigned int execute (function *) { return rtl_loop_init (); }
426 
427 }; // class pass_rtl_loop_init
428 
429 } // anon namespace
430 
431 rtl_opt_pass *
make_pass_rtl_loop_init(gcc::context * ctxt)432 make_pass_rtl_loop_init (gcc::context *ctxt)
433 {
434   return new pass_rtl_loop_init (ctxt);
435 }
436 
437 
438 /* Finalization of the RTL loop passes.  */
439 
440 namespace {
441 
442 const pass_data pass_data_rtl_loop_done =
443 {
444   RTL_PASS, /* type */
445   "loop2_done", /* name */
446   OPTGROUP_LOOP, /* optinfo_flags */
447   TV_LOOP, /* tv_id */
448   0, /* properties_required */
449   0, /* properties_provided */
450   PROP_loops, /* properties_destroyed */
451   0, /* todo_flags_start */
452   0, /* todo_flags_finish */
453 };
454 
455 class pass_rtl_loop_done : public rtl_opt_pass
456 {
457 public:
pass_rtl_loop_done(gcc::context * ctxt)458   pass_rtl_loop_done (gcc::context *ctxt)
459     : rtl_opt_pass (pass_data_rtl_loop_done, ctxt)
460   {}
461 
462   /* opt_pass methods: */
463   virtual unsigned int execute (function *);
464 
465 }; // class pass_rtl_loop_done
466 
467 unsigned int
execute(function * fun)468 pass_rtl_loop_done::execute (function *fun)
469 {
470   /* No longer preserve loops, remove them now.  */
471   fun->curr_properties &= ~PROP_loops;
472   loop_optimizer_finalize ();
473   free_dominance_info (CDI_DOMINATORS);
474 
475   cleanup_cfg (0);
476   if (dump_file)
477     {
478       dump_reg_info (dump_file);
479       dump_flow_info (dump_file, dump_flags);
480     }
481 
482   return 0;
483 }
484 
485 } // anon namespace
486 
487 rtl_opt_pass *
make_pass_rtl_loop_done(gcc::context * ctxt)488 make_pass_rtl_loop_done (gcc::context *ctxt)
489 {
490   return new pass_rtl_loop_done (ctxt);
491 }
492 
493 
494 /* Loop invariant code motion.  */
495 
496 namespace {
497 
498 const pass_data pass_data_rtl_move_loop_invariants =
499 {
500   RTL_PASS, /* type */
501   "loop2_invariant", /* name */
502   OPTGROUP_LOOP, /* optinfo_flags */
503   TV_LOOP_MOVE_INVARIANTS, /* tv_id */
504   0, /* properties_required */
505   0, /* properties_provided */
506   0, /* properties_destroyed */
507   0, /* todo_flags_start */
508   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
509 };
510 
511 class pass_rtl_move_loop_invariants : public rtl_opt_pass
512 {
513 public:
pass_rtl_move_loop_invariants(gcc::context * ctxt)514   pass_rtl_move_loop_invariants (gcc::context *ctxt)
515     : rtl_opt_pass (pass_data_rtl_move_loop_invariants, ctxt)
516   {}
517 
518   /* opt_pass methods: */
gate(function *)519   virtual bool gate (function *) { return flag_move_loop_invariants; }
execute(function * fun)520   virtual unsigned int execute (function *fun)
521     {
522       if (number_of_loops (fun) > 1)
523 	move_loop_invariants ();
524       return 0;
525     }
526 
527 }; // class pass_rtl_move_loop_invariants
528 
529 } // anon namespace
530 
531 rtl_opt_pass *
make_pass_rtl_move_loop_invariants(gcc::context * ctxt)532 make_pass_rtl_move_loop_invariants (gcc::context *ctxt)
533 {
534   return new pass_rtl_move_loop_invariants (ctxt);
535 }
536 
537 
538 namespace {
539 
540 const pass_data pass_data_rtl_unroll_loops =
541 {
542   RTL_PASS, /* type */
543   "loop2_unroll", /* name */
544   OPTGROUP_LOOP, /* optinfo_flags */
545   TV_LOOP_UNROLL, /* tv_id */
546   0, /* properties_required */
547   0, /* properties_provided */
548   0, /* properties_destroyed */
549   0, /* todo_flags_start */
550   0, /* todo_flags_finish */
551 };
552 
553 class pass_rtl_unroll_loops : public rtl_opt_pass
554 {
555 public:
pass_rtl_unroll_loops(gcc::context * ctxt)556   pass_rtl_unroll_loops (gcc::context *ctxt)
557     : rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt)
558   {}
559 
560   /* opt_pass methods: */
gate(function *)561   virtual bool gate (function *)
562     {
563       return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll);
564     }
565 
566   virtual unsigned int execute (function *);
567 
568 }; // class pass_rtl_unroll_loops
569 
570 unsigned int
execute(function * fun)571 pass_rtl_unroll_loops::execute (function *fun)
572 {
573   if (number_of_loops (fun) > 1)
574     {
575       int flags = 0;
576       if (dump_file)
577 	df_dump (dump_file);
578 
579       if (flag_unroll_loops)
580 	flags |= UAP_UNROLL;
581       if (flag_unroll_all_loops)
582 	flags |= UAP_UNROLL_ALL;
583 
584       unroll_loops (flags);
585     }
586   return 0;
587 }
588 
589 } // anon namespace
590 
591 rtl_opt_pass *
make_pass_rtl_unroll_loops(gcc::context * ctxt)592 make_pass_rtl_unroll_loops (gcc::context *ctxt)
593 {
594   return new pass_rtl_unroll_loops (ctxt);
595 }
596 
597 
598 namespace {
599 
600 const pass_data pass_data_rtl_doloop =
601 {
602   RTL_PASS, /* type */
603   "loop2_doloop", /* name */
604   OPTGROUP_LOOP, /* optinfo_flags */
605   TV_LOOP_DOLOOP, /* tv_id */
606   0, /* properties_required */
607   0, /* properties_provided */
608   0, /* properties_destroyed */
609   0, /* todo_flags_start */
610   0, /* todo_flags_finish */
611 };
612 
613 class pass_rtl_doloop : public rtl_opt_pass
614 {
615 public:
pass_rtl_doloop(gcc::context * ctxt)616   pass_rtl_doloop (gcc::context *ctxt)
617     : rtl_opt_pass (pass_data_rtl_doloop, ctxt)
618   {}
619 
620   /* opt_pass methods: */
621   virtual bool gate (function *);
622   virtual unsigned int execute (function *);
623 
624 }; // class pass_rtl_doloop
625 
626 bool
gate(function *)627 pass_rtl_doloop::gate (function *)
628 {
629   return (flag_branch_on_count_reg && targetm.have_doloop_end ());
630 }
631 
632 unsigned int
execute(function * fun)633 pass_rtl_doloop::execute (function *fun)
634 {
635   if (number_of_loops (fun) > 1)
636     doloop_optimize_loops ();
637   return 0;
638 }
639 
640 } // anon namespace
641 
642 rtl_opt_pass *
make_pass_rtl_doloop(gcc::context * ctxt)643 make_pass_rtl_doloop (gcc::context *ctxt)
644 {
645   return new pass_rtl_doloop (ctxt);
646 }
647