1 /* Code to analyze doloop loops in order for targets to perform late
2    optimizations converting doloops to other forms of hardware loops.
3    Copyright (C) 2011-2016 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "df.h"
27 #include "insn-config.h"
28 #include "regs.h"
29 #include "emit-rtl.h"
30 #include "recog.h"
31 #include "cfgrtl.h"
32 #include "hw-doloop.h"
33 #include "dumpfile.h"
34 
35 /* Dump information collected in LOOPS.  */
36 static void
dump_hwloops(hwloop_info loops)37 dump_hwloops (hwloop_info loops)
38 {
39   hwloop_info loop;
40 
41   for (loop = loops; loop; loop = loop->next)
42     {
43       hwloop_info i;
44       basic_block b;
45       unsigned ix;
46 
47       fprintf (dump_file, ";; loop %d: ", loop->loop_no);
48       if (loop->bad)
49 	fprintf (dump_file, "(bad) ");
50       fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
51 	       loop->head == NULL ? -1 : loop->head->index,
52 	       loop->depth, REGNO (loop->iter_reg));
53 
54       fprintf (dump_file, " blocks: [ ");
55       for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
56 	fprintf (dump_file, "%d ", b->index);
57       fprintf (dump_file, "] ");
58 
59       fprintf (dump_file, " inner loops: [ ");
60       for (ix = 0; loop->loops.iterate (ix, &i); ix++)
61 	fprintf (dump_file, "%d ", i->loop_no);
62       fprintf (dump_file, "]\n");
63     }
64   fprintf (dump_file, "\n");
65 }
66 
67 /* Return true if BB is part of LOOP.  */
68 static bool
bb_in_loop_p(hwloop_info loop,basic_block bb)69 bb_in_loop_p (hwloop_info loop, basic_block bb)
70 {
71   return bitmap_bit_p (loop->block_bitmap, bb->index);
72 }
73 
74 /* Scan the blocks of LOOP (and its inferiors), and record things such as
75    hard registers set, jumps out of and within the loop.  */
76 static void
scan_loop(hwloop_info loop)77 scan_loop (hwloop_info loop)
78 {
79   unsigned ix;
80   basic_block bb;
81 
82   if (loop->bad)
83     return;
84 
85   if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
86 		       REGNO (loop->iter_reg)))
87     loop->iter_reg_used_outside = true;
88 
89   for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
90     {
91       rtx_insn *insn;
92       edge e;
93       edge_iterator ei;
94 
95       if (bb != loop->tail)
96 	FOR_EACH_EDGE (e, ei, bb->succs)
97 	  {
98 	    if (bb_in_loop_p (loop, e->dest))
99 	      {
100 		if (!(e->flags & EDGE_FALLTHRU))
101 		  loop->jumps_within = true;
102 	      }
103 	    else
104 	      {
105 		loop->jumps_outof = true;
106 		if (!loop->bad)
107 		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
108 						REGNO (loop->iter_reg)));
109 	      }
110 	  }
111 
112       for (insn = BB_HEAD (bb);
113 	   insn != NEXT_INSN (BB_END (bb));
114 	   insn = NEXT_INSN (insn))
115 	{
116 	  df_ref def;
117 	  HARD_REG_SET set_this_insn;
118 
119 	  if (!NONDEBUG_INSN_P (insn))
120 	    continue;
121 
122 	  if (recog_memoized (insn) < 0
123 	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
124 		  || asm_noperands (PATTERN (insn)) >= 0))
125 	    loop->has_asm = true;
126 
127 	  CLEAR_HARD_REG_SET (set_this_insn);
128 	  FOR_EACH_INSN_DEF (def, insn)
129 	    {
130 	      rtx dreg = DF_REF_REG (def);
131 
132 	      if (!REG_P (dreg))
133 		continue;
134 
135 	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
136 				   REGNO (dreg));
137 	    }
138 
139 	  if (insn == loop->loop_end)
140 	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
141 	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
142 	    loop->iter_reg_used = true;
143 	  IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
144 	}
145     }
146 }
147 
148 /* Compute the incoming_dest and incoming_src members of LOOP by
149    identifying the edges that jump into the loop.  If there is more
150    than one block that jumps into the loop, incoming_src will be set
151    to NULL; likewise, if there is more than one block in the loop that
152    is the destination of an incoming edge, incoming_dest will be NULL.
153 
154    Return true if either of these two fields is nonnull, false
155    otherwise.  */
156 static bool
process_incoming_edges(hwloop_info loop)157 process_incoming_edges (hwloop_info loop)
158 {
159   edge e;
160   edge_iterator ei;
161   bool first = true;
162 
163   FOR_EACH_EDGE (e, ei, loop->incoming)
164     {
165       if (first)
166 	{
167 	  loop->incoming_src = e->src;
168 	  loop->incoming_dest = e->dest;
169 	  first = false;
170 	}
171       else
172 	{
173 	  if (e->dest != loop->incoming_dest)
174 	    loop->incoming_dest = NULL;
175 	  if (e->src != loop->incoming_src)
176 	    loop->incoming_src = NULL;
177 	}
178     }
179   if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
180     return false;
181 
182   return true;
183 }
184 
185 /* Try to identify a forwarder block that jump into LOOP, and add it to
186    the set of blocks in the loop, updating the vector of incoming blocks as
187    well.  This transformation gives a second chance to loops we couldn't
188    otherwise handle by increasing the chance that we'll end up with one
189    incoming_src block.
190    Return true if we made a change, false otherwise.  */
191 static bool
add_forwarder_blocks(hwloop_info loop)192 add_forwarder_blocks (hwloop_info loop)
193 {
194   edge e;
195   edge_iterator ei;
196 
197   FOR_EACH_EDGE (e, ei, loop->incoming)
198     {
199       if (forwarder_block_p (e->src))
200 	{
201 	  edge e2;
202 	  edge_iterator ei2;
203 
204 	  if (dump_file)
205 	    fprintf (dump_file,
206 		     ";; Adding forwarder block %d to loop %d and retrying\n",
207 		     e->src->index, loop->loop_no);
208 	  loop->blocks.safe_push (e->src);
209 	  bitmap_set_bit (loop->block_bitmap, e->src->index);
210 	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
211 	    vec_safe_push (loop->incoming, e2);
212 	  loop->incoming->unordered_remove (ei.index);
213 	  return true;
214 	}
215     }
216   return false;
217 }
218 
219 /* Called from reorg_loops when a potential loop end is found.  LOOP is
220    a newly set up structure describing the loop, it is this function's
221    responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
222    loop_end insn and its enclosing basic block.  REG is the loop counter
223    register.
224    For our purposes, a loop is defined by the set of blocks reachable from
225    the loop head in which the loop counter register is live.  This matches
226    the expected use; targets that call into this code usually replace the
227    loop counter with a different special register.  */
228 static void
discover_loop(hwloop_info loop,basic_block tail_bb,rtx_insn * tail_insn,rtx reg)229 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
230 {
231   bool found_tail;
232   unsigned dwork = 0;
233   basic_block bb;
234 
235   loop->tail = tail_bb;
236   loop->loop_end = tail_insn;
237   loop->iter_reg = reg;
238   vec_alloc (loop->incoming, 2);
239   loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
240 
241   if (EDGE_COUNT (tail_bb->succs) != 2)
242     {
243       loop->bad = true;
244       return;
245     }
246   loop->head = BRANCH_EDGE (tail_bb)->dest;
247   loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
248 
249   auto_vec<basic_block, 20> works;
250   works.safe_push (loop->head);
251 
252   found_tail = false;
253   for (dwork = 0; works.iterate (dwork, &bb); dwork++)
254     {
255       edge e;
256       edge_iterator ei;
257       if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
258 	{
259 	  /* We've reached the exit block.  The loop must be bad. */
260 	  if (dump_file)
261 	    fprintf (dump_file,
262 		     ";; Loop is bad - reached exit block while scanning\n");
263 	  loop->bad = true;
264 	  break;
265 	}
266 
267       if (bitmap_bit_p (loop->block_bitmap, bb->index))
268 	continue;
269 
270       /* We've not seen this block before.  Add it to the loop's
271 	 list and then add each successor to the work list.  */
272 
273       loop->blocks.safe_push (bb);
274       bitmap_set_bit (loop->block_bitmap, bb->index);
275 
276       if (bb == tail_bb)
277 	found_tail = true;
278       else
279 	{
280 	  FOR_EACH_EDGE (e, ei, bb->succs)
281 	    {
282 	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
283 	      if (REGNO_REG_SET_P (df_get_live_in (succ),
284 				   REGNO (loop->iter_reg)))
285 		works.safe_push (succ);
286 	    }
287 	}
288     }
289 
290   if (!found_tail)
291     loop->bad = true;
292 
293   /* Find the predecessor, and make sure nothing else jumps into this loop.  */
294   if (!loop->bad)
295     {
296       FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
297 	{
298 	  edge e;
299 	  edge_iterator ei;
300 	  FOR_EACH_EDGE (e, ei, bb->preds)
301 	    {
302 	      basic_block pred = e->src;
303 
304 	      if (!bb_in_loop_p (loop, pred))
305 		{
306 		  if (dump_file)
307 		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
308 			     loop->loop_no, pred->index,
309 			     e->dest->index);
310 		  vec_safe_push (loop->incoming, e);
311 		}
312 	    }
313 	}
314 
315       if (!process_incoming_edges (loop))
316 	{
317 	  if (dump_file)
318 	    fprintf (dump_file,
319 		     ";; retrying loop %d with forwarder blocks\n",
320 		     loop->loop_no);
321 	  if (!add_forwarder_blocks (loop))
322 	    {
323 	      if (dump_file)
324 		fprintf (dump_file, ";; No forwarder blocks found\n");
325 	      loop->bad = true;
326 	    }
327 	  else if (!process_incoming_edges (loop))
328 	    {
329 	      if (dump_file)
330 		fprintf (dump_file,
331 			 ";; can't find suitable entry for loop %d\n",
332 			 loop->loop_no);
333 	    }
334 	}
335     }
336 }
337 
338 /* Analyze the structure of the loops in the current function.  Use
339    LOOP_STACK for bitmap allocations.  Returns all the valid candidates for
340    hardware loops found in this function.  HOOKS is the argument
341    passed to reorg_loops, used here to find the iteration registers
342    from a loop_end pattern.  */
343 static hwloop_info
discover_loops(bitmap_obstack * loop_stack,struct hw_doloop_hooks * hooks)344 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
345 {
346   hwloop_info loops = NULL;
347   hwloop_info loop;
348   basic_block bb;
349   int nloops = 0;
350 
351   /* Find all the possible loop tails.  This means searching for every
352      loop_end instruction.  For each one found, create a hwloop_info
353      structure and add the head block to the work list. */
354   FOR_EACH_BB_FN (bb, cfun)
355     {
356       rtx_insn *tail = BB_END (bb);
357       rtx_insn *insn;
358       rtx reg;
359 
360       while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
361 	tail = PREV_INSN (tail);
362 
363       if (tail == NULL_RTX)
364 	continue;
365 
366       if (!JUMP_P (tail))
367 	continue;
368       reg = hooks->end_pattern_reg (tail);
369       if (reg == NULL_RTX)
370 	continue;
371 
372       /* A possible loop end */
373 
374       /* There's a degenerate case we can handle - an empty loop consisting
375 	 of only a back branch.  Handle that by deleting the branch.  */
376       insn = JUMP_LABEL_AS_INSN (tail);
377       while (insn && !NONDEBUG_INSN_P (insn))
378 	insn = NEXT_INSN (insn);
379       if (insn == tail)
380 	{
381 	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
382 	  if (dump_file)
383 	    {
384 	      fprintf (dump_file, ";; degenerate loop ending at\n");
385 	      print_rtl_single (dump_file, tail);
386 	    }
387 	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
388 	    {
389 	      if (dump_file)
390 		fprintf (dump_file, ";; deleting it\n");
391 	      delete_insn_and_edges (tail);
392 	      continue;
393 	    }
394 	}
395 
396       loop = XCNEW (struct hwloop_info_d);
397       loop->next = loops;
398       loops = loop;
399       loop->loop_no = nloops++;
400       loop->blocks.create (20);
401       loop->block_bitmap = BITMAP_ALLOC (loop_stack);
402 
403       if (dump_file)
404 	{
405 	  fprintf (dump_file, ";; potential loop %d ending at\n",
406 		   loop->loop_no);
407 	  print_rtl_single (dump_file, tail);
408 	}
409 
410       discover_loop (loop, bb, tail, reg);
411     }
412 
413   /* Compute loop nestings.  Given two loops A and B, either the sets
414      of their blocks don't intersect at all, or one is the subset of
415      the other, or the blocks don't form a good nesting structure.  */
416   for (loop = loops; loop; loop = loop->next)
417     {
418       hwloop_info other;
419 
420       if (loop->bad)
421 	continue;
422 
423       for (other = loops; other; other = other->next)
424 	{
425 	  if (other->bad)
426 	    continue;
427 
428 	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
429 	    continue;
430 	  if (!bitmap_intersect_compl_p (other->block_bitmap,
431 					 loop->block_bitmap))
432 	    loop->loops.safe_push (other);
433 	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
434 					      other->block_bitmap))
435 	    other->loops.safe_push (loop);
436 	  else
437 	    {
438 	      if (dump_file)
439 		fprintf (dump_file,
440 			 ";; can't find suitable nesting for loops %d and %d\n",
441 			 loop->loop_no, other->loop_no);
442 	      loop->bad = other->bad = true;
443 	    }
444 	}
445     }
446 
447   if (dump_file)
448     dump_hwloops (loops);
449 
450   return loops;
451 }
452 
453 /* Free up the loop structures in LOOPS.  */
454 static void
free_loops(hwloop_info loops)455 free_loops (hwloop_info loops)
456 {
457   while (loops)
458     {
459       hwloop_info loop = loops;
460       loops = loop->next;
461       loop->loops.release ();
462       loop->blocks.release ();
463       BITMAP_FREE (loop->block_bitmap);
464       XDELETE (loop);
465     }
466 }
467 
468 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
469 
470 /* Initialize the aux fields to give ascending indices to basic blocks.  */
471 static void
set_bb_indices(void)472 set_bb_indices (void)
473 {
474   basic_block bb;
475   intptr_t index;
476 
477   index = 0;
478   FOR_EACH_BB_FN (bb, cfun)
479     bb->aux = (void *) index++;
480 }
481 
482 /* The taken-branch edge from the loop end can actually go forward.
483    If the target's hardware loop support requires that the loop end be
484    after the loop start, try to reorder a loop's basic blocks when we
485    find such a case.
486    This is not very aggressive; it only moves at most one block.  It
487    does not introduce new branches into loops; it may remove them, or
488    it may switch fallthru/jump edges.  */
489 static void
reorder_loops(hwloop_info loops)490 reorder_loops (hwloop_info loops)
491 {
492   basic_block bb;
493   hwloop_info loop;
494 
495   cfg_layout_initialize (0);
496 
497   set_bb_indices ();
498 
499   for (loop = loops; loop; loop = loop->next)
500     {
501       edge e;
502       edge_iterator ei;
503 
504       if (loop->bad)
505 	continue;
506 
507       if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
508 	continue;
509 
510       FOR_EACH_EDGE (e, ei, loop->head->succs)
511 	{
512 	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
513 	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
514 	    {
515 	      basic_block start_bb = e->dest;
516 	      basic_block start_prev_bb = start_bb->prev_bb;
517 
518 	      if (dump_file)
519 		fprintf (dump_file, ";; Moving block %d before block %d\n",
520 			 loop->head->index, start_bb->index);
521 	      loop->head->prev_bb->next_bb = loop->head->next_bb;
522 	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
523 
524 	      loop->head->prev_bb = start_prev_bb;
525 	      loop->head->next_bb = start_bb;
526 	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
527 
528 	      set_bb_indices ();
529 	      break;
530 	    }
531 	}
532       loops = loops->next;
533     }
534 
535   FOR_EACH_BB_FN (bb, cfun)
536     {
537       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
538 	bb->aux = bb->next_bb;
539       else
540 	bb->aux = NULL;
541     }
542   cfg_layout_finalize ();
543   clear_aux_for_blocks ();
544   df_analyze ();
545 }
546 
547 /* Call the OPT function for LOOP and all of its sub-loops.  This is
548    done in a depth-first search; innermost loops are visited first.
549    OPTIMIZE and FAIL are the functions passed to reorg_loops by the
550    target's reorg pass.  */
551 static void
optimize_loop(hwloop_info loop,struct hw_doloop_hooks * hooks)552 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
553 {
554   int ix;
555   hwloop_info inner;
556   int inner_depth = 0;
557 
558   if (loop->visited)
559     return;
560 
561   loop->visited = 1;
562 
563   if (loop->bad)
564     {
565       if (dump_file)
566 	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
567       goto bad_loop;
568     }
569 
570   /* Every loop contains in its list of inner loops every loop nested inside
571      it, even if there are intermediate loops.  This works because we're doing
572      a depth-first search here and never visit a loop more than once.
573      Recursion depth is effectively limited by the number of available
574      hardware registers.  */
575   for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
576     {
577       optimize_loop (inner, hooks);
578 
579       if (!inner->bad && inner_depth < inner->depth)
580 	inner_depth = inner->depth;
581       /* The set of registers may be changed while optimizing the inner
582 	 loop.  */
583       IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
584     }
585 
586   loop->depth = inner_depth + 1;
587 
588   if (hooks->opt (loop))
589     return;
590 
591  bad_loop:
592   if (dump_file)
593     fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
594 
595   loop->bad = true;
596   hooks->fail (loop);
597 }
598 
599 /* This function can be used from a port's machine_dependent_reorg to
600    find and analyze loops that end in loop_end instructions.  It uses
601    a set of function pointers in HOOKS to call back into the
602    target-specific functions to perform the actual machine-specific
603    transformations.
604 
605    Such transformations typically involve additional set-up
606    instructions before the loop, to define loop bounds or set up a
607    special loop counter register.
608 
609    DO_REORDER should be set to true if we should try to use the
610    reorder_loops function to ensure the loop end occurs after the loop
611    start.  This is for use by targets where the loop hardware requires
612    this condition.
613 
614    HOOKS is used to pass in target specific hooks; see
615    hw-doloop.h.  */
616 void
reorg_loops(bool do_reorder,struct hw_doloop_hooks * hooks)617 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
618 {
619   hwloop_info loops = NULL;
620   hwloop_info loop;
621   bitmap_obstack loop_stack;
622 
623   df_live_add_problem ();
624   df_live_set_all_dirty ();
625   df_analyze ();
626 
627   bitmap_obstack_initialize (&loop_stack);
628 
629   if (dump_file)
630     fprintf (dump_file, ";; Find loops, first pass\n\n");
631 
632   loops = discover_loops (&loop_stack, hooks);
633 
634   /* We can't enter cfglayout mode anymore if basic block partitioning
635      already happened.  */
636   if (do_reorder && !flag_reorder_blocks_and_partition)
637     {
638       reorder_loops (loops);
639       free_loops (loops);
640 
641       if (dump_file)
642 	fprintf (dump_file, ";; Find loops, second pass\n\n");
643 
644       loops = discover_loops (&loop_stack, hooks);
645     }
646 
647   for (loop = loops; loop; loop = loop->next)
648     scan_loop (loop);
649 
650   /* Now apply the optimizations.  */
651   for (loop = loops; loop; loop = loop->next)
652     optimize_loop (loop, hooks);
653 
654   if (dump_file)
655     {
656       fprintf (dump_file, ";; After hardware loops optimization:\n\n");
657       dump_hwloops (loops);
658     }
659 
660   free_loops (loops);
661   bitmap_obstack_release (&loop_stack);
662 
663   if (dump_file)
664     print_rtl (dump_file, get_insns ());
665 }
666