1 /* Code to analyze doloop loops in order for targets to perform late
2    optimizations converting doloops to other forms of hardware loops.
3    Copyright (C) 2011-2014 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "expr.h"
28 #include "hard-reg-set.h"
29 #include "regs.h"
30 #include "basic-block.h"
31 #include "tm_p.h"
32 #include "df.h"
33 #include "cfgloop.h"
34 #include "recog.h"
35 #include "target.h"
36 #include "hw-doloop.h"
37 #include "dumpfile.h"
38 
39 #ifdef HAVE_doloop_end
40 
41 /* Dump information collected in LOOPS.  */
42 static void
dump_hwloops(hwloop_info loops)43 dump_hwloops (hwloop_info loops)
44 {
45   hwloop_info loop;
46 
47   for (loop = loops; loop; loop = loop->next)
48     {
49       hwloop_info i;
50       basic_block b;
51       unsigned ix;
52 
53       fprintf (dump_file, ";; loop %d: ", loop->loop_no);
54       if (loop->bad)
55 	fprintf (dump_file, "(bad) ");
56       fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
57 	       loop->head == NULL ? -1 : loop->head->index,
58 	       loop->depth, REGNO (loop->iter_reg));
59 
60       fprintf (dump_file, " blocks: [ ");
61       for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
62 	fprintf (dump_file, "%d ", b->index);
63       fprintf (dump_file, "] ");
64 
65       fprintf (dump_file, " inner loops: [ ");
66       for (ix = 0; loop->loops.iterate (ix, &i); ix++)
67 	fprintf (dump_file, "%d ", i->loop_no);
68       fprintf (dump_file, "]\n");
69     }
70   fprintf (dump_file, "\n");
71 }
72 
73 /* Return true if BB is part of LOOP.  */
74 static bool
bb_in_loop_p(hwloop_info loop,basic_block bb)75 bb_in_loop_p (hwloop_info loop, basic_block bb)
76 {
77   return bitmap_bit_p (loop->block_bitmap, bb->index);
78 }
79 
80 /* Scan the blocks of LOOP (and its inferiors), and record things such as
81    hard registers set, jumps out of and within the loop.  */
82 static void
scan_loop(hwloop_info loop)83 scan_loop (hwloop_info loop)
84 {
85   unsigned ix;
86   basic_block bb;
87 
88   if (loop->bad)
89     return;
90 
91   if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
92 		       REGNO (loop->iter_reg)))
93     loop->iter_reg_used_outside = true;
94 
95   for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
96     {
97       rtx insn;
98       edge e;
99       edge_iterator ei;
100 
101       if (bb != loop->tail)
102 	FOR_EACH_EDGE (e, ei, bb->succs)
103 	  {
104 	    if (bb_in_loop_p (loop, e->dest))
105 	      {
106 		if (!(e->flags & EDGE_FALLTHRU))
107 		  loop->jumps_within = true;
108 	      }
109 	    else
110 	      {
111 		loop->jumps_outof = true;
112 		if (!loop->bad)
113 		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
114 						REGNO (loop->iter_reg)));
115 	      }
116 	  }
117 
118       for (insn = BB_HEAD (bb);
119 	   insn != NEXT_INSN (BB_END (bb));
120 	   insn = NEXT_INSN (insn))
121 	{
122 	  df_ref *def_rec;
123 	  HARD_REG_SET set_this_insn;
124 
125 	  if (!NONDEBUG_INSN_P (insn))
126 	    continue;
127 
128 	  if (recog_memoized (insn) < 0
129 	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
130 		  || asm_noperands (PATTERN (insn)) >= 0))
131 	    loop->has_asm = true;
132 
133 	  CLEAR_HARD_REG_SET (set_this_insn);
134 	  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
135 	    {
136 	      rtx dreg = DF_REF_REG (*def_rec);
137 
138 	      if (!REG_P (dreg))
139 		continue;
140 
141 	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
142 				   REGNO (dreg));
143 	    }
144 
145 	  if (insn == loop->loop_end)
146 	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
147 	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
148 	    loop->iter_reg_used = true;
149 	  IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
150 	}
151     }
152 }
153 
154 /* Compute the incoming_dest and incoming_src members of LOOP by
155    identifying the edges that jump into the loop.  If there is more
156    than one block that jumps into the loop, incoming_src will be set
157    to NULL; likewise, if there is more than one block in the loop that
158    is the destination of an incoming edge, incoming_dest will be NULL.
159 
160    Return true if either of these two fields is nonnull, false
161    otherwise.  */
162 static bool
process_incoming_edges(hwloop_info loop)163 process_incoming_edges (hwloop_info loop)
164 {
165   edge e;
166   edge_iterator ei;
167   bool first = true;
168 
169   FOR_EACH_EDGE (e, ei, loop->incoming)
170     {
171       if (first)
172 	{
173 	  loop->incoming_src = e->src;
174 	  loop->incoming_dest = e->dest;
175 	  first = false;
176 	}
177       else
178 	{
179 	  if (e->dest != loop->incoming_dest)
180 	    loop->incoming_dest = NULL;
181 	  if (e->src != loop->incoming_src)
182 	    loop->incoming_src = NULL;
183 	}
184     }
185   if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
186     return false;
187 
188   return true;
189 }
190 
191 /* Try to identify a forwarder block that jump into LOOP, and add it to
192    the set of blocks in the loop, updating the vector of incoming blocks as
193    well.  This transformation gives a second chance to loops we couldn't
194    otherwise handle by increasing the chance that we'll end up with one
195    incoming_src block.
196    Return true if we made a change, false otherwise.  */
197 static bool
add_forwarder_blocks(hwloop_info loop)198 add_forwarder_blocks (hwloop_info loop)
199 {
200   edge e;
201   edge_iterator ei;
202 
203   FOR_EACH_EDGE (e, ei, loop->incoming)
204     {
205       if (forwarder_block_p (e->src))
206 	{
207 	  edge e2;
208 	  edge_iterator ei2;
209 
210 	  if (dump_file)
211 	    fprintf (dump_file,
212 		     ";; Adding forwarder block %d to loop %d and retrying\n",
213 		     e->src->index, loop->loop_no);
214 	  loop->blocks.safe_push (e->src);
215 	  bitmap_set_bit (loop->block_bitmap, e->src->index);
216 	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
217 	    vec_safe_push (loop->incoming, e2);
218 	  loop->incoming->unordered_remove (ei.index);
219 	  return true;
220 	}
221     }
222   return false;
223 }
224 
225 /* Called from reorg_loops when a potential loop end is found.  LOOP is
226    a newly set up structure describing the loop, it is this function's
227    responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
228    loop_end insn and its enclosing basic block.  REG is the loop counter
229    register.
230    For our purposes, a loop is defined by the set of blocks reachable from
231    the loop head in which the loop counter register is live.  This matches
232    the expected use; targets that call into this code usually replace the
233    loop counter with a different special register.  */
234 static void
discover_loop(hwloop_info loop,basic_block tail_bb,rtx tail_insn,rtx reg)235 discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg)
236 {
237   bool found_tail;
238   unsigned dwork = 0;
239   basic_block bb;
240 
241   loop->tail = tail_bb;
242   loop->loop_end = tail_insn;
243   loop->iter_reg = reg;
244   vec_alloc (loop->incoming, 2);
245   loop->start_label = JUMP_LABEL (tail_insn);
246 
247   if (EDGE_COUNT (tail_bb->succs) != 2)
248     {
249       loop->bad = true;
250       return;
251     }
252   loop->head = BRANCH_EDGE (tail_bb)->dest;
253   loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
254 
255   auto_vec<basic_block, 20> works;
256   works.safe_push (loop->head);
257 
258   found_tail = false;
259   for (dwork = 0; works.iterate (dwork, &bb); dwork++)
260     {
261       edge e;
262       edge_iterator ei;
263       if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
264 	{
265 	  /* We've reached the exit block.  The loop must be bad. */
266 	  if (dump_file)
267 	    fprintf (dump_file,
268 		     ";; Loop is bad - reached exit block while scanning\n");
269 	  loop->bad = true;
270 	  break;
271 	}
272 
273       if (bitmap_bit_p (loop->block_bitmap, bb->index))
274 	continue;
275 
276       /* We've not seen this block before.  Add it to the loop's
277 	 list and then add each successor to the work list.  */
278 
279       loop->blocks.safe_push (bb);
280       bitmap_set_bit (loop->block_bitmap, bb->index);
281 
282       if (bb == tail_bb)
283 	found_tail = true;
284       else
285 	{
286 	  FOR_EACH_EDGE (e, ei, bb->succs)
287 	    {
288 	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
289 	      if (REGNO_REG_SET_P (df_get_live_in (succ),
290 				   REGNO (loop->iter_reg)))
291 		works.safe_push (succ);
292 	    }
293 	}
294     }
295 
296   if (!found_tail)
297     loop->bad = true;
298 
299   /* Find the predecessor, and make sure nothing else jumps into this loop.  */
300   if (!loop->bad)
301     {
302       FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
303 	{
304 	  edge e;
305 	  edge_iterator ei;
306 	  FOR_EACH_EDGE (e, ei, bb->preds)
307 	    {
308 	      basic_block pred = e->src;
309 
310 	      if (!bb_in_loop_p (loop, pred))
311 		{
312 		  if (dump_file)
313 		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
314 			     loop->loop_no, pred->index,
315 			     e->dest->index);
316 		  vec_safe_push (loop->incoming, e);
317 		}
318 	    }
319 	}
320 
321       if (!process_incoming_edges (loop))
322 	{
323 	  if (dump_file)
324 	    fprintf (dump_file,
325 		     ";; retrying loop %d with forwarder blocks\n",
326 		     loop->loop_no);
327 	  if (!add_forwarder_blocks (loop))
328 	    {
329 	      if (dump_file)
330 		fprintf (dump_file, ";; No forwarder blocks found\n");
331 	      loop->bad = true;
332 	    }
333 	  else if (!process_incoming_edges (loop))
334 	    {
335 	      if (dump_file)
336 		fprintf (dump_file,
337 			 ";; can't find suitable entry for loop %d\n",
338 			 loop->loop_no);
339 	    }
340 	}
341     }
342 }
343 
344 /* Analyze the structure of the loops in the current function.  Use
345    LOOP_STACK for bitmap allocations.  Returns all the valid candidates for
346    hardware loops found in this function.  HOOKS is the argument
347    passed to reorg_loops, used here to find the iteration registers
348    from a loop_end pattern.  */
349 static hwloop_info
discover_loops(bitmap_obstack * loop_stack,struct hw_doloop_hooks * hooks)350 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
351 {
352   hwloop_info loops = NULL;
353   hwloop_info loop;
354   basic_block bb;
355   int nloops = 0;
356 
357   /* Find all the possible loop tails.  This means searching for every
358      loop_end instruction.  For each one found, create a hwloop_info
359      structure and add the head block to the work list. */
360   FOR_EACH_BB_FN (bb, cfun)
361     {
362       rtx tail = BB_END (bb);
363       rtx insn, reg;
364 
365       while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
366 	tail = PREV_INSN (tail);
367 
368       if (tail == NULL_RTX)
369 	continue;
370 
371       if (!JUMP_P (tail))
372 	continue;
373       reg = hooks->end_pattern_reg (tail);
374       if (reg == NULL_RTX)
375 	continue;
376 
377       /* A possible loop end */
378 
379       /* There's a degenerate case we can handle - an empty loop consisting
380 	 of only a back branch.  Handle that by deleting the branch.  */
381       insn = JUMP_LABEL (tail);
382       while (insn && !NONDEBUG_INSN_P (insn))
383 	insn = NEXT_INSN (insn);
384       if (insn == tail)
385 	{
386 	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
387 	  if (dump_file)
388 	    {
389 	      fprintf (dump_file, ";; degenerate loop ending at\n");
390 	      print_rtl_single (dump_file, tail);
391 	    }
392 	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
393 	    {
394 	      if (dump_file)
395 		fprintf (dump_file, ";; deleting it\n");
396 	      delete_insn_and_edges (tail);
397 	      continue;
398 	    }
399 	}
400 
401       loop = XCNEW (struct hwloop_info_d);
402       loop->next = loops;
403       loops = loop;
404       loop->loop_no = nloops++;
405       loop->blocks.create (20);
406       loop->block_bitmap = BITMAP_ALLOC (loop_stack);
407 
408       if (dump_file)
409 	{
410 	  fprintf (dump_file, ";; potential loop %d ending at\n",
411 		   loop->loop_no);
412 	  print_rtl_single (dump_file, tail);
413 	}
414 
415       discover_loop (loop, bb, tail, reg);
416     }
417 
418   /* Compute loop nestings.  Given two loops A and B, either the sets
419      of their blocks don't intersect at all, or one is the subset of
420      the other, or the blocks don't form a good nesting structure.  */
421   for (loop = loops; loop; loop = loop->next)
422     {
423       hwloop_info other;
424 
425       if (loop->bad)
426 	continue;
427 
428       for (other = loops; other; other = other->next)
429 	{
430 	  if (other->bad)
431 	    continue;
432 
433 	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
434 	    continue;
435 	  if (!bitmap_intersect_compl_p (other->block_bitmap,
436 					 loop->block_bitmap))
437 	    loop->loops.safe_push (other);
438 	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
439 					      other->block_bitmap))
440 	    other->loops.safe_push (loop);
441 	  else
442 	    {
443 	      if (dump_file)
444 		fprintf (dump_file,
445 			 ";; can't find suitable nesting for loops %d and %d\n",
446 			 loop->loop_no, other->loop_no);
447 	      loop->bad = other->bad = true;
448 	    }
449 	}
450     }
451 
452   if (dump_file)
453     dump_hwloops (loops);
454 
455   return loops;
456 }
457 
458 /* Free up the loop structures in LOOPS.  */
459 static void
free_loops(hwloop_info loops)460 free_loops (hwloop_info loops)
461 {
462   while (loops)
463     {
464       hwloop_info loop = loops;
465       loops = loop->next;
466       loop->loops.release ();
467       loop->blocks.release ();
468       BITMAP_FREE (loop->block_bitmap);
469       XDELETE (loop);
470     }
471 }
472 
473 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
474 
475 /* Initialize the aux fields to give ascending indices to basic blocks.  */
476 static void
set_bb_indices(void)477 set_bb_indices (void)
478 {
479   basic_block bb;
480   intptr_t index;
481 
482   index = 0;
483   FOR_EACH_BB_FN (bb, cfun)
484     bb->aux = (void *) index++;
485 }
486 
487 /* The taken-branch edge from the loop end can actually go forward.
488    If the target's hardware loop support requires that the loop end be
489    after the loop start, try to reorder a loop's basic blocks when we
490    find such a case.
491    This is not very aggressive; it only moves at most one block.  It
492    does not introduce new branches into loops; it may remove them, or
493    it may switch fallthru/jump edges.  */
494 static void
reorder_loops(hwloop_info loops)495 reorder_loops (hwloop_info loops)
496 {
497   basic_block bb;
498   hwloop_info loop;
499 
500   cfg_layout_initialize (0);
501 
502   set_bb_indices ();
503 
504   for (loop = loops; loop; loop = loop->next)
505     {
506       edge e;
507       edge_iterator ei;
508 
509       if (loop->bad)
510 	continue;
511 
512       if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
513 	continue;
514 
515       FOR_EACH_EDGE (e, ei, loop->head->succs)
516 	{
517 	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
518 	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
519 	    {
520 	      basic_block start_bb = e->dest;
521 	      basic_block start_prev_bb = start_bb->prev_bb;
522 
523 	      if (dump_file)
524 		fprintf (dump_file, ";; Moving block %d before block %d\n",
525 			 loop->head->index, start_bb->index);
526 	      loop->head->prev_bb->next_bb = loop->head->next_bb;
527 	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
528 
529 	      loop->head->prev_bb = start_prev_bb;
530 	      loop->head->next_bb = start_bb;
531 	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
532 
533 	      set_bb_indices ();
534 	      break;
535 	    }
536 	}
537       loops = loops->next;
538     }
539 
540   FOR_EACH_BB_FN (bb, cfun)
541     {
542       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
543 	bb->aux = bb->next_bb;
544       else
545 	bb->aux = NULL;
546     }
547   cfg_layout_finalize ();
548   clear_aux_for_blocks ();
549   df_analyze ();
550 }
551 
552 /* Call the OPT function for LOOP and all of its sub-loops.  This is
553    done in a depth-first search; innermost loops are visited first.
554    OPTIMIZE and FAIL are the functions passed to reorg_loops by the
555    target's reorg pass.  */
556 static void
optimize_loop(hwloop_info loop,struct hw_doloop_hooks * hooks)557 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
558 {
559   int ix;
560   hwloop_info inner;
561   int inner_depth = 0;
562 
563   if (loop->visited)
564     return;
565 
566   loop->visited = 1;
567 
568   if (loop->bad)
569     {
570       if (dump_file)
571 	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
572       goto bad_loop;
573     }
574 
575   /* Every loop contains in its list of inner loops every loop nested inside
576      it, even if there are intermediate loops.  This works because we're doing
577      a depth-first search here and never visit a loop more than once.
578      Recursion depth is effectively limited by the number of available
579      hardware registers.  */
580   for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
581     {
582       optimize_loop (inner, hooks);
583 
584       if (!inner->bad && inner_depth < inner->depth)
585 	inner_depth = inner->depth;
586       /* The set of registers may be changed while optimizing the inner
587 	 loop.  */
588       IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
589     }
590 
591   loop->depth = inner_depth + 1;
592 
593   if (hooks->opt (loop))
594     return;
595 
596  bad_loop:
597   if (dump_file)
598     fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
599 
600   loop->bad = true;
601   hooks->fail (loop);
602 }
603 
604 /* This function can be used from a port's machine_dependent_reorg to
605    find and analyze loops that end in loop_end instructions.  It uses
606    a set of function pointers in HOOKS to call back into the
607    target-specific functions to perform the actual machine-specific
608    transformations.
609 
610    Such transformations typically involve additional set-up
611    instructions before the loop, to define loop bounds or set up a
612    special loop counter register.
613 
614    DO_REORDER should be set to true if we should try to use the
615    reorder_loops function to ensure the loop end occurs after the loop
616    start.  This is for use by targets where the loop hardware requires
617    this condition.
618 
619    HOOKS is used to pass in target specific hooks; see
620    hw-doloop.h.  */
621 void
reorg_loops(bool do_reorder,struct hw_doloop_hooks * hooks)622 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
623 {
624   hwloop_info loops = NULL;
625   hwloop_info loop;
626   bitmap_obstack loop_stack;
627 
628   df_live_add_problem ();
629   df_live_set_all_dirty ();
630   df_analyze ();
631 
632   bitmap_obstack_initialize (&loop_stack);
633 
634   if (dump_file)
635     fprintf (dump_file, ";; Find loops, first pass\n\n");
636 
637   loops = discover_loops (&loop_stack, hooks);
638 
639   if (do_reorder)
640     {
641       reorder_loops (loops);
642       free_loops (loops);
643 
644       if (dump_file)
645 	fprintf (dump_file, ";; Find loops, second pass\n\n");
646 
647       loops = discover_loops (&loop_stack, hooks);
648     }
649 
650   for (loop = loops; loop; loop = loop->next)
651     scan_loop (loop);
652 
653   /* Now apply the optimizations.  */
654   for (loop = loops; loop; loop = loop->next)
655     optimize_loop (loop, hooks);
656 
657   if (dump_file)
658     {
659       fprintf (dump_file, ";; After hardware loops optimization:\n\n");
660       dump_hwloops (loops);
661     }
662 
663   free_loops (loops);
664   bitmap_obstack_release (&loop_stack);
665 
666   if (dump_file)
667     print_rtl (dump_file, get_insns ());
668 }
669 #endif
670