xref: /dragonfly/contrib/gcc-4.7/gcc/hw-doloop.c (revision e5a92d33)
1 /* Code to analyze doloop loops in order for targets to perform late
2    optimizations converting doloops to other forms of hardware loops.
3    Copyright (C) 2011 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "expr.h"
28 #include "hard-reg-set.h"
29 #include "regs.h"
30 #include "basic-block.h"
31 #include "tm_p.h"
32 #include "df.h"
33 #include "cfglayout.h"
34 #include "cfgloop.h"
35 #include "output.h"
36 #include "recog.h"
37 #include "target.h"
38 #include "hw-doloop.h"
39 
40 #ifdef HAVE_doloop_end
41 
42 /* Dump information collected in LOOPS.  */
43 static void
44 dump_hwloops (hwloop_info loops)
45 {
46   hwloop_info loop;
47 
48   for (loop = loops; loop; loop = loop->next)
49     {
50       hwloop_info i;
51       basic_block b;
52       unsigned ix;
53 
54       fprintf (dump_file, ";; loop %d: ", loop->loop_no);
55       if (loop->bad)
56 	fprintf (dump_file, "(bad) ");
57       fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
58 	       loop->head == NULL ? -1 : loop->head->index,
59 	       loop->depth, REGNO (loop->iter_reg));
60 
61       fprintf (dump_file, " blocks: [ ");
62       for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++)
63 	fprintf (dump_file, "%d ", b->index);
64       fprintf (dump_file, "] ");
65 
66       fprintf (dump_file, " inner loops: [ ");
67       for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, i); ix++)
68 	fprintf (dump_file, "%d ", i->loop_no);
69       fprintf (dump_file, "]\n");
70     }
71   fprintf (dump_file, "\n");
72 }
73 
74 /* Return true if BB is part of LOOP.  */
75 static bool
76 bb_in_loop_p (hwloop_info loop, basic_block bb)
77 {
78   return bitmap_bit_p (loop->block_bitmap, bb->index);
79 }
80 
81 /* Scan the blocks of LOOP (and its inferiors), and record things such as
82    hard registers set, jumps out of and within the loop.  */
83 static void
84 scan_loop (hwloop_info loop)
85 {
86   unsigned ix;
87   basic_block bb;
88 
89   if (loop->bad)
90     return;
91 
92   if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
93 		       REGNO (loop->iter_reg)))
94     loop->iter_reg_used_outside = true;
95 
96   for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
97     {
98       rtx insn;
99       edge e;
100       edge_iterator ei;
101 
102       if (bb != loop->tail)
103 	FOR_EACH_EDGE (e, ei, bb->succs)
104 	  {
105 	    if (bb_in_loop_p (loop, e->dest))
106 	      {
107 		if (!(e->flags & EDGE_FALLTHRU))
108 		  loop->jumps_within = true;
109 	      }
110 	    else
111 	      {
112 		loop->jumps_outof = true;
113 		if (!loop->bad)
114 		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
115 						REGNO (loop->iter_reg)));
116 	      }
117 	  }
118 
119       for (insn = BB_HEAD (bb);
120 	   insn != NEXT_INSN (BB_END (bb));
121 	   insn = NEXT_INSN (insn))
122 	{
123 	  df_ref *def_rec;
124 	  HARD_REG_SET set_this_insn;
125 
126 	  if (!NONDEBUG_INSN_P (insn))
127 	    continue;
128 
129 	  if (recog_memoized (insn) < 0
130 	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
131 		  || asm_noperands (PATTERN (insn)) >= 0))
132 	    loop->has_asm = true;
133 
134 	  CLEAR_HARD_REG_SET (set_this_insn);
135 	  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
136 	    {
137 	      rtx dreg = DF_REF_REG (*def_rec);
138 
139 	      if (!REG_P (dreg))
140 		continue;
141 
142 	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
143 				   REGNO (dreg));
144 	    }
145 
146 	  if (insn == loop->loop_end)
147 	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
148 	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
149 	    loop->iter_reg_used = true;
150 	  IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
151 	}
152     }
153 }
154 
155 /* Compute the incoming_dest and incoming_src members of LOOP by
156    identifying the edges that jump into the loop.  If there is more
157    than one block that jumps into the loop, incoming_src will be set
158    to NULL; likewise, if there is more than one block in the loop that
159    is the destination of an incoming edge, incoming_dest will be NULL.
160 
161    Return true if either of these two fields is nonnull, false
162    otherwise.  */
163 static bool
164 process_incoming_edges (hwloop_info loop)
165 {
166   edge e;
167   edge_iterator ei;
168   bool first = true;
169 
170   FOR_EACH_EDGE (e, ei, loop->incoming)
171     {
172       if (first)
173 	{
174 	  loop->incoming_src = e->src;
175 	  loop->incoming_dest = e->dest;
176 	  first = false;
177 	}
178       else
179 	{
180 	  if (e->dest != loop->incoming_dest)
181 	    loop->incoming_dest = NULL;
182 	  if (e->src != loop->incoming_src)
183 	    loop->incoming_src = NULL;
184 	}
185     }
186   if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
187     return false;
188 
189   return true;
190 }
191 
192 /* Try to identify a forwarder block that jump into LOOP, and add it to
193    the set of blocks in the loop, updating the vector of incoming blocks as
194    well.  This transformation gives a second chance to loops we couldn't
195    otherwise handle by increasing the chance that we'll end up with one
196    incoming_src block.
197    Return true if we made a change, false otherwise.  */
198 static bool
199 add_forwarder_blocks (hwloop_info loop)
200 {
201   edge e;
202   edge_iterator ei;
203 
204   FOR_EACH_EDGE (e, ei, loop->incoming)
205     {
206       if (forwarder_block_p (e->src))
207 	{
208 	  edge e2;
209 	  edge_iterator ei2;
210 
211 	  if (dump_file)
212 	    fprintf (dump_file,
213 		     ";; Adding forwarder block %d to loop %d and retrying\n",
214 		     e->src->index, loop->loop_no);
215 	  VEC_safe_push (basic_block, heap, loop->blocks, e->src);
216 	  bitmap_set_bit (loop->block_bitmap, e->src->index);
217 	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
218 	    VEC_safe_push (edge, gc, loop->incoming, e2);
219 	  VEC_unordered_remove (edge, loop->incoming, ei.index);
220 	  return true;
221 	}
222     }
223   return false;
224 }
225 
226 /* Called from reorg_loops when a potential loop end is found.  LOOP is
227    a newly set up structure describing the loop, it is this function's
228    responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
229    loop_end insn and its enclosing basic block.  REG is the loop counter
230    register.
231    For our purposes, a loop is defined by the set of blocks reachable from
232    the loop head in which the loop counter register is live.  This matches
233    the expected use; targets that call into this code usually replace the
234    loop counter with a different special register.  */
235 static void
236 discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg)
237 {
238   bool found_tail;
239   unsigned dwork = 0;
240   basic_block bb;
241   VEC (basic_block,heap) *works;
242 
243   loop->tail = tail_bb;
244   loop->loop_end = tail_insn;
245   loop->iter_reg = reg;
246   loop->incoming = VEC_alloc (edge, gc, 2);
247   loop->start_label = JUMP_LABEL (tail_insn);
248 
249   if (EDGE_COUNT (tail_bb->succs) != 2)
250     {
251       loop->bad = true;
252       return;
253     }
254   loop->head = BRANCH_EDGE (tail_bb)->dest;
255   loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
256 
257   works = VEC_alloc (basic_block, heap, 20);
258   VEC_safe_push (basic_block, heap, works, loop->head);
259 
260   found_tail = false;
261   for (dwork = 0; VEC_iterate (basic_block, works, dwork, bb); dwork++)
262     {
263       edge e;
264       edge_iterator ei;
265       if (bb == EXIT_BLOCK_PTR)
266 	{
267 	  /* We've reached the exit block.  The loop must be bad. */
268 	  if (dump_file)
269 	    fprintf (dump_file,
270 		     ";; Loop is bad - reached exit block while scanning\n");
271 	  loop->bad = true;
272 	  break;
273 	}
274 
275       if (bitmap_bit_p (loop->block_bitmap, bb->index))
276 	continue;
277 
278       /* We've not seen this block before.  Add it to the loop's
279 	 list and then add each successor to the work list.  */
280 
281       VEC_safe_push (basic_block, heap, loop->blocks, bb);
282       bitmap_set_bit (loop->block_bitmap, bb->index);
283 
284       if (bb == tail_bb)
285 	found_tail = true;
286       else
287 	{
288 	  FOR_EACH_EDGE (e, ei, bb->succs)
289 	    {
290 	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
291 	      if (REGNO_REG_SET_P (df_get_live_in (succ),
292 				   REGNO (loop->iter_reg)))
293 		VEC_safe_push (basic_block, heap, works, succ);
294 	    }
295 	}
296     }
297 
298   if (!found_tail)
299     loop->bad = true;
300 
301   /* Find the predecessor, and make sure nothing else jumps into this loop.  */
302   if (!loop->bad)
303     {
304       FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb)
305 	{
306 	  edge e;
307 	  edge_iterator ei;
308 	  FOR_EACH_EDGE (e, ei, bb->preds)
309 	    {
310 	      basic_block pred = e->src;
311 
312 	      if (!bb_in_loop_p (loop, pred))
313 		{
314 		  if (dump_file)
315 		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
316 			     loop->loop_no, pred->index,
317 			     e->dest->index);
318 		  VEC_safe_push (edge, gc, loop->incoming, e);
319 		}
320 	    }
321 	}
322 
323       if (!process_incoming_edges (loop))
324 	{
325 	  if (dump_file)
326 	    fprintf (dump_file,
327 		     ";; retrying loop %d with forwarder blocks\n",
328 		     loop->loop_no);
329 	  if (!add_forwarder_blocks (loop))
330 	    {
331 	      if (dump_file)
332 		fprintf (dump_file, ";; No forwarder blocks found\n");
333 	      loop->bad = true;
334 	    }
335 	  else if (!process_incoming_edges (loop))
336 	    {
337 	      if (dump_file)
338 		fprintf (dump_file,
339 			 ";; can't find suitable entry for loop %d\n",
340 			 loop->loop_no);
341 	    }
342 	}
343     }
344 
345   VEC_free (basic_block, heap, works);
346 }
347 
348 /* Analyze the structure of the loops in the current function.  Use
349    STACK for bitmap allocations.  Returns all the valid candidates for
350    hardware loops found in this function.  HOOKS is the argument
351    passed to reorg_loops, used here to find the iteration registers
352    from a loop_end pattern.  */
353 static hwloop_info
354 discover_loops (bitmap_obstack *stack, struct hw_doloop_hooks *hooks)
355 {
356   hwloop_info loops = NULL;
357   hwloop_info loop;
358   basic_block bb;
359   int nloops = 0;
360 
361   /* Find all the possible loop tails.  This means searching for every
362      loop_end instruction.  For each one found, create a hwloop_info
363      structure and add the head block to the work list. */
364   FOR_EACH_BB (bb)
365     {
366       rtx tail = BB_END (bb);
367       rtx insn, reg;
368 
369       while (tail && GET_CODE (tail) == NOTE && tail != BB_HEAD (bb))
370 	tail = PREV_INSN (tail);
371 
372       if (tail == NULL_RTX)
373 	continue;
374 
375       if (!JUMP_P (tail))
376 	continue;
377       reg = hooks->end_pattern_reg (tail);
378       if (reg == NULL_RTX)
379 	continue;
380 
381       /* A possible loop end */
382 
383       /* There's a degenerate case we can handle - an empty loop consisting
384 	 of only a back branch.  Handle that by deleting the branch.  */
385       insn = JUMP_LABEL (tail);
386       while (insn && !NONDEBUG_INSN_P (insn))
387 	insn = NEXT_INSN (insn);
388       if (insn == tail)
389 	{
390 	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
391 	  if (dump_file)
392 	    {
393 	      fprintf (dump_file, ";; degenerate loop ending at\n");
394 	      print_rtl_single (dump_file, tail);
395 	    }
396 	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
397 	    {
398 	      if (dump_file)
399 		fprintf (dump_file, ";; deleting it\n");
400 	      delete_insn_and_edges (tail);
401 	      continue;
402 	    }
403 	}
404 
405       loop = XCNEW (struct hwloop_info_d);
406       loop->next = loops;
407       loops = loop;
408       loop->loop_no = nloops++;
409       loop->blocks = VEC_alloc (basic_block, heap, 20);
410       loop->block_bitmap = BITMAP_ALLOC (stack);
411 
412       if (dump_file)
413 	{
414 	  fprintf (dump_file, ";; potential loop %d ending at\n",
415 		   loop->loop_no);
416 	  print_rtl_single (dump_file, tail);
417 	}
418 
419       discover_loop (loop, bb, tail, reg);
420     }
421 
422   /* Compute loop nestings.  Given two loops A and B, either the sets
423      of their blocks don't intersect at all, or one is the subset of
424      the other, or the blocks don't form a good nesting structure.  */
425   for (loop = loops; loop; loop = loop->next)
426     {
427       hwloop_info other;
428 
429       if (loop->bad)
430 	continue;
431 
432       for (other = loops; other; other = other->next)
433 	{
434 	  if (other->bad)
435 	    continue;
436 
437 	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
438 	    continue;
439 	  if (!bitmap_intersect_compl_p (other->block_bitmap,
440 					 loop->block_bitmap))
441 	    VEC_safe_push (hwloop_info, heap, loop->loops, other);
442 	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
443 					      other->block_bitmap))
444 	    VEC_safe_push (hwloop_info, heap, other->loops, loop);
445 	  else
446 	    {
447 	      if (dump_file)
448 		fprintf (dump_file,
449 			 ";; can't find suitable nesting for loops %d and %d\n",
450 			 loop->loop_no, other->loop_no);
451 	      loop->bad = other->bad = true;
452 	    }
453 	}
454     }
455 
456   if (dump_file)
457     dump_hwloops (loops);
458 
459   return loops;
460 }
461 
462 /* Free up the loop structures in LOOPS.  */
463 static void
464 free_loops (hwloop_info loops)
465 {
466   while (loops)
467     {
468       hwloop_info loop = loops;
469       loops = loop->next;
470       VEC_free (hwloop_info, heap, loop->loops);
471       VEC_free (basic_block, heap, loop->blocks);
472       BITMAP_FREE (loop->block_bitmap);
473       XDELETE (loop);
474     }
475 }
476 
477 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
478 
479 /* Initialize the aux fields to give ascending indices to basic blocks.  */
480 static void
481 set_bb_indices (void)
482 {
483   basic_block bb;
484   intptr_t index;
485 
486   index = 0;
487   FOR_EACH_BB (bb)
488     bb->aux = (void *) index++;
489 }
490 
491 /* The taken-branch edge from the loop end can actually go forward.
492    If the target's hardware loop support requires that the loop end be
493    after the loop start, try to reorder a loop's basic blocks when we
494    find such a case.
495    This is not very aggressive; it only moves at most one block.  It
496    does not introduce new branches into loops; it may remove them, or
497    it may switch fallthru/jump edges.  */
498 static void
499 reorder_loops (hwloop_info loops)
500 {
501   basic_block bb;
502   hwloop_info loop;
503 
504   cfg_layout_initialize (0);
505 
506   set_bb_indices ();
507 
508   for (loop = loops; loop; loop = loop->next)
509     {
510       edge e;
511       edge_iterator ei;
512 
513       if (loop->bad)
514 	continue;
515 
516       if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
517 	continue;
518 
519       FOR_EACH_EDGE (e, ei, loop->head->succs)
520 	{
521 	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
522 	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
523 	    {
524 	      basic_block start_bb = e->dest;
525 	      basic_block start_prev_bb = start_bb->prev_bb;
526 
527 	      if (dump_file)
528 		fprintf (dump_file, ";; Moving block %d before block %d\n",
529 			 loop->head->index, start_bb->index);
530 	      loop->head->prev_bb->next_bb = loop->head->next_bb;
531 	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
532 
533 	      loop->head->prev_bb = start_prev_bb;
534 	      loop->head->next_bb = start_bb;
535 	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
536 
537 	      set_bb_indices ();
538 	      break;
539 	    }
540 	}
541       loops = loops->next;
542     }
543 
544   FOR_EACH_BB (bb)
545     {
546       if (bb->next_bb != EXIT_BLOCK_PTR)
547 	bb->aux = bb->next_bb;
548       else
549 	bb->aux = NULL;
550     }
551   cfg_layout_finalize ();
552   clear_aux_for_blocks ();
553   df_analyze ();
554 }
555 
556 /* Call the OPT function for LOOP and all of its sub-loops.  This is
557    done in a depth-first search; innermost loops are visited first.
558    OPTIMIZE and FAIL are the functions passed to reorg_loops by the
559    target's reorg pass.  */
560 static void
561 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
562 {
563   int ix;
564   hwloop_info inner;
565   int inner_depth = 0;
566 
567   if (loop->visited)
568     return;
569 
570   loop->visited = 1;
571 
572   if (loop->bad)
573     {
574       if (dump_file)
575 	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
576       goto bad_loop;
577     }
578 
579   /* Every loop contains in its list of inner loops every loop nested inside
580      it, even if there are intermediate loops.  This works because we're doing
581      a depth-first search here and never visit a loop more than once.
582      Recursion depth is effectively limited by the number of available
583      hardware registers.  */
584   for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, inner); ix++)
585     {
586       optimize_loop (inner, hooks);
587 
588       if (!inner->bad && inner_depth < inner->depth)
589 	inner_depth = inner->depth;
590       /* The set of registers may be changed while optimizing the inner
591 	 loop.  */
592       IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
593     }
594 
595   loop->depth = inner_depth + 1;
596 
597   if (hooks->opt (loop))
598     return;
599 
600  bad_loop:
601   if (dump_file)
602     fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
603 
604   loop->bad = true;
605   hooks->fail (loop);
606 }
607 
608 /* This function can be used from a port's machine_dependent_reorg to
609    find and analyze loops that end in loop_end instructions.  It uses
610    a set of function pointers in HOOKS to call back into the
611    target-specific functions to perform the actual machine-specific
612    transformations.
613 
614    Such transformations typically involve additional set-up
615    instructions before the loop, to define loop bounds or set up a
616    special loop counter register.
617 
618    DO_REORDER should be set to true if we should try to use the
619    reorder_loops function to ensure the loop end occurs after the loop
620    start.  This is for use by targets where the loop hardware requires
621    this condition.
622 
623    HOOKS is used to pass in target specific hooks; see
624    hw-doloop.h.  */
625 void
626 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
627 {
628   hwloop_info loops = NULL;
629   hwloop_info loop;
630   bitmap_obstack stack;
631 
632   df_live_add_problem ();
633   df_live_set_all_dirty ();
634   df_analyze ();
635 
636   bitmap_obstack_initialize (&stack);
637 
638   if (dump_file)
639     fprintf (dump_file, ";; Find loops, first pass\n\n");
640 
641   loops = discover_loops (&stack, hooks);
642 
643   if (do_reorder)
644     {
645       reorder_loops (loops);
646       free_loops (loops);
647 
648       if (dump_file)
649 	fprintf (dump_file, ";; Find loops, second pass\n\n");
650 
651       loops = discover_loops (&stack, hooks);
652     }
653 
654   for (loop = loops; loop; loop = loop->next)
655     scan_loop (loop);
656 
657   /* Now apply the optimizations.  */
658   for (loop = loops; loop; loop = loop->next)
659     optimize_loop (loop, hooks);
660 
661   if (dump_file)
662     {
663       fprintf (dump_file, ";; After hardware loops optimization:\n\n");
664       dump_hwloops (loops);
665     }
666 
667   free_loops (loops);
668 
669   if (dump_file)
670     print_rtl (dump_file, get_insns ());
671 }
672 #endif
673