1 /* Code to analyze doloop loops in order for targets to perform late
2    optimizations converting doloops to other forms of hardware loops.
3    Copyright (C) 2011-2018 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "df.h"
27 #include "insn-config.h"
28 #include "regs.h"
29 #include "memmodel.h"
30 #include "emit-rtl.h"
31 #include "recog.h"
32 #include "cfgrtl.h"
33 #include "hw-doloop.h"
34 #include "dumpfile.h"
35 
36 /* Dump information collected in LOOPS.  */
37 static void
dump_hwloops(hwloop_info loops)38 dump_hwloops (hwloop_info loops)
39 {
40   hwloop_info loop;
41 
42   for (loop = loops; loop; loop = loop->next)
43     {
44       hwloop_info i;
45       basic_block b;
46       unsigned ix;
47 
48       fprintf (dump_file, ";; loop %d: ", loop->loop_no);
49       if (loop->bad)
50 	fprintf (dump_file, "(bad) ");
51       fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
52 	       loop->head == NULL ? -1 : loop->head->index,
53 	       loop->depth, REGNO (loop->iter_reg));
54 
55       fprintf (dump_file, " blocks: [ ");
56       for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
57 	fprintf (dump_file, "%d ", b->index);
58       fprintf (dump_file, "] ");
59 
60       fprintf (dump_file, " inner loops: [ ");
61       for (ix = 0; loop->loops.iterate (ix, &i); ix++)
62 	fprintf (dump_file, "%d ", i->loop_no);
63       fprintf (dump_file, "]\n");
64     }
65   fprintf (dump_file, "\n");
66 }
67 
68 /* Return true if BB is part of LOOP.  */
69 static bool
bb_in_loop_p(hwloop_info loop,basic_block bb)70 bb_in_loop_p (hwloop_info loop, basic_block bb)
71 {
72   return bitmap_bit_p (loop->block_bitmap, bb->index);
73 }
74 
75 /* Scan the blocks of LOOP (and its inferiors), and record things such as
76    hard registers set, jumps out of and within the loop.  */
77 static void
scan_loop(hwloop_info loop)78 scan_loop (hwloop_info loop)
79 {
80   unsigned ix;
81   basic_block bb;
82 
83   if (loop->bad)
84     return;
85 
86   if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
87 		       REGNO (loop->iter_reg)))
88     loop->iter_reg_used_outside = true;
89 
90   for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
91     {
92       rtx_insn *insn;
93       edge e;
94       edge_iterator ei;
95 
96       if (bb != loop->tail)
97 	FOR_EACH_EDGE (e, ei, bb->succs)
98 	  {
99 	    if (bb_in_loop_p (loop, e->dest))
100 	      {
101 		if (!(e->flags & EDGE_FALLTHRU))
102 		  loop->jumps_within = true;
103 	      }
104 	    else
105 	      {
106 		loop->jumps_outof = true;
107 		if (!loop->bad)
108 		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
109 						REGNO (loop->iter_reg)));
110 	      }
111 	  }
112 
113       for (insn = BB_HEAD (bb);
114 	   insn != NEXT_INSN (BB_END (bb));
115 	   insn = NEXT_INSN (insn))
116 	{
117 	  df_ref def;
118 	  HARD_REG_SET set_this_insn;
119 
120 	  if (!NONDEBUG_INSN_P (insn))
121 	    continue;
122 
123 	  if (recog_memoized (insn) < 0
124 	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
125 		  || asm_noperands (PATTERN (insn)) >= 0))
126 	    loop->has_asm = true;
127 
128 	  CLEAR_HARD_REG_SET (set_this_insn);
129 	  FOR_EACH_INSN_DEF (def, insn)
130 	    {
131 	      rtx dreg = DF_REF_REG (def);
132 
133 	      if (!REG_P (dreg))
134 		continue;
135 
136 	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
137 				   REGNO (dreg));
138 	    }
139 
140 	  if (insn == loop->loop_end)
141 	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
142 	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
143 	    loop->iter_reg_used = true;
144 	  IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
145 	}
146     }
147 }
148 
149 /* Compute the incoming_dest and incoming_src members of LOOP by
150    identifying the edges that jump into the loop.  If there is more
151    than one block that jumps into the loop, incoming_src will be set
152    to NULL; likewise, if there is more than one block in the loop that
153    is the destination of an incoming edge, incoming_dest will be NULL.
154 
155    Return true if either of these two fields is nonnull, false
156    otherwise.  */
157 static bool
process_incoming_edges(hwloop_info loop)158 process_incoming_edges (hwloop_info loop)
159 {
160   edge e;
161   edge_iterator ei;
162   bool first = true;
163 
164   FOR_EACH_EDGE (e, ei, loop->incoming)
165     {
166       if (first)
167 	{
168 	  loop->incoming_src = e->src;
169 	  loop->incoming_dest = e->dest;
170 	  first = false;
171 	}
172       else
173 	{
174 	  if (e->dest != loop->incoming_dest)
175 	    loop->incoming_dest = NULL;
176 	  if (e->src != loop->incoming_src)
177 	    loop->incoming_src = NULL;
178 	}
179     }
180   if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
181     return false;
182 
183   return true;
184 }
185 
186 /* Try to identify a forwarder block that jump into LOOP, and add it to
187    the set of blocks in the loop, updating the vector of incoming blocks as
188    well.  This transformation gives a second chance to loops we couldn't
189    otherwise handle by increasing the chance that we'll end up with one
190    incoming_src block.
191    Return true if we made a change, false otherwise.  */
192 static bool
add_forwarder_blocks(hwloop_info loop)193 add_forwarder_blocks (hwloop_info loop)
194 {
195   edge e;
196   edge_iterator ei;
197 
198   FOR_EACH_EDGE (e, ei, loop->incoming)
199     {
200       if (forwarder_block_p (e->src))
201 	{
202 	  edge e2;
203 	  edge_iterator ei2;
204 
205 	  if (dump_file)
206 	    fprintf (dump_file,
207 		     ";; Adding forwarder block %d to loop %d and retrying\n",
208 		     e->src->index, loop->loop_no);
209 	  loop->blocks.safe_push (e->src);
210 	  bitmap_set_bit (loop->block_bitmap, e->src->index);
211 	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
212 	    vec_safe_push (loop->incoming, e2);
213 	  loop->incoming->unordered_remove (ei.index);
214 	  return true;
215 	}
216     }
217   return false;
218 }
219 
220 /* Called from reorg_loops when a potential loop end is found.  LOOP is
221    a newly set up structure describing the loop, it is this function's
222    responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
223    loop_end insn and its enclosing basic block.  REG is the loop counter
224    register.
225    For our purposes, a loop is defined by the set of blocks reachable from
226    the loop head in which the loop counter register is live.  This matches
227    the expected use; targets that call into this code usually replace the
228    loop counter with a different special register.  */
229 static void
discover_loop(hwloop_info loop,basic_block tail_bb,rtx_insn * tail_insn,rtx reg)230 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
231 {
232   bool found_tail;
233   unsigned dwork = 0;
234   basic_block bb;
235 
236   loop->tail = tail_bb;
237   loop->loop_end = tail_insn;
238   loop->iter_reg = reg;
239   vec_alloc (loop->incoming, 2);
240   loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
241 
242   if (EDGE_COUNT (tail_bb->succs) != 2)
243     {
244       loop->bad = true;
245       return;
246     }
247   loop->head = BRANCH_EDGE (tail_bb)->dest;
248   loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
249 
250   auto_vec<basic_block, 20> works;
251   works.safe_push (loop->head);
252 
253   found_tail = false;
254   for (dwork = 0; works.iterate (dwork, &bb); dwork++)
255     {
256       edge e;
257       edge_iterator ei;
258       if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
259 	{
260 	  /* We've reached the exit block.  The loop must be bad. */
261 	  if (dump_file)
262 	    fprintf (dump_file,
263 		     ";; Loop is bad - reached exit block while scanning\n");
264 	  loop->bad = true;
265 	  break;
266 	}
267 
268       if (bitmap_bit_p (loop->block_bitmap, bb->index))
269 	continue;
270 
271       /* We've not seen this block before.  Add it to the loop's
272 	 list and then add each successor to the work list.  */
273 
274       loop->blocks.safe_push (bb);
275       bitmap_set_bit (loop->block_bitmap, bb->index);
276 
277       if (bb == tail_bb)
278 	found_tail = true;
279       else
280 	{
281 	  FOR_EACH_EDGE (e, ei, bb->succs)
282 	    {
283 	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
284 	      if (REGNO_REG_SET_P (df_get_live_in (succ),
285 				   REGNO (loop->iter_reg)))
286 		works.safe_push (succ);
287 	    }
288 	}
289     }
290 
291   if (!found_tail)
292     loop->bad = true;
293 
294   /* Find the predecessor, and make sure nothing else jumps into this loop.  */
295   if (!loop->bad)
296     {
297       FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
298 	{
299 	  edge e;
300 	  edge_iterator ei;
301 	  FOR_EACH_EDGE (e, ei, bb->preds)
302 	    {
303 	      basic_block pred = e->src;
304 
305 	      if (!bb_in_loop_p (loop, pred))
306 		{
307 		  if (dump_file)
308 		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
309 			     loop->loop_no, pred->index,
310 			     e->dest->index);
311 		  vec_safe_push (loop->incoming, e);
312 		}
313 	    }
314 	}
315 
316       if (!process_incoming_edges (loop))
317 	{
318 	  if (dump_file)
319 	    fprintf (dump_file,
320 		     ";; retrying loop %d with forwarder blocks\n",
321 		     loop->loop_no);
322 	  if (!add_forwarder_blocks (loop))
323 	    {
324 	      if (dump_file)
325 		fprintf (dump_file, ";; No forwarder blocks found\n");
326 	      loop->bad = true;
327 	    }
328 	  else if (!process_incoming_edges (loop))
329 	    {
330 	      if (dump_file)
331 		fprintf (dump_file,
332 			 ";; can't find suitable entry for loop %d\n",
333 			 loop->loop_no);
334 	    }
335 	}
336     }
337 }
338 
339 /* Analyze the structure of the loops in the current function.  Use
340    LOOP_STACK for bitmap allocations.  Returns all the valid candidates for
341    hardware loops found in this function.  HOOKS is the argument
342    passed to reorg_loops, used here to find the iteration registers
343    from a loop_end pattern.  */
344 static hwloop_info
discover_loops(bitmap_obstack * loop_stack,struct hw_doloop_hooks * hooks)345 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
346 {
347   hwloop_info loops = NULL;
348   hwloop_info loop;
349   basic_block bb;
350   int nloops = 0;
351 
352   /* Find all the possible loop tails.  This means searching for every
353      loop_end instruction.  For each one found, create a hwloop_info
354      structure and add the head block to the work list. */
355   FOR_EACH_BB_FN (bb, cfun)
356     {
357       rtx_insn *tail = BB_END (bb);
358       rtx_insn *insn;
359       rtx reg;
360 
361       while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
362 	tail = PREV_INSN (tail);
363 
364       if (tail == NULL_RTX)
365 	continue;
366 
367       if (!JUMP_P (tail))
368 	continue;
369       reg = hooks->end_pattern_reg (tail);
370       if (reg == NULL_RTX)
371 	continue;
372 
373       /* A possible loop end */
374 
375       /* There's a degenerate case we can handle - an empty loop consisting
376 	 of only a back branch.  Handle that by deleting the branch.  */
377       insn = JUMP_LABEL_AS_INSN (tail);
378       while (insn && !NONDEBUG_INSN_P (insn))
379 	insn = NEXT_INSN (insn);
380       if (insn == tail)
381 	{
382 	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
383 	  if (dump_file)
384 	    {
385 	      fprintf (dump_file, ";; degenerate loop ending at\n");
386 	      print_rtl_single (dump_file, tail);
387 	    }
388 	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
389 	    {
390 	      if (dump_file)
391 		fprintf (dump_file, ";; deleting it\n");
392 	      delete_insn_and_edges (tail);
393 	      continue;
394 	    }
395 	}
396 
397       loop = XCNEW (struct hwloop_info_d);
398       loop->next = loops;
399       loops = loop;
400       loop->loop_no = nloops++;
401       loop->blocks.create (20);
402       loop->block_bitmap = BITMAP_ALLOC (loop_stack);
403 
404       if (dump_file)
405 	{
406 	  fprintf (dump_file, ";; potential loop %d ending at\n",
407 		   loop->loop_no);
408 	  print_rtl_single (dump_file, tail);
409 	}
410 
411       discover_loop (loop, bb, tail, reg);
412     }
413 
414   /* Compute loop nestings.  Given two loops A and B, either the sets
415      of their blocks don't intersect at all, or one is the subset of
416      the other, or the blocks don't form a good nesting structure.  */
417   for (loop = loops; loop; loop = loop->next)
418     {
419       hwloop_info other;
420 
421       if (loop->bad)
422 	continue;
423 
424       for (other = loops; other; other = other->next)
425 	{
426 	  if (other->bad)
427 	    continue;
428 
429 	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
430 	    continue;
431 	  if (!bitmap_intersect_compl_p (other->block_bitmap,
432 					 loop->block_bitmap))
433 	    loop->loops.safe_push (other);
434 	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
435 					      other->block_bitmap))
436 	    other->loops.safe_push (loop);
437 	  else
438 	    {
439 	      if (dump_file)
440 		fprintf (dump_file,
441 			 ";; can't find suitable nesting for loops %d and %d\n",
442 			 loop->loop_no, other->loop_no);
443 	      loop->bad = other->bad = true;
444 	    }
445 	}
446     }
447 
448   if (dump_file)
449     dump_hwloops (loops);
450 
451   return loops;
452 }
453 
454 /* Free up the loop structures in LOOPS.  */
455 static void
free_loops(hwloop_info loops)456 free_loops (hwloop_info loops)
457 {
458   while (loops)
459     {
460       hwloop_info loop = loops;
461       loops = loop->next;
462       loop->loops.release ();
463       loop->blocks.release ();
464       BITMAP_FREE (loop->block_bitmap);
465       XDELETE (loop);
466     }
467 }
468 
469 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
470 
471 /* Initialize the aux fields to give ascending indices to basic blocks.  */
472 static void
set_bb_indices(void)473 set_bb_indices (void)
474 {
475   basic_block bb;
476   intptr_t index;
477 
478   index = 0;
479   FOR_EACH_BB_FN (bb, cfun)
480     bb->aux = (void *) index++;
481 }
482 
483 /* The taken-branch edge from the loop end can actually go forward.
484    If the target's hardware loop support requires that the loop end be
485    after the loop start, try to reorder a loop's basic blocks when we
486    find such a case.
487    This is not very aggressive; it only moves at most one block.  It
488    does not introduce new branches into loops; it may remove them, or
489    it may switch fallthru/jump edges.  */
490 static void
reorder_loops(hwloop_info loops)491 reorder_loops (hwloop_info loops)
492 {
493   basic_block bb;
494   hwloop_info loop;
495 
496   cfg_layout_initialize (0);
497 
498   set_bb_indices ();
499 
500   for (loop = loops; loop; loop = loop->next)
501     {
502       edge e;
503       edge_iterator ei;
504 
505       if (loop->bad)
506 	continue;
507 
508       if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
509 	continue;
510 
511       FOR_EACH_EDGE (e, ei, loop->head->succs)
512 	{
513 	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
514 	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
515 	    {
516 	      basic_block start_bb = e->dest;
517 	      basic_block start_prev_bb = start_bb->prev_bb;
518 
519 	      if (dump_file)
520 		fprintf (dump_file, ";; Moving block %d before block %d\n",
521 			 loop->head->index, start_bb->index);
522 	      loop->head->prev_bb->next_bb = loop->head->next_bb;
523 	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
524 
525 	      loop->head->prev_bb = start_prev_bb;
526 	      loop->head->next_bb = start_bb;
527 	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
528 
529 	      set_bb_indices ();
530 	      break;
531 	    }
532 	}
533       loops = loops->next;
534     }
535 
536   FOR_EACH_BB_FN (bb, cfun)
537     {
538       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
539 	bb->aux = bb->next_bb;
540       else
541 	bb->aux = NULL;
542     }
543   cfg_layout_finalize ();
544   clear_aux_for_blocks ();
545   df_analyze ();
546 }
547 
548 /* Call the OPT function for LOOP and all of its sub-loops.  This is
549    done in a depth-first search; innermost loops are visited first.
550    OPTIMIZE and FAIL are the functions passed to reorg_loops by the
551    target's reorg pass.  */
552 static void
optimize_loop(hwloop_info loop,struct hw_doloop_hooks * hooks)553 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
554 {
555   int ix;
556   hwloop_info inner;
557   int inner_depth = 0;
558 
559   if (loop->visited)
560     return;
561 
562   loop->visited = 1;
563 
564   if (loop->bad)
565     {
566       if (dump_file)
567 	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
568       goto bad_loop;
569     }
570 
571   /* Every loop contains in its list of inner loops every loop nested inside
572      it, even if there are intermediate loops.  This works because we're doing
573      a depth-first search here and never visit a loop more than once.
574      Recursion depth is effectively limited by the number of available
575      hardware registers.  */
576   for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
577     {
578       optimize_loop (inner, hooks);
579 
580       if (!inner->bad && inner_depth < inner->depth)
581 	inner_depth = inner->depth;
582       /* The set of registers may be changed while optimizing the inner
583 	 loop.  */
584       IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
585     }
586 
587   loop->depth = inner_depth + 1;
588 
589   if (hooks->opt (loop))
590     return;
591 
592  bad_loop:
593   if (dump_file)
594     fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
595 
596   loop->bad = true;
597   hooks->fail (loop);
598 }
599 
600 /* This function can be used from a port's machine_dependent_reorg to
601    find and analyze loops that end in loop_end instructions.  It uses
602    a set of function pointers in HOOKS to call back into the
603    target-specific functions to perform the actual machine-specific
604    transformations.
605 
606    Such transformations typically involve additional set-up
607    instructions before the loop, to define loop bounds or set up a
608    special loop counter register.
609 
610    DO_REORDER should be set to true if we should try to use the
611    reorder_loops function to ensure the loop end occurs after the loop
612    start.  This is for use by targets where the loop hardware requires
613    this condition.
614 
615    HOOKS is used to pass in target specific hooks; see
616    hw-doloop.h.  */
617 void
reorg_loops(bool do_reorder,struct hw_doloop_hooks * hooks)618 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
619 {
620   hwloop_info loops = NULL;
621   hwloop_info loop;
622   bitmap_obstack loop_stack;
623 
624   df_live_add_problem ();
625   df_live_set_all_dirty ();
626   df_analyze ();
627 
628   bitmap_obstack_initialize (&loop_stack);
629 
630   if (dump_file)
631     fprintf (dump_file, ";; Find loops, first pass\n\n");
632 
633   loops = discover_loops (&loop_stack, hooks);
634 
635   /* We can't enter cfglayout mode anymore if basic block partitioning
636      already happened.  */
637   if (do_reorder && !crtl->has_bb_partition)
638     {
639       reorder_loops (loops);
640       free_loops (loops);
641 
642       if (dump_file)
643 	fprintf (dump_file, ";; Find loops, second pass\n\n");
644 
645       loops = discover_loops (&loop_stack, hooks);
646     }
647 
648   for (loop = loops; loop; loop = loop->next)
649     scan_loop (loop);
650 
651   /* Now apply the optimizations.  */
652   for (loop = loops; loop; loop = loop->next)
653     optimize_loop (loop, hooks);
654 
655   if (dump_file)
656     {
657       fprintf (dump_file, ";; After hardware loops optimization:\n\n");
658       dump_hwloops (loops);
659     }
660 
661   free_loops (loops);
662   bitmap_obstack_release (&loop_stack);
663 
664   if (dump_file)
665     print_rtl (dump_file, get_insns ());
666 }
667