1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2014 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "expr.h"
28 #include "hard-reg-set.h"
29 #include "regs.h"
30 #include "basic-block.h"
31 #include "tm_p.h"
32 #include "df.h"
33 #include "cfgloop.h"
34 #include "recog.h"
35 #include "target.h"
36 #include "hw-doloop.h"
37 #include "dumpfile.h"
38
39 #ifdef HAVE_doloop_end
40
41 /* Dump information collected in LOOPS. */
42 static void
dump_hwloops(hwloop_info loops)43 dump_hwloops (hwloop_info loops)
44 {
45 hwloop_info loop;
46
47 for (loop = loops; loop; loop = loop->next)
48 {
49 hwloop_info i;
50 basic_block b;
51 unsigned ix;
52
53 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
54 if (loop->bad)
55 fprintf (dump_file, "(bad) ");
56 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
57 loop->head == NULL ? -1 : loop->head->index,
58 loop->depth, REGNO (loop->iter_reg));
59
60 fprintf (dump_file, " blocks: [ ");
61 for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
62 fprintf (dump_file, "%d ", b->index);
63 fprintf (dump_file, "] ");
64
65 fprintf (dump_file, " inner loops: [ ");
66 for (ix = 0; loop->loops.iterate (ix, &i); ix++)
67 fprintf (dump_file, "%d ", i->loop_no);
68 fprintf (dump_file, "]\n");
69 }
70 fprintf (dump_file, "\n");
71 }
72
73 /* Return true if BB is part of LOOP. */
74 static bool
bb_in_loop_p(hwloop_info loop,basic_block bb)75 bb_in_loop_p (hwloop_info loop, basic_block bb)
76 {
77 return bitmap_bit_p (loop->block_bitmap, bb->index);
78 }
79
80 /* Scan the blocks of LOOP (and its inferiors), and record things such as
81 hard registers set, jumps out of and within the loop. */
82 static void
scan_loop(hwloop_info loop)83 scan_loop (hwloop_info loop)
84 {
85 unsigned ix;
86 basic_block bb;
87
88 if (loop->bad)
89 return;
90
91 if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
92 REGNO (loop->iter_reg)))
93 loop->iter_reg_used_outside = true;
94
95 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
96 {
97 rtx insn;
98 edge e;
99 edge_iterator ei;
100
101 if (bb != loop->tail)
102 FOR_EACH_EDGE (e, ei, bb->succs)
103 {
104 if (bb_in_loop_p (loop, e->dest))
105 {
106 if (!(e->flags & EDGE_FALLTHRU))
107 loop->jumps_within = true;
108 }
109 else
110 {
111 loop->jumps_outof = true;
112 if (!loop->bad)
113 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
114 REGNO (loop->iter_reg)));
115 }
116 }
117
118 for (insn = BB_HEAD (bb);
119 insn != NEXT_INSN (BB_END (bb));
120 insn = NEXT_INSN (insn))
121 {
122 df_ref *def_rec;
123 HARD_REG_SET set_this_insn;
124
125 if (!NONDEBUG_INSN_P (insn))
126 continue;
127
128 if (recog_memoized (insn) < 0
129 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
130 || asm_noperands (PATTERN (insn)) >= 0))
131 loop->has_asm = true;
132
133 CLEAR_HARD_REG_SET (set_this_insn);
134 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
135 {
136 rtx dreg = DF_REF_REG (*def_rec);
137
138 if (!REG_P (dreg))
139 continue;
140
141 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
142 REGNO (dreg));
143 }
144
145 if (insn == loop->loop_end)
146 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
147 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
148 loop->iter_reg_used = true;
149 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
150 }
151 }
152 }
153
154 /* Compute the incoming_dest and incoming_src members of LOOP by
155 identifying the edges that jump into the loop. If there is more
156 than one block that jumps into the loop, incoming_src will be set
157 to NULL; likewise, if there is more than one block in the loop that
158 is the destination of an incoming edge, incoming_dest will be NULL.
159
160 Return true if either of these two fields is nonnull, false
161 otherwise. */
162 static bool
process_incoming_edges(hwloop_info loop)163 process_incoming_edges (hwloop_info loop)
164 {
165 edge e;
166 edge_iterator ei;
167 bool first = true;
168
169 FOR_EACH_EDGE (e, ei, loop->incoming)
170 {
171 if (first)
172 {
173 loop->incoming_src = e->src;
174 loop->incoming_dest = e->dest;
175 first = false;
176 }
177 else
178 {
179 if (e->dest != loop->incoming_dest)
180 loop->incoming_dest = NULL;
181 if (e->src != loop->incoming_src)
182 loop->incoming_src = NULL;
183 }
184 }
185 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
186 return false;
187
188 return true;
189 }
190
191 /* Try to identify a forwarder block that jump into LOOP, and add it to
192 the set of blocks in the loop, updating the vector of incoming blocks as
193 well. This transformation gives a second chance to loops we couldn't
194 otherwise handle by increasing the chance that we'll end up with one
195 incoming_src block.
196 Return true if we made a change, false otherwise. */
197 static bool
add_forwarder_blocks(hwloop_info loop)198 add_forwarder_blocks (hwloop_info loop)
199 {
200 edge e;
201 edge_iterator ei;
202
203 FOR_EACH_EDGE (e, ei, loop->incoming)
204 {
205 if (forwarder_block_p (e->src))
206 {
207 edge e2;
208 edge_iterator ei2;
209
210 if (dump_file)
211 fprintf (dump_file,
212 ";; Adding forwarder block %d to loop %d and retrying\n",
213 e->src->index, loop->loop_no);
214 loop->blocks.safe_push (e->src);
215 bitmap_set_bit (loop->block_bitmap, e->src->index);
216 FOR_EACH_EDGE (e2, ei2, e->src->preds)
217 vec_safe_push (loop->incoming, e2);
218 loop->incoming->unordered_remove (ei.index);
219 return true;
220 }
221 }
222 return false;
223 }
224
225 /* Called from reorg_loops when a potential loop end is found. LOOP is
226 a newly set up structure describing the loop, it is this function's
227 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
228 loop_end insn and its enclosing basic block. REG is the loop counter
229 register.
230 For our purposes, a loop is defined by the set of blocks reachable from
231 the loop head in which the loop counter register is live. This matches
232 the expected use; targets that call into this code usually replace the
233 loop counter with a different special register. */
234 static void
discover_loop(hwloop_info loop,basic_block tail_bb,rtx tail_insn,rtx reg)235 discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg)
236 {
237 bool found_tail;
238 unsigned dwork = 0;
239 basic_block bb;
240
241 loop->tail = tail_bb;
242 loop->loop_end = tail_insn;
243 loop->iter_reg = reg;
244 vec_alloc (loop->incoming, 2);
245 loop->start_label = JUMP_LABEL (tail_insn);
246
247 if (EDGE_COUNT (tail_bb->succs) != 2)
248 {
249 loop->bad = true;
250 return;
251 }
252 loop->head = BRANCH_EDGE (tail_bb)->dest;
253 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
254
255 auto_vec<basic_block, 20> works;
256 works.safe_push (loop->head);
257
258 found_tail = false;
259 for (dwork = 0; works.iterate (dwork, &bb); dwork++)
260 {
261 edge e;
262 edge_iterator ei;
263 if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
264 {
265 /* We've reached the exit block. The loop must be bad. */
266 if (dump_file)
267 fprintf (dump_file,
268 ";; Loop is bad - reached exit block while scanning\n");
269 loop->bad = true;
270 break;
271 }
272
273 if (bitmap_bit_p (loop->block_bitmap, bb->index))
274 continue;
275
276 /* We've not seen this block before. Add it to the loop's
277 list and then add each successor to the work list. */
278
279 loop->blocks.safe_push (bb);
280 bitmap_set_bit (loop->block_bitmap, bb->index);
281
282 if (bb == tail_bb)
283 found_tail = true;
284 else
285 {
286 FOR_EACH_EDGE (e, ei, bb->succs)
287 {
288 basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
289 if (REGNO_REG_SET_P (df_get_live_in (succ),
290 REGNO (loop->iter_reg)))
291 works.safe_push (succ);
292 }
293 }
294 }
295
296 if (!found_tail)
297 loop->bad = true;
298
299 /* Find the predecessor, and make sure nothing else jumps into this loop. */
300 if (!loop->bad)
301 {
302 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
303 {
304 edge e;
305 edge_iterator ei;
306 FOR_EACH_EDGE (e, ei, bb->preds)
307 {
308 basic_block pred = e->src;
309
310 if (!bb_in_loop_p (loop, pred))
311 {
312 if (dump_file)
313 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
314 loop->loop_no, pred->index,
315 e->dest->index);
316 vec_safe_push (loop->incoming, e);
317 }
318 }
319 }
320
321 if (!process_incoming_edges (loop))
322 {
323 if (dump_file)
324 fprintf (dump_file,
325 ";; retrying loop %d with forwarder blocks\n",
326 loop->loop_no);
327 if (!add_forwarder_blocks (loop))
328 {
329 if (dump_file)
330 fprintf (dump_file, ";; No forwarder blocks found\n");
331 loop->bad = true;
332 }
333 else if (!process_incoming_edges (loop))
334 {
335 if (dump_file)
336 fprintf (dump_file,
337 ";; can't find suitable entry for loop %d\n",
338 loop->loop_no);
339 }
340 }
341 }
342 }
343
344 /* Analyze the structure of the loops in the current function. Use
345 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
346 hardware loops found in this function. HOOKS is the argument
347 passed to reorg_loops, used here to find the iteration registers
348 from a loop_end pattern. */
349 static hwloop_info
discover_loops(bitmap_obstack * loop_stack,struct hw_doloop_hooks * hooks)350 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
351 {
352 hwloop_info loops = NULL;
353 hwloop_info loop;
354 basic_block bb;
355 int nloops = 0;
356
357 /* Find all the possible loop tails. This means searching for every
358 loop_end instruction. For each one found, create a hwloop_info
359 structure and add the head block to the work list. */
360 FOR_EACH_BB_FN (bb, cfun)
361 {
362 rtx tail = BB_END (bb);
363 rtx insn, reg;
364
365 while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
366 tail = PREV_INSN (tail);
367
368 if (tail == NULL_RTX)
369 continue;
370
371 if (!JUMP_P (tail))
372 continue;
373 reg = hooks->end_pattern_reg (tail);
374 if (reg == NULL_RTX)
375 continue;
376
377 /* A possible loop end */
378
379 /* There's a degenerate case we can handle - an empty loop consisting
380 of only a back branch. Handle that by deleting the branch. */
381 insn = JUMP_LABEL (tail);
382 while (insn && !NONDEBUG_INSN_P (insn))
383 insn = NEXT_INSN (insn);
384 if (insn == tail)
385 {
386 basic_block succ = FALLTHRU_EDGE (bb)->dest;
387 if (dump_file)
388 {
389 fprintf (dump_file, ";; degenerate loop ending at\n");
390 print_rtl_single (dump_file, tail);
391 }
392 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
393 {
394 if (dump_file)
395 fprintf (dump_file, ";; deleting it\n");
396 delete_insn_and_edges (tail);
397 continue;
398 }
399 }
400
401 loop = XCNEW (struct hwloop_info_d);
402 loop->next = loops;
403 loops = loop;
404 loop->loop_no = nloops++;
405 loop->blocks.create (20);
406 loop->block_bitmap = BITMAP_ALLOC (loop_stack);
407
408 if (dump_file)
409 {
410 fprintf (dump_file, ";; potential loop %d ending at\n",
411 loop->loop_no);
412 print_rtl_single (dump_file, tail);
413 }
414
415 discover_loop (loop, bb, tail, reg);
416 }
417
418 /* Compute loop nestings. Given two loops A and B, either the sets
419 of their blocks don't intersect at all, or one is the subset of
420 the other, or the blocks don't form a good nesting structure. */
421 for (loop = loops; loop; loop = loop->next)
422 {
423 hwloop_info other;
424
425 if (loop->bad)
426 continue;
427
428 for (other = loops; other; other = other->next)
429 {
430 if (other->bad)
431 continue;
432
433 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
434 continue;
435 if (!bitmap_intersect_compl_p (other->block_bitmap,
436 loop->block_bitmap))
437 loop->loops.safe_push (other);
438 else if (!bitmap_intersect_compl_p (loop->block_bitmap,
439 other->block_bitmap))
440 other->loops.safe_push (loop);
441 else
442 {
443 if (dump_file)
444 fprintf (dump_file,
445 ";; can't find suitable nesting for loops %d and %d\n",
446 loop->loop_no, other->loop_no);
447 loop->bad = other->bad = true;
448 }
449 }
450 }
451
452 if (dump_file)
453 dump_hwloops (loops);
454
455 return loops;
456 }
457
458 /* Free up the loop structures in LOOPS. */
459 static void
free_loops(hwloop_info loops)460 free_loops (hwloop_info loops)
461 {
462 while (loops)
463 {
464 hwloop_info loop = loops;
465 loops = loop->next;
466 loop->loops.release ();
467 loop->blocks.release ();
468 BITMAP_FREE (loop->block_bitmap);
469 XDELETE (loop);
470 }
471 }
472
473 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
474
475 /* Initialize the aux fields to give ascending indices to basic blocks. */
476 static void
set_bb_indices(void)477 set_bb_indices (void)
478 {
479 basic_block bb;
480 intptr_t index;
481
482 index = 0;
483 FOR_EACH_BB_FN (bb, cfun)
484 bb->aux = (void *) index++;
485 }
486
487 /* The taken-branch edge from the loop end can actually go forward.
488 If the target's hardware loop support requires that the loop end be
489 after the loop start, try to reorder a loop's basic blocks when we
490 find such a case.
491 This is not very aggressive; it only moves at most one block. It
492 does not introduce new branches into loops; it may remove them, or
493 it may switch fallthru/jump edges. */
494 static void
reorder_loops(hwloop_info loops)495 reorder_loops (hwloop_info loops)
496 {
497 basic_block bb;
498 hwloop_info loop;
499
500 cfg_layout_initialize (0);
501
502 set_bb_indices ();
503
504 for (loop = loops; loop; loop = loop->next)
505 {
506 edge e;
507 edge_iterator ei;
508
509 if (loop->bad)
510 continue;
511
512 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
513 continue;
514
515 FOR_EACH_EDGE (e, ei, loop->head->succs)
516 {
517 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
518 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
519 {
520 basic_block start_bb = e->dest;
521 basic_block start_prev_bb = start_bb->prev_bb;
522
523 if (dump_file)
524 fprintf (dump_file, ";; Moving block %d before block %d\n",
525 loop->head->index, start_bb->index);
526 loop->head->prev_bb->next_bb = loop->head->next_bb;
527 loop->head->next_bb->prev_bb = loop->head->prev_bb;
528
529 loop->head->prev_bb = start_prev_bb;
530 loop->head->next_bb = start_bb;
531 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
532
533 set_bb_indices ();
534 break;
535 }
536 }
537 loops = loops->next;
538 }
539
540 FOR_EACH_BB_FN (bb, cfun)
541 {
542 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
543 bb->aux = bb->next_bb;
544 else
545 bb->aux = NULL;
546 }
547 cfg_layout_finalize ();
548 clear_aux_for_blocks ();
549 df_analyze ();
550 }
551
552 /* Call the OPT function for LOOP and all of its sub-loops. This is
553 done in a depth-first search; innermost loops are visited first.
554 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
555 target's reorg pass. */
556 static void
optimize_loop(hwloop_info loop,struct hw_doloop_hooks * hooks)557 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
558 {
559 int ix;
560 hwloop_info inner;
561 int inner_depth = 0;
562
563 if (loop->visited)
564 return;
565
566 loop->visited = 1;
567
568 if (loop->bad)
569 {
570 if (dump_file)
571 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
572 goto bad_loop;
573 }
574
575 /* Every loop contains in its list of inner loops every loop nested inside
576 it, even if there are intermediate loops. This works because we're doing
577 a depth-first search here and never visit a loop more than once.
578 Recursion depth is effectively limited by the number of available
579 hardware registers. */
580 for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
581 {
582 optimize_loop (inner, hooks);
583
584 if (!inner->bad && inner_depth < inner->depth)
585 inner_depth = inner->depth;
586 /* The set of registers may be changed while optimizing the inner
587 loop. */
588 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
589 }
590
591 loop->depth = inner_depth + 1;
592
593 if (hooks->opt (loop))
594 return;
595
596 bad_loop:
597 if (dump_file)
598 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
599
600 loop->bad = true;
601 hooks->fail (loop);
602 }
603
604 /* This function can be used from a port's machine_dependent_reorg to
605 find and analyze loops that end in loop_end instructions. It uses
606 a set of function pointers in HOOKS to call back into the
607 target-specific functions to perform the actual machine-specific
608 transformations.
609
610 Such transformations typically involve additional set-up
611 instructions before the loop, to define loop bounds or set up a
612 special loop counter register.
613
614 DO_REORDER should be set to true if we should try to use the
615 reorder_loops function to ensure the loop end occurs after the loop
616 start. This is for use by targets where the loop hardware requires
617 this condition.
618
619 HOOKS is used to pass in target specific hooks; see
620 hw-doloop.h. */
621 void
reorg_loops(bool do_reorder,struct hw_doloop_hooks * hooks)622 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
623 {
624 hwloop_info loops = NULL;
625 hwloop_info loop;
626 bitmap_obstack loop_stack;
627
628 df_live_add_problem ();
629 df_live_set_all_dirty ();
630 df_analyze ();
631
632 bitmap_obstack_initialize (&loop_stack);
633
634 if (dump_file)
635 fprintf (dump_file, ";; Find loops, first pass\n\n");
636
637 loops = discover_loops (&loop_stack, hooks);
638
639 if (do_reorder)
640 {
641 reorder_loops (loops);
642 free_loops (loops);
643
644 if (dump_file)
645 fprintf (dump_file, ";; Find loops, second pass\n\n");
646
647 loops = discover_loops (&loop_stack, hooks);
648 }
649
650 for (loop = loops; loop; loop = loop->next)
651 scan_loop (loop);
652
653 /* Now apply the optimizations. */
654 for (loop = loops; loop; loop = loop->next)
655 optimize_loop (loop, hooks);
656
657 if (dump_file)
658 {
659 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
660 dump_hwloops (loops);
661 }
662
663 free_loops (loops);
664 bitmap_obstack_release (&loop_stack);
665
666 if (dump_file)
667 print_rtl (dump_file, get_insns ());
668 }
669 #endif
670