1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
3 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "memmodel.h"
32 #include "optabs.h"
33 #include "regs.h"
34 #include "emit-rtl.h"
35 #include "gcov-io.h"
36 #include "profile.h"
37 #include "insn-attr.h"
38 #include "cfgrtl.h"
39 #include "sched-int.h"
40 #include "cfgloop.h"
41 #include "expr.h"
42 #include "params.h"
43 #include "ddg.h"
44 #include "tree-pass.h"
45 #include "dbgcnt.h"
46 #include "loop-unroll.h"
47 #include "hard-reg-set.h"
48
49 #ifdef INSN_SCHEDULING
50
51 /* This file contains the implementation of the Swing Modulo Scheduler,
52 described in the following references:
53 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
54 Lifetime--sensitive modulo scheduling in a production environment.
55 IEEE Trans. on Comps., 50(3), March 2001
56 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
57 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
58 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
59
60 The basic structure is:
61 1. Build a data-dependence graph (DDG) for each loop.
62 2. Use the DDG to order the insns of a loop (not in topological order
63 necessarily, but rather) trying to place each insn after all its
64 predecessors _or_ after all its successors.
65 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
66 4. Use the ordering to perform list-scheduling of the loop:
67 1. Set II = MII. We will try to schedule the loop within II cycles.
68 2. Try to schedule the insns one by one according to the ordering.
69 For each insn compute an interval of cycles by considering already-
70 scheduled preds and succs (and associated latencies); try to place
71 the insn in the cycles of this window checking for potential
72 resource conflicts (using the DFA interface).
73 Note: this is different from the cycle-scheduling of schedule_insns;
74 here the insns are not scheduled monotonically top-down (nor bottom-
75 up).
76 3. If failed in scheduling all insns - bump II++ and try again, unless
77 II reaches an upper bound MaxII, in which case report failure.
78 5. If we succeeded in scheduling the loop within II cycles, we now
79 generate prolog and epilog, decrease the counter of the loop, and
80 perform modulo variable expansion for live ranges that span more than
81 II cycles (i.e. use register copies to prevent a def from overwriting
82 itself before reaching the use).
83
84 SMS works with countable loops (1) whose control part can be easily
85 decoupled from the rest of the loop and (2) whose loop count can
86 be easily adjusted. This is because we peel a constant number of
87 iterations into a prologue and epilogue for which we want to avoid
88 emitting the control part, and a kernel which is to iterate that
89 constant number of iterations less than the original loop. So the
90 control part should be a set of insns clearly identified and having
91 its own iv, not otherwise used in the loop (at-least for now), which
92 initializes a register before the loop to the number of iterations.
93 Currently SMS relies on the do-loop pattern to recognize such loops,
94 where (1) the control part comprises of all insns defining and/or
95 using a certain 'count' register and (2) the loop count can be
96 adjusted by modifying this register prior to the loop.
97 TODO: Rely on cfgloop analysis instead. */
98
99 /* This page defines partial-schedule structures and functions for
100 modulo scheduling. */
101
typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   C's % operator may return a negative result for a negative X.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  This is the ceiling
   of (max_cycle - min_cycle + 1) / ii.  */
#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
						   + 1 + ii - 1) / ii)
/* The stage count of ps.  */
#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
120
/* A single instruction in the partial schedule.  */
struct ps_insn
{
  /* Identifies the instruction to be scheduled.  Values smaller than
     the ddg's num_nodes refer directly to ddg nodes.  A value of
     X - num_nodes refers to register move X.  */
  int id;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row (doubly-linked list; see
     partial_schedule::rows).  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

};
138
/* Information about a register move that has been added to a partial
   schedule.  */
struct ps_reg_move_info
{
  /* The source of the move is defined by the ps_insn with id DEF.
     The destination is used by the ps_insns with the ids in USES.  */
  int def;
  sbitmap uses;

  /* The original form of USES' instructions used OLD_REG, but they
     should now use NEW_REG.  */
  rtx old_reg;
  rtx new_reg;

  /* The number of consecutive stages that the move occupies.  */
  int num_consecutive_stages;

  /* An instruction that sets NEW_REG to the correct value.  The first
     move associated with DEF will have an rhs of OLD_REG; later moves
     use the result of the previous move.  */
  rtx_insn *insn;
};
161
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* All the moves added for this partial schedule.  Index X has
     a ps_insn id of X + g->num_nodes.  */
  vec<ps_reg_move_info> reg_moves;

  /* rows_length[i] holds the number of instructions in the row.
     It is used only (as an optimization) to back off quickly from
     trying to schedule a node in a full row; that is, to avoid running
     through futile DFA state transitions.  */
  int *rows_length;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */

  int stage_count;  /* The stage count of the partial schedule.  */
};
193
194
/* Forward declarations for the partial-schedule manipulation
   routines defined later in this file.  */
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
						int, int, sbitmap, sbitmap);
static void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
206
207
208 /* This page defines constants and structures for the modulo scheduling
209 driver. */
210
/* Forward declarations for the modulo-scheduling driver routines
   defined later in this file.  */
static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
static int calculate_stage_count (partial_schedule_ptr, int);
static void calculate_must_precede_follow (ddg_node_ptr, int, int,
					   int, int, sbitmap, sbitmap, sbitmap);
static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
			     sbitmap, int, int *, int *, int *);
static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
					  sbitmap, int *, sbitmap, sbitmap);
static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
223
/* The ASAP (as-soon-as-possible) time of a ddg node, kept in its
   aux.count field.  */
#define NODE_ASAP(node) ((node)->aux.count)

/* Accessors for the per-ps_insn scheduling parameters stored in
   node_sched_param_vec, indexed by ps_insn id X.  */
#define SCHED_PARAMS(x) (&node_sched_param_vec[x])
#define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
#define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
231
/* The scheduling parameters held for each node.  */
typedef struct node_sched_params
{
  int time;	/* The absolute scheduling cycle.  */

  int row;    /* Holds time % ii.  */
  int stage;  /* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
  int column;
} *node_sched_params_ptr;
244
245 /* The following three functions are copied from the current scheduler
246 code in order to use sched_analyze() for computing the dependencies.
247 They are used when initializing the sched_info structure. */
248 static const char *
sms_print_insn(const rtx_insn * insn,int aligned ATTRIBUTE_UNUSED)249 sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
250 {
251 static char tmp[80];
252
253 sprintf (tmp, "i%4d", INSN_UID (insn));
254 return tmp;
255 }
256
/* Deliberately-empty sched_deps_info callback (see sms_sched_deps_info
   below): SMS does not add any extra dependencies for registers used
   by conditional jumps.  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED)
{
}
262
static struct common_sched_info_def sms_common_sched_info;

/* Dependence-analysis hooks passed to sched_analyze.  Only the
   jump-register hook is supplied (as a no-op above); all remaining
   callbacks and flags are unused by SMS.  */
static struct sched_deps_info_def sms_sched_deps_info =
{
  compute_jump_reg_dependencies,
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  NULL,
  0, 0, 0
};
272
/* Haifa-scheduler hooks.  SMS only needs sched_analyze's dependence
   information and insn printing, so almost every callback is NULL.  */
static struct haifa_sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  NULL, /* insn_finishes_block_p */
  NULL, NULL,
  NULL, NULL,
  0, 0,

  NULL, NULL, NULL, NULL,
  NULL, NULL,
  0
};
291
292 /* Partial schedule instruction ID in PS is a register move. Return
293 information about it. */
294 static struct ps_reg_move_info *
ps_reg_move(partial_schedule_ptr ps,int id)295 ps_reg_move (partial_schedule_ptr ps, int id)
296 {
297 gcc_checking_assert (id >= ps->g->num_nodes);
298 return &ps->reg_moves[id - ps->g->num_nodes];
299 }
300
301 /* Return the rtl instruction that is being scheduled by partial schedule
302 instruction ID, which belongs to schedule PS. */
303 static rtx_insn *
ps_rtl_insn(partial_schedule_ptr ps,int id)304 ps_rtl_insn (partial_schedule_ptr ps, int id)
305 {
306 if (id < ps->g->num_nodes)
307 return ps->g->nodes[id].insn;
308 else
309 return ps_reg_move (ps, id)->insn;
310 }
311
312 /* Partial schedule instruction ID, which belongs to PS, occurred in
313 the original (unscheduled) loop. Return the first instruction
314 in the loop that was associated with ps_rtl_insn (PS, ID).
315 If the instruction had some notes before it, this is the first
316 of those notes. */
317 static rtx_insn *
ps_first_note(partial_schedule_ptr ps,int id)318 ps_first_note (partial_schedule_ptr ps, int id)
319 {
320 gcc_assert (id < ps->g->num_nodes);
321 return ps->g->nodes[id].first_note;
322 }
323
324 /* Return the number of consecutive stages that are occupied by
325 partial schedule instruction ID in PS. */
326 static int
ps_num_consecutive_stages(partial_schedule_ptr ps,int id)327 ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
328 {
329 if (id < ps->g->num_nodes)
330 return 1;
331 else
332 return ps_reg_move (ps, id)->num_consecutive_stages;
333 }
334
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx_insn *head, rtx_insn *tail)
{
  rtx reg, condition;
  rtx_insn *insn, *first_insn_not_to_check;

  /* A do-loop must end in a conditional jump.  */
  if (!JUMP_P (tail))
    return NULL_RTX;

  /* No doloop_end pattern on this target means no do-loops at all.  */
  if (!targetm.code_for_doloop_end)
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The count register appears in the condition either directly, or
     as the first operand of a PLUS (decrement-and-branch forms).  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
                             : prev_nondebug_insn (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
      {
        if (dump_file)
          {
            fprintf (dump_file, "SMS count_reg found ");
            print_rtl_single (dump_file, reg);
            fprintf (dump_file, " outside control in insn:\n");
            print_rtl_single (dump_file, insn);
          }

        return NULL_RTX;
      }

  return reg;
}
387
/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
   that the number of iterations is a compile-time constant.  If so,
   return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
   this constant.  Otherwise return 0.
   *ADJUST_INPLACE is set to true iff the setter may be modified in
   place, i.e. no insn after it in the pre-header reads COUNT_REG.  */
static rtx_insn *
const_iteration_count (rtx count_reg, basic_block pre_header,
		       int64_t *count, bool* adjust_inplace)
{
  rtx_insn *insn;
  rtx_insn *head, *tail;

  *adjust_inplace = false;
  bool read_after = false;

  if (! pre_header)
    return NULL;

  get_ebb_head_tail (pre_header, pre_header, &head, &tail);

  /* Scan the pre-header backwards looking for the insn that sets
     COUNT_REG; anything encountered before finding it executes after
     the setter at run time.  */
  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
    if (single_set (insn) && rtx_equal_p (count_reg,
					  SET_DEST (single_set (insn))))
      {
	rtx pat = single_set (insn);

	if (CONST_INT_P (SET_SRC (pat)))
	  {
	    *count = INTVAL (SET_SRC (pat));
	    /* Safe to rewrite the setter only if no later insn read
	       the register before the loop.  */
	    *adjust_inplace = !read_after;
	    return insn;
	  }

	/* COUNT_REG is set, but not to a compile-time constant.  */
	return NULL;
      }
    else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
      {
	read_after = true;
	/* A non-single_set write to COUNT_REG: we cannot identify a
	   simple constant setter, give up.  */
	if (reg_set_p (count_reg, insn))
	  break;
      }

  return NULL;
}
431
432 /* A very simple resource-based lower bound on the initiation interval.
433 ??? Improve the accuracy of this bound by considering the
434 utilization of various units. */
435 static int
res_MII(ddg_ptr g)436 res_MII (ddg_ptr g)
437 {
438 if (targetm.sched.sms_res_mii)
439 return targetm.sched.sms_res_mii (g);
440
441 return ((g->num_nodes - g->num_debug) / issue_rate);
442 }
443
444
/* A vector that contains the sched data for each ps_insn, indexed by
   ps_insn id: entries for ddg nodes come first, register moves after
   them (see extend_node_sched_params).  */
static vec<node_sched_params> node_sched_param_vec;
447
/* Allocate sched_params for each node and initialize it.  */
static void
set_node_sched_params (ddg_ptr g)
{
  /* Drop any entries left over from a previous loop, then allocate a
     zero-initialized entry (time/row/stage/column all 0) per ddg node.  */
  node_sched_param_vec.truncate (0);
  node_sched_param_vec.safe_grow_cleared (g->num_nodes);
}
455
456 /* Make sure that node_sched_param_vec has an entry for every move in PS. */
457 static void
extend_node_sched_params(partial_schedule_ptr ps)458 extend_node_sched_params (partial_schedule_ptr ps)
459 {
460 node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
461 + ps->reg_moves.length ());
462 }
463
464 /* Update the sched_params (time, row and stage) for node U using the II,
465 the CYCLE of U and MIN_CYCLE.
466 We're not simply taking the following
467 SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
468 because the stages may not be aligned on cycle 0. */
469 static void
update_node_sched_params(int u,int ii,int cycle,int min_cycle)470 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
471 {
472 int sc_until_cycle_zero;
473 int stage;
474
475 SCHED_TIME (u) = cycle;
476 SCHED_ROW (u) = SMODULO (cycle, ii);
477
478 /* The calculation of stage count is done adding the number
479 of stages before cycle zero and after cycle zero. */
480 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
481
482 if (SCHED_TIME (u) < 0)
483 {
484 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
485 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
486 }
487 else
488 {
489 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
490 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
491 }
492 }
493
494 static void
print_node_sched_params(FILE * file,int num_nodes,partial_schedule_ptr ps)495 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
496 {
497 int i;
498
499 if (! file)
500 return;
501 for (i = 0; i < num_nodes; i++)
502 {
503 node_sched_params_ptr nsp = SCHED_PARAMS (i);
504
505 fprintf (file, "Node = %d; INSN = %d\n", i,
506 INSN_UID (ps_rtl_insn (ps, i)));
507 fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
508 fprintf (file, " time = %d:\n", nsp->time);
509 fprintf (file, " stage = %d:\n", nsp->stage);
510 }
511 }
512
513 /* Set SCHED_COLUMN for each instruction in row ROW of PS. */
514 static void
set_columns_for_row(partial_schedule_ptr ps,int row)515 set_columns_for_row (partial_schedule_ptr ps, int row)
516 {
517 ps_insn_ptr cur_insn;
518 int column;
519
520 column = 0;
521 for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
522 SCHED_COLUMN (cur_insn->id) = column++;
523 }
524
525 /* Set SCHED_COLUMN for each instruction in PS. */
526 static void
set_columns_for_ps(partial_schedule_ptr ps)527 set_columns_for_ps (partial_schedule_ptr ps)
528 {
529 int row;
530
531 for (row = 0; row < ps->ii; row++)
532 set_columns_for_row (ps, row);
533 }
534
/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
   Its single predecessor has already been scheduled, as has its
   ddg node successors.  (The move may have also another move as its
   successor, in which case that successor will be scheduled later.)

   The move is part of a chain that satisfies register dependencies
   between a producing ddg node and various consuming ddg nodes.
   If some of these dependencies have a distance of 1 (meaning that
   the use is upward-exposed) then DISTANCE1_USES is nonnull and
   contains the set of uses with distance-1 dependencies.
   DISTANCE1_USES is null otherwise.

   MUST_FOLLOW is a scratch bitmap that is big enough to hold
   all current ps_insn ids.

   Return true on success.  */
static bool
schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
		   sbitmap distance1_uses, sbitmap must_follow)
{
  unsigned int u;
  int this_time, this_distance, this_start, this_end, this_latency;
  int start, end, c, ii;
  sbitmap_iterator sbi;
  ps_reg_move_info *move;
  rtx_insn *this_insn;
  ps_insn_ptr psi;

  move = ps_reg_move (ps, i_reg_move);
  ii = ps->ii;
  if (dump_file)
    {
      fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
	       PS_MIN_CYCLE (ps));
      print_rtl_single (dump_file, move->insn);
      fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
      fprintf (dump_file, "=========== =========== =====\n");
    }

  /* START/END bracket the legal window of cycles for the move; each
     dependence below narrows it.  */
  start = INT_MIN;
  end = INT_MAX;

  /* For dependencies of distance 1 between a producer ddg node A
     and consumer ddg node B, we have a chain of dependencies:

	A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B

     where Mi is the ith move.  For dependencies of distance 0 between
     a producer ddg node A and consumer ddg node C, we have a chain of
     dependencies:

	A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C

     where Mi' occupies the same position as Mi but occurs a stage later.
     We can only schedule each move once, so if we have both types of
     chain, we model the second as:

	A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C

     First handle the dependencies between the previously-scheduled
     predecessor and the move.  */
  this_insn = ps_rtl_insn (ps, move->def);
  this_latency = insn_latency (this_insn, move->insn);
  /* The first move after a ddg-node producer carries the distance-1
     tag when distance-1 uses exist (see the modelling note above).  */
  this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
  this_time = SCHED_TIME (move->def) - this_distance * ii;
  this_start = this_time + this_latency;
  this_end = this_time + ii;
  if (dump_file)
    fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
	     this_start, this_end, SCHED_TIME (move->def),
	     INSN_UID (this_insn), this_latency, this_distance,
	     INSN_UID (move->insn));

  if (start < this_start)
    start = this_start;
  if (end > this_end)
    end = this_end;

  /* Handle the dependencies between the move and previously-scheduled
     successors.  */
  EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
    {
      this_insn = ps_rtl_insn (ps, u);
      this_latency = insn_latency (move->insn, this_insn);
      /* Distance-0 uses of a move in a distance-1 chain are modelled
	 with distance -1 (see the note above).  */
      if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
	this_distance = -1;
      else
	this_distance = 0;
      this_time = SCHED_TIME (u) + this_distance * ii;
      this_start = this_time - ii;
      this_end = this_time - this_latency;
      if (dump_file)
	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
		 this_latency, this_distance, INSN_UID (this_insn));

      if (start < this_start)
	start = this_start;
      if (end > this_end)
	end = this_end;
    }

  if (dump_file)
    {
      fprintf (dump_file, "----------- ----------- -----\n");
      fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
    }

  /* The move must be placed after its producer in the same row.  */
  bitmap_clear (must_follow);
  bitmap_set_bit (must_follow, move->def);

  /* Search the window top-down, clamped to at most II cycles.  */
  start = MAX (start, end - (ii - 1));
  for (c = end; c >= start; c--)
    {
      psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
					 move->uses, must_follow);
      if (psi)
	{
	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
	  if (dump_file)
	    fprintf (dump_file, "\nScheduled register move INSN %d at"
		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
		     SCHED_ROW (i_reg_move));
	  return true;
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nNo available slot\n\n");

  return false;
}
668
/*
   Breaking intra-loop register anti-dependences:
   Each intra-loop register anti-dependence implies a cross-iteration true
   dependence of distance 1.  Therefore, we can remove such false dependencies
   and figure out if the partial schedule broke them by checking if (for a
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
   if so generate a register move.   The number of such moves is equal to:
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
   nreg_moves = ----------------------------------- + 1 - {   dependence.
                            ii                          { 1 if not.

   Return false (and leave the partial schedule unusable) if any move
   could not be allocated or scheduled; true on success.  */
static bool
schedule_reg_moves (partial_schedule_ptr ps)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      rtx prev_reg, old_reg;
      int first_move;
      int distances[2];
      sbitmap distance1_uses;
      rtx set = single_set (u->insn);

      /* Skip instructions that do not set a register.  */
      if (set && !REG_P (SET_DEST (set)))
	continue;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      distances[0] = distances[1] = false;
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
				- SCHED_TIME (e->src->cuid)) / ii;

	    /* A distance-1 dependence reaches the use one iteration
	       later, adding II to the scheduling gap.  */
	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest->cuid)
			      - SCHED_TIME (e->src->cuid) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
	      nreg_moves4e--;

	    if (nreg_moves4e >= 1)
	      {
		/* !single_set instructions are not supported yet and
		   thus we do not expect to encounter them in the loop
		   except from the doloop part.  For the latter case
		   we assume no regmoves are generated as the doloop
		   instructions are tied to the branch with an edge.  */
		gcc_assert (set);
		/* If the instruction contains auto-inc register then
		   validate that the regmove is being generated for the
		   target register rather than the inc'ed register.  */
		gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
	      }

	    if (nreg_moves4e)
	      {
		gcc_assert (e->distance < 2);
		distances[e->distance] = true;
	      }
	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Create NREG_MOVES register moves.  */
      first_move = ps->reg_moves.length ();
      ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
      extend_node_sched_params (ps);

      /* Record the moves associated with this node.  */
      first_move += ps->g->num_nodes;

      /* Generate each move.  */
      old_reg = prev_reg = SET_DEST (set);
      /* Cannot create pseudo copies of a hard register.  */
      if (HARD_REGISTER_P (old_reg))
	return false;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);

	  /* The first move copies from the original producer node I;
	     each later move copies from the previous move.  */
	  move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
	  move->uses = sbitmap_alloc (first_move + nreg_moves);
	  move->old_reg = old_reg;
	  move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
	  move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
	  bitmap_clear (move->uses);

	  prev_reg = move->new_reg;
	}

      distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;

      if (distance1_uses)
	bitmap_clear (distance1_uses);

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Record which uses require which move results.  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    /* DEST_COPY mirrors the nreg_moves4e computation above:
	       which copy (1-based) this use must read from.  */
	    int dest_copy = (SCHED_TIME (e->dest->cuid)
			     - SCHED_TIME (e->src->cuid)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest->cuid)
			   - SCHED_TIME (e->src->cuid) + ii) / ii;

	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
	      dest_copy--;

	    if (dest_copy)
	      {
		ps_reg_move_info *move;

		move = ps_reg_move (ps, first_move + dest_copy - 1);
		bitmap_set_bit (move->uses, e->dest->cuid);
		if (e->distance == 1)
		  bitmap_set_bit (distance1_uses, e->dest->cuid);
	      }
	  }

      /* Now place each move in the schedule, producer-first.  */
      auto_sbitmap must_follow (first_move + nreg_moves);
      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	if (!schedule_reg_move (ps, first_move + i_reg_move,
				distance1_uses, must_follow))
	  break;
      if (distance1_uses)
	sbitmap_free (distance1_uses);
      if (i_reg_move < nreg_moves)
	return false;
    }
  return true;
}
819
820 /* Emit the moves associated with PS. Apply the substitutions
821 associated with them. */
822 static void
apply_reg_moves(partial_schedule_ptr ps)823 apply_reg_moves (partial_schedule_ptr ps)
824 {
825 ps_reg_move_info *move;
826 int i;
827
828 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
829 {
830 unsigned int i_use;
831 sbitmap_iterator sbi;
832
833 EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
834 {
835 replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
836 df_insn_rescan (ps->g->nodes[i_use].insn);
837 }
838 }
839 }
840
/* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
   SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
   will move to cycle zero.  Note: ps->min_cycle/max_cycle themselves
   are not updated here (the caller rotates the schedule).  */
static void
reset_sched_times (partial_schedule_ptr ps, int amount)
{
  int row;
  int ii = ps->ii;
  ps_insn_ptr crr_insn;

  for (row = 0; row < ii; row++)
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
      {
	int u = crr_insn->id;
	int normalized_time = SCHED_TIME (u) - amount;
	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;

	if (dump_file)
	  {
	    /* Print the scheduling times after the rotation.  */
	    rtx_insn *insn = ps_rtl_insn (ps, u);

	    fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
		     "crr_insn->cycle=%d, min_cycle=%d", u,
		     INSN_UID (insn), normalized_time, new_min_cycle);
	    if (JUMP_P (insn))
	      fprintf (dump_file, " (branch)");
	    fprintf (dump_file, "\n");
	  }

	/* Every scheduled time must lie inside the recorded window.  */
	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);

	crr_insn->cycle = normalized_time;
	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
      }
}
878
/* Permute the insns according to their order in PS, from row 0 to
   row ii-1, and position them right before LAST.  This schedules
   the insns of the loop kernel.  */
static void
permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
{
  int ii = ps->ii;
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ii ; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);

	/* Skip insns already in position.  */
	if (PREV_INSN (last) != insn)
	  {
	    if (ps_ij->id < ps->g->num_nodes)
	      /* A ddg node: move it together with any notes that
		 precede it in the original loop.  */
	      reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
				  PREV_INSN (last));
	    else
	      /* A register move: not yet in the insn chain, so insert
		 rather than reorder.  */
	      add_insn_before (insn, last, NULL);
	  }
      }
}
904
905 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
906 respectively only if cycle C falls on the border of the scheduling
907 window boundaries marked by START and END cycles. STEP is the
908 direction of the window. */
909 static inline void
set_must_precede_follow(sbitmap * tmp_follow,sbitmap must_follow,sbitmap * tmp_precede,sbitmap must_precede,int c,int start,int end,int step)910 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
911 sbitmap *tmp_precede, sbitmap must_precede, int c,
912 int start, int end, int step)
913 {
914 *tmp_precede = NULL;
915 *tmp_follow = NULL;
916
917 if (c == start)
918 {
919 if (step == 1)
920 *tmp_precede = must_precede;
921 else /* step == -1. */
922 *tmp_follow = must_follow;
923 }
924 if (c == end - step)
925 {
926 if (step == 1)
927 *tmp_follow = must_follow;
928 else /* step == -1. */
929 *tmp_precede = must_precede;
930 }
931
932 }
933
/* Return True if the branch can be moved to row ii-1 while
   normalizing the partial schedule PS to start from cycle zero and thus
   optimize the SC.  Otherwise return False.  */
static bool
optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
{
  int amount = PS_MIN_CYCLE (ps);
  int start, end, step;
  int ii = ps->ii;
  bool ok = false;
  int stage_count, stage_count_curr;

  /* Compare the SC after normalization and SC after bringing the branch
     to row ii-1.  If they are equal just bail out.  */
  stage_count = calculate_stage_count (ps, amount);
  stage_count_curr =
    calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));

  if (stage_count == stage_count_curr)
    {
      if (dump_file)
	fprintf (dump_file, "SMS SC already optimized.\n");

      return false;
    }

  if (dump_file)
    {
      fprintf (dump_file, "SMS Trying to optimize branch location\n");
      fprintf (dump_file, "SMS partial schedule before trial:\n");
      print_partial_schedule (ps, dump_file);
    }

  /* First, normalize the partial scheduling so the earliest cycle
     becomes zero.  */
  reset_sched_times (ps, amount);
  rotate_partial_schedule (ps, amount);
  if (dump_file)
    {
      fprintf (dump_file,
	       "SMS partial schedule after normalization (ii, %d, SC %d):\n",
	       ii, stage_count);
      print_partial_schedule (ps, dump_file);
    }

  /* After normalization the branch may already sit in row ii-1;
     nothing more to do in that case.  */
  if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
    return true;

  auto_sbitmap sched_nodes (g->num_nodes);
  bitmap_ones (sched_nodes);

  /* Calculate the new placement of the branch.  It should be in row
     ii-1 and fall into its scheduling window.  */
  if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
			&step, &end) == 0)
    {
      bool success;
      ps_insn_ptr next_ps_i;
      int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
      int row = SMODULO (branch_cycle, ps->ii);
      int num_splits = 0;
      sbitmap tmp_precede, tmp_follow;
      int min_cycle, c;

      if (dump_file)
	fprintf (dump_file, "\nTrying to schedule node %d "
		 "INSN = %d in (%d .. %d) step %d\n",
		 g->closing_branch->cuid,
		 (INSN_UID (g->closing_branch->insn)), start, end, step);

      gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
      if (step == 1)
	{
	  /* Pick the last cycle in the window whose row is ii-1.  */
	  c = start + ii - SMODULO (start, ii) - 1;
	  gcc_assert (c >= start);
	  if (c >= end)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "SMS failed to schedule branch at cycle: %d\n", c);
	      return false;
	    }
	}
      else
	{
	  /* Backward window: pick the nearest earlier cycle in row ii-1.  */
	  c = start - SMODULO (start, ii) - 1;
	  gcc_assert (c <= start);

	  if (c <= end)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "SMS failed to schedule branch at cycle: %d\n", c);
	      return false;
	    }
	}

      auto_sbitmap must_precede (g->num_nodes);
      auto_sbitmap must_follow (g->num_nodes);

      /* Try to schedule the branch in its new cycle.  */
      calculate_must_precede_follow (g->closing_branch, start, end,
				     step, ii, sched_nodes,
				     must_precede, must_follow);

      set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
			       must_precede, c, start, end, step);

      /* Find the element in the partial schedule related to the closing
	 branch so we can remove it from its current cycle.  */
      for (next_ps_i = ps->rows[row];
	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
	if (next_ps_i->id == g->closing_branch->cuid)
	  break;

      /* Remember the start of the current first stage so we can detect
	 whether rescheduling grew the schedule backwards.  */
      min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
      remove_node_from_ps (ps, next_ps_i);
      success =
	try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
				      sched_nodes, &num_splits,
				      tmp_precede, tmp_follow);
      gcc_assert (num_splits == 0);
      if (!success)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "SMS failed to schedule branch at cycle: %d, "
		     "bringing it back to cycle %d\n", c, branch_cycle);

	  /* The branch failed to be placed in row ii - 1.
	     Put it back in its original place in the partial
	     scheduling.  */
	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
				   must_precede, branch_cycle, start, end,
				   step);
	  success =
	    try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
					  branch_cycle, sched_nodes,
					  &num_splits, tmp_precede,
					  tmp_follow);
	  /* Re-inserting at the original cycle must always succeed.  */
	  gcc_assert (success && (num_splits == 0));
	  ok = false;
	}
      else
	{
	  /* The branch is placed in row ii - 1.  */
	  if (dump_file)
	    fprintf (dump_file,
		     "SMS success in moving branch to cycle %d\n", c);

	  update_node_sched_params (g->closing_branch->cuid, ii, c,
				    PS_MIN_CYCLE (ps));
	  ok = true;
	}

      /* This might have been added to a new first stage.  */
      if (PS_MIN_CYCLE (ps) < min_cycle)
	reset_sched_times (ps, 0);
    }

  return ok;
}
1095
1096 static void
duplicate_insns_of_cycles(partial_schedule_ptr ps,int from_stage,int to_stage,rtx count_reg)1097 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1098 int to_stage, rtx count_reg)
1099 {
1100 int row;
1101 ps_insn_ptr ps_ij;
1102
1103 for (row = 0; row < ps->ii; row++)
1104 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1105 {
1106 int u = ps_ij->id;
1107 int first_u, last_u;
1108 rtx_insn *u_insn;
1109
1110 /* Do not duplicate any insn which refers to count_reg as it
1111 belongs to the control part.
1112 The closing branch is scheduled as well and thus should
1113 be ignored.
1114 TODO: This should be done by analyzing the control part of
1115 the loop. */
1116 u_insn = ps_rtl_insn (ps, u);
1117 if (reg_mentioned_p (count_reg, u_insn)
1118 || JUMP_P (u_insn))
1119 continue;
1120
1121 first_u = SCHED_STAGE (u);
1122 last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1123 if (from_stage <= last_u && to_stage >= first_u)
1124 {
1125 if (u < ps->g->num_nodes)
1126 duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1127 else
1128 emit_insn (copy_rtx (PATTERN (u_insn)));
1129 }
1130 }
1131 }
1132
1133
/* Generate the instructions (including reg_moves) for prolog & epilog.
   PS is the final partial schedule, LOOP the software-pipelined loop,
   COUNT_REG the do-loop iteration counter.  If ADJUST_INIT, emit code
   at the start of the prolog that decrements COUNT_REG by the number
   of extra stages; otherwise the caller already adjusted the constant
   count in place.  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
			rtx count_reg, bool adjust_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (adjust_init)
    {
      /* Generate instructions at the beginning of the prolog to
	 adjust the loop count by STAGE_COUNT.  If loop count is constant
	 and it is not used anywhere in prologue, this constant is adjusted
	 by STAGE_COUNT outside of generate_prolog_epilog function.  */
      rtx sub_reg = NULL_RTX;

      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
				     gen_int_mode (last_stage,
						   GET_MODE (count_reg)),
				     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      /* expand_simple_binop may not have used COUNT_REG as its target;
	 copy the result back if so.  */
      if (REGNO (sub_reg) != REGNO (count_reg))
	emit_move_insn (count_reg, sub_reg);
    }

  /* The i-th prolog copy contains the insns of stages 0 .. i.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());
  if (!flag_resched_modulo_sched)
    e->dest->flags |= BB_DISABLE_SCHEDULE;

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  /* The i-th epilog copy contains the insns of stages i+1 .. last.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  if (!flag_resched_modulo_sched)
    e->dest->flags |= BB_DISABLE_SCHEDULE;

  end_sequence ();
}
1189
1190 /* Mark LOOP as software pipelined so the later
1191 scheduling passes don't touch it. */
1192 static void
mark_loop_unsched(struct loop * loop)1193 mark_loop_unsched (struct loop *loop)
1194 {
1195 unsigned i;
1196 basic_block *bbs = get_loop_body (loop);
1197
1198 for (i = 0; i < loop->num_nodes; i++)
1199 bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1200
1201 free (bbs);
1202 }
1203
1204 /* Return true if all the BBs of the loop are empty except the
1205 loop header. */
1206 static bool
loop_single_full_bb_p(struct loop * loop)1207 loop_single_full_bb_p (struct loop *loop)
1208 {
1209 unsigned i;
1210 basic_block *bbs = get_loop_body (loop);
1211
1212 for (i = 0; i < loop->num_nodes ; i++)
1213 {
1214 rtx_insn *head, *tail;
1215 bool empty_bb = true;
1216
1217 if (bbs[i] == loop->header)
1218 continue;
1219
1220 /* Make sure that basic blocks other than the header
1221 have only notes labels or jumps. */
1222 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1223 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1224 {
1225 if (NOTE_P (head) || LABEL_P (head)
1226 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1227 continue;
1228 empty_bb = false;
1229 break;
1230 }
1231
1232 if (! empty_bb)
1233 {
1234 free (bbs);
1235 return false;
1236 }
1237 }
1238 free (bbs);
1239 return true;
1240 }
1241
1242 /* Dump file:line from INSN's location info to dump_file. */
1243
1244 static void
dump_insn_location(rtx_insn * insn)1245 dump_insn_location (rtx_insn *insn)
1246 {
1247 if (dump_file && INSN_HAS_LOCATION (insn))
1248 {
1249 expanded_location xloc = insn_location (insn);
1250 fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1251 }
1252 }
1253
1254 /* A simple loop from SMS point of view; it is a loop that is composed of
1255 either a single basic block or two BBs - a header and a latch. */
1256 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1257 && (EDGE_COUNT (loop->latch->preds) == 1) \
1258 && (EDGE_COUNT (loop->latch->succs) == 1))
1259
1260 /* Return true if the loop is in its canonical form and false if not.
1261 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
1262 static bool
loop_canon_p(struct loop * loop)1263 loop_canon_p (struct loop *loop)
1264 {
1265
1266 if (loop->inner || !loop_outer (loop))
1267 {
1268 if (dump_file)
1269 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1270 return false;
1271 }
1272
1273 if (!single_exit (loop))
1274 {
1275 if (dump_file)
1276 {
1277 rtx_insn *insn = BB_END (loop->header);
1278
1279 fprintf (dump_file, "SMS loop many exits");
1280 dump_insn_location (insn);
1281 fprintf (dump_file, "\n");
1282 }
1283 return false;
1284 }
1285
1286 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1287 {
1288 if (dump_file)
1289 {
1290 rtx_insn *insn = BB_END (loop->header);
1291
1292 fprintf (dump_file, "SMS loop many BBs.");
1293 dump_insn_location (insn);
1294 fprintf (dump_file, "\n");
1295 }
1296 return false;
1297 }
1298
1299 return true;
1300 }
1301
1302 /* If there are more than one entry for the loop,
1303 make it one by splitting the first entry edge and
1304 redirecting the others to the new BB. */
1305 static void
canon_loop(struct loop * loop)1306 canon_loop (struct loop *loop)
1307 {
1308 edge e;
1309 edge_iterator i;
1310
1311 /* Avoid annoying special cases of edges going to exit
1312 block. */
1313 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1314 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1315 split_edge (e);
1316
1317 if (loop->latch == loop->header
1318 || EDGE_COUNT (loop->latch->succs) > 1)
1319 {
1320 FOR_EACH_EDGE (e, i, loop->header->preds)
1321 if (e->src == loop->latch)
1322 break;
1323 split_edge (e);
1324 }
1325 }
1326
/* Setup infos.  Install the SMS-specific scheduler hook tables before
   the haifa scheduler is initialized.  */
static void
setup_sched_infos (void)
{
  /* Start from the generic haifa hooks, then record that this is the
     SMS pass.  */
  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
	  sizeof (sms_common_sched_info));
  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
  common_sched_info = &sms_common_sched_info;

  /* Point the global hook tables at the SMS variants.  */
  sched_deps_info = &sms_sched_deps_info;
  current_sched_info = &sms_sched_info;
}
1339
1340 /* Probability in % that the sms-ed loop rolls enough so that optimized
1341 version may be entered. Just a guess. */
1342 #define PROB_SMS_ENOUGH_ITERATIONS 80
1343
1344 /* Used to calculate the upper bound of ii. */
1345 #define MAXII_FACTOR 2
1346
/* Main entry point, perform SMS scheduling on the loops of the function
   that consist of single basic blocks.  Works in two phases: first an
   analysis pass builds a DDG for every candidate loop; then a
   transformation pass schedules each DDG and rewrites the loop into
   kernel + prolog + epilog.  */
static void
sms_schedule (void)
{
  rtx_insn *insn;
  ddg_ptr *g_arr, g;
  int * node_order;
  int maxii, max_asap;
  partial_schedule_ptr ps;
  basic_block bb = NULL;
  struct loop *loop;
  basic_block condition_bb = NULL;
  edge latch_edge;
  HOST_WIDE_INT trip_count, max_trip_count;
  HARD_REG_SET prohibited_regs;

  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (cfun) <= 1)
    {
      loop_optimizer_finalize ();
      return;  /* There are no loops to schedule.  */
    }

  /* Initialize issue_rate.  The target hook is queried as if after
     reload, since SMS runs before reload but wants post-reload issue
     widths.  */
  if (targetm.sched.issue_rate)
    {
      int temp = reload_completed;

      reload_completed = 1;
      issue_rate = targetm.sched.issue_rate ();
      reload_completed = temp;
    }
  else
    issue_rate = 1;

  /* Initialize the scheduler.  */
  setup_sched_infos ();
  haifa_sched_init ();

  /* Allocate memory to hold the DDG array one entry for each loop.
     We use loop->num as index into this array.  */
  g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));

  /* Registers with artificial uses (e.g. the stack pointer) must not
     be set by the loop body.  */
  REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses);

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSMS analysis phase\n");
      fprintf (dump_file, "===================\n\n");
    }

  /* Build DDGs for all the relevant loops and hold them in G_ARR
     indexed by the loop index.  */
  FOR_EACH_LOOP (loop, 0)
    {
      rtx_insn *head, *tail;
      rtx count_reg;

      /* For debugging.  */
      if (dbg_cnt (sms_sched_loop) == false)
	{
	  if (dump_file)
	    fprintf (dump_file, "SMS reached max limit... \n");

	  break;
	}

      if (dump_file)
	{
	  rtx_insn *insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop num: %d", loop->num);
	  dump_insn_location (insn);
	  fprintf (dump_file, "\n");
	}

      if (! loop_canon_p (loop))
	continue;

      if (! loop_single_full_bb_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
	  continue;
	}

      bb = loop->header;

      get_ebb_head_tail (bb, bb, &head, &tail);
      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      trip_count = get_estimated_loop_iterations_int (loop);
      max_trip_count = get_max_loop_iterations_int (loop);

      /* Perform SMS only on loops whose average count is above the
	 threshold.  */

      if ( latch_edge->count () > profile_count::zero ()
	  && (latch_edge->count()
	      < single_exit (loop)->count ().apply_scale
				 (SMS_LOOP_AVERAGE_COUNT_THRESHOLD, 1)))
	{
	  if (dump_file)
	    {
	      dump_insn_location (tail);
	      fprintf (dump_file, "\nSMS single-bb-loop\n");
	      if (profile_info && flag_branch_probabilities)
		{
		  fprintf (dump_file, "SMS loop-count ");
		  fprintf (dump_file, "%" PRId64,
			   (int64_t) bb->count.to_gcov_type ());
		  fprintf (dump_file, "\n");
		  fprintf (dump_file, "SMS trip-count ");
		  fprintf (dump_file, "%" PRId64 "max %" PRId64,
			   (int64_t) trip_count, (int64_t) max_trip_count);
		  fprintf (dump_file, "\n");
		}
	    }
	  continue;
	}

      /* Make sure this is a doloop.  */
      if ( !(count_reg = doloop_register_get (head, tail)))
	{
	  if (dump_file)
	    fprintf (dump_file, "SMS doloop_register_get failed\n");
	  continue;
	}

      /* Don't handle BBs with calls or barriers
	 or !single_set with the exception of do-loop control part insns.
	 ??? Should handle insns defining subregs.  */
      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
	{
	  if (INSN_P (insn))
	    {
	      /* Reject insns that clobber registers with artificial
		 uses (e.g. the stack pointer).  */
	      HARD_REG_SET regs;
	      CLEAR_HARD_REG_SET (regs);
	      note_stores (PATTERN (insn), record_hard_reg_sets, &regs);
	      if (hard_reg_set_intersect_p (regs, prohibited_regs))
		break;
	    }

	  if (CALL_P (insn)
	      || BARRIER_P (insn)
	      || (INSN_P (insn) && single_set (insn)
		  && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
	      /* Not a single set.  */
	      || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
		  && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
		  /* But non-single-set allowed in one special case.  */
		  && (insn != prev_nondebug_insn (tail)
		      || !reg_mentioned_p (count_reg, insn))))
	    break;
	}

      /* If the scan above stopped early, INSN is the offending insn;
	 report why and skip this loop.  */
      if (insn != NEXT_INSN (tail))
	{
	  if (dump_file)
	    {
	      if (CALL_P (insn))
		fprintf (dump_file, "SMS loop-with-call\n");
	      else if (BARRIER_P (insn))
		fprintf (dump_file, "SMS loop-with-barrier\n");
	      else if (INSN_P (insn) && single_set (insn)
		       && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
		fprintf (dump_file, "SMS loop with subreg in lhs\n");
	      else
		fprintf (dump_file,
			 "SMS loop-with-not-single-set-or-prohibited-reg\n");

	      print_rtl_single (dump_file, insn);
	    }

	  continue;
	}

      /* Always schedule the closing branch with the rest of the
	 instructions.  The branch is rotated to be in row ii-1 at the
	 end of the scheduling procedure to make sure it's the last
	 instruction in the iteration.  */
      if (! (g = create_ddg (bb, 1)))
	{
	  if (dump_file)
	    fprintf (dump_file, "SMS create_ddg failed\n");
	  continue;
	}

      g_arr[loop->num] = g;
      if (dump_file)
	fprintf (dump_file, "...OK\n");

    }
  if (dump_file)
    {
      fprintf (dump_file, "\nSMS transformation phase\n");
      fprintf (dump_file, "=========================\n\n");
    }

  /* We don't want to perform SMS on new loops - created by versioning.  */
  FOR_EACH_LOOP (loop, 0)
    {
      rtx_insn *head, *tail;
      rtx count_reg;
      rtx_insn *count_init;
      int mii, rec_mii, stage_count, min_cycle;
      int64_t loop_count = 0;
      bool opt_sc_p, adjust_inplace = false;
      basic_block pre_header;

      /* Only loops that survived the analysis phase have a DDG.  */
      if (! (g = g_arr[loop->num]))
	continue;

      if (dump_file)
	{
	  rtx_insn *insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop num: %d", loop->num);
	  dump_insn_location (insn);
	  fprintf (dump_file, "\n");

	  print_ddg (dump_file, g);
	}

      get_ebb_head_tail (loop->header, loop->header, &head, &tail);

      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      trip_count = get_estimated_loop_iterations_int (loop);
      max_trip_count = get_max_loop_iterations_int (loop);

      if (dump_file)
	{
	  dump_insn_location (tail);
	  fprintf (dump_file, "\nSMS single-bb-loop\n");
	  if (profile_info && flag_branch_probabilities)
	    {
	      fprintf (dump_file, "SMS loop-count ");
	      fprintf (dump_file, "%" PRId64,
		       (int64_t) bb->count.to_gcov_type ());
	      fprintf (dump_file, "\n");
	    }
	  fprintf (dump_file, "SMS doloop\n");
	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
	  fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
	  fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
	}


      count_reg = doloop_register_get (head, tail);
      gcc_assert (count_reg);

      /* COUNT_INIT is non-NULL when the iteration count is a known
	 constant set in the preheader; ADJUST_INPLACE then says whether
	 that constant can be rewritten directly.  */
      pre_header = loop_preheader_edge (loop)->src;
      count_init = const_iteration_count (count_reg, pre_header, &loop_count,
					  &adjust_inplace);

      if (dump_file && count_init)
	{
	  fprintf (dump_file, "SMS const-doloop ");
	  fprintf (dump_file, "%" PRId64,
		   loop_count);
	  fprintf (dump_file, "\n");
	}

      node_order = XNEWVEC (int, g->num_nodes);

      mii = 1; /* Need to pass some estimate of mii.  */
      rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
      mii = MAX (res_MII (g), rec_mii);
      mii = MAX (mii, 1);
      maxii = MAX (max_asap, MAXII_FACTOR * mii);

      if (dump_file)
	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
		 rec_mii, mii, maxii);

      /* Retry loop: if register-move insertion fails, restart the
	 whole scheduling with a larger minimum II.  */
      for (;;)
	{
	  set_node_sched_params (g);

	  stage_count = 0;
	  opt_sc_p = false;
	  ps = sms_schedule_by_order (g, mii, maxii, node_order);

	  if (ps)
	    {
	      /* Try to achieve optimized SC by normalizing the partial
		 schedule (having the cycles start from cycle zero).
		 The branch location must be placed in row ii-1 in the
		 final scheduling.	If failed, shift all instructions to
		 position the branch in row ii-1.  */
	      opt_sc_p = optimize_sc (ps, g);
	      if (opt_sc_p)
		stage_count = calculate_stage_count (ps, 0);
	      else
		{
		  /* Bring the branch to cycle ii-1.  */
		  int amount = (SCHED_TIME (g->closing_branch->cuid)
				- (ps->ii - 1));

		  if (dump_file)
		    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");

		  stage_count = calculate_stage_count (ps, amount);
		}

	      gcc_assert (stage_count >= 1);
	    }

	  /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
	     1 means that there is no interleaving between iterations thus
	     we let the scheduling passes do the job in this case.  */
	  if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
	      || (count_init && (loop_count <= stage_count))
	      || (max_trip_count >= 0 && max_trip_count <= stage_count)
	      || (trip_count >= 0 && trip_count <= stage_count))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "SMS failed... \n");
		  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
			   " loop-count=", stage_count);
		  fprintf (dump_file, "%" PRId64, loop_count);
		  fprintf (dump_file, ", trip-count=");
		  fprintf (dump_file, "%" PRId64 "max %" PRId64,
			   (int64_t) trip_count, (int64_t) max_trip_count);
		  fprintf (dump_file, ")\n");
		}
	      break;
	    }

	  if (!opt_sc_p)
	    {
	      /* Rotate the partial schedule to have the branch in row ii-1.  */
	      int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);

	      reset_sched_times (ps, amount);
	      rotate_partial_schedule (ps, amount);
	    }

	  set_columns_for_ps (ps);

	  /* Remember the start of the current first stage so we can
	     detect whether register moves grew the schedule backwards.  */
	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
	  if (!schedule_reg_moves (ps))
	    {
	      mii = ps->ii + 1;
	      free_partial_schedule (ps);
	      continue;
	    }

	  /* Moves that handle incoming values might have been added
	     to a new first stage.  Bump the stage count if so.

	     ??? Perhaps we could consider rotating the schedule here
	     instead?  */
	  if (PS_MIN_CYCLE (ps) < min_cycle)
	    {
	      reset_sched_times (ps, 0);
	      stage_count++;
	    }

	  /* The stage count should now be correct without rotation.  */
	  gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
	  PS_STAGE_COUNT (ps) = stage_count;

	  canon_loop (loop);

	  if (dump_file)
	    {
	      dump_insn_location (tail);
	      fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
		       ps->ii, stage_count);
	      print_partial_schedule (ps, dump_file);
	    }

	  if (count_init)
	    {
	      if (adjust_inplace)
		{
		  /* When possible, set new iteration count of loop kernel in
		     place.  Otherwise, generate_prolog_epilog creates an insn
		     to adjust.  */
		  SET_SRC (single_set (count_init)) = GEN_INT (loop_count
							       - stage_count + 1);
		}
	    }
	  else
	    {
	      /* In case the BCT count is not known, do loop versioning:
		 keep an unmodified copy of the loop for trip counts too
		 small to enter the pipelined kernel.  */
	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
					 gen_int_mode (stage_count,
						       GET_MODE (count_reg)));
	      profile_probability prob = profile_probability::guessed_always ()
			.apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);

	      loop_version (loop, comp_rtx, &condition_bb,
			    prob, prob.invert (),
			    prob, prob.invert (), true);
	    }

	  /* Now apply the scheduled kernel to the RTL of the loop.  */
	  permute_partial_schedule (ps, g->closing_branch->first_note);

	  /* Mark this loop as software pipelined so the later
	     scheduling passes don't touch it.  */
	  if (! flag_resched_modulo_sched)
	    mark_loop_unsched (loop);

	  /* The life-info is not valid any more.  */
	  df_set_bb_dirty (g->bb);

	  apply_reg_moves (ps);
	  if (dump_file)
	    print_node_sched_params (dump_file, g->num_nodes, ps);
	  /* Generate prolog and epilog.  */
	  generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
	  break;
	}

      /* NOTE(review): PS may be NULL here when scheduling failed;
	 free_partial_schedule presumably tolerates NULL — confirm.  */
      free_partial_schedule (ps);
      node_sched_param_vec.release ();
      free (node_order);
      free_ddg (g);
    }

  free (g_arr);

  /* Release scheduler data, needed until now because of DFA.  */
  haifa_sched_finish ();
  loop_optimizer_finalize ();
}
1779
1780 /* The SMS scheduling algorithm itself
1781 -----------------------------------
1782 Input: 'O' an ordered list of insns of a loop.
1783 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1784
1785 'Q' is the empty Set
1786 'PS' is the partial schedule; it holds the currently scheduled nodes with
1787 their cycle/slot.
1788 'PSP' previously scheduled predecessors.
1789 'PSS' previously scheduled successors.
1790 't(u)' the cycle where u is scheduled.
1791 'l(u)' is the latency of u.
1792 'd(v,u)' is the dependence distance from v to u.
1793 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1794 the node ordering phase.
1795 'check_hardware_resources_conflicts(u, PS, c)'
1796 run a trace around cycle/slot through DFA model
1797 to check resource conflicts involving instruction u
1798 at cycle c given the partial schedule PS.
1799 'add_to_partial_schedule_at_time(u, PS, c)'
1800 Add the node/instruction u to the partial schedule
1801 PS at time c.
1802 'calculate_register_pressure(PS)'
1803 Given a schedule of instructions, calculate the register
1804 pressure it implies. One implementation could be the
1805 maximum number of overlapping live ranges.
1806 'maxRP' The maximum allowed register pressure, it is usually derived from the number
   of registers available in the hardware.
1808
1809 1. II = MII.
1810 2. PS = empty list
1811 3. for each node u in O in pre-computed order
1812 4. if (PSP(u) != Q && PSS(u) == Q) then
   5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   6. start = Early_start; end = Early_start + II - 1; step = 1
   11. else if (PSP(u) == Q && PSS(u) != Q) then
   12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   13. start = Late_start; end = Late_start - II + 1; step = -1
   14. else if (PSP(u) != Q && PSS(u) != Q) then
   15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1821 17. start = Early_start;
1822 18. end = min(Early_start + II - 1 , Late_start);
1823 19. step = 1
1824 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1825 21. start = ASAP(u); end = start + II - 1; step = 1
1826 22. endif
1827
1828 23. success = false
1829 24. for (c = start ; c != end ; c += step)
1830 25. if check_hardware_resources_conflicts(u, PS, c) then
1831 26. add_to_partial_schedule_at_time(u, PS, c)
1832 27. success = true
1833 28. break
1834 29. endif
1835 30. endfor
1836 31. if (success == false) then
1837 32. II = II + 1
1838 33. if (II > maxII) then
1839 34. finish - failed to schedule
1840 35. endif
1841 36. goto 2.
1842 37. endif
1843 38. endfor
1844 39. if (calculate_register_pressure(PS) > maxRP) then
1845 40. goto 32.
1846 41. endif
1847 42. compute epilogue & prologue
1848 43. finish - succeeded to schedule
1849
1850 ??? The algorithm restricts the scheduling window to II cycles.
1851 In rare cases, it may be better to allow windows of II+1 cycles.
1852 The window would then start and end on the same row, but with
1853 different "must precede" and "must follow" requirements. */
1854
1855 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1856 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1857 set to 0 to save compile time. */
1858 #define DFA_HISTORY SMS_DFA_HISTORY
1859
1860 /* A threshold for the number of repeated unsuccessful attempts to insert
1861 an empty row, before we flush the partial schedule and start over. */
1862 #define MAX_SPLIT_NUM 10
1863 /* Given the partial schedule PS, this function calculates and returns the
1864 cycles in which we can schedule the node with the given index I.
1865 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1866 noticed that there are several cases in which we fail to SMS the loop
1867 because the sched window of a node is empty due to tight data-deps. In
1868 such cases we want to unschedule some of the predecessors/successors
1869 until we get non-empty scheduling window. It returns -1 if the
1870 scheduling window is empty and zero otherwise. */
1871
static int
get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
                  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
                  int *end_p)
{
  int start, step, end;
  int early_start, late_start;
  ddg_edge_ptr e;
  /* PSP/PSS: the already-scheduled predecessors/successors of U_NODE.  */
  auto_sbitmap psp (ps->g->num_nodes);
  auto_sbitmap pss (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;
  /* Counts of TRUE register dependences among scheduled predecessors
     and successors; used below to choose the traversal direction.  */
  int count_preds;
  int count_succs;

  /* 1. compute sched window for u (start, end, step).  */
  bitmap_clear (psp);
  bitmap_clear (pss);
  psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
  pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);

  /* We first compute a forward range (start <= end), then decide whether
     to reverse it.  */
  early_start = INT_MIN;
  late_start = INT_MAX;
  start = INT_MIN;
  end = INT_MAX;
  step = 1;

  count_preds = 0;
  count_succs = 0;

  if (dump_file && (psp_not_empty || pss_not_empty))
    {
      fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
	       "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
      fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
	       "start", "early start", "late start", "end", "time");
      fprintf (dump_file, "=========== =========== =========== ==========="
	       " =====\n");
    }
  /* Calculate early_start and limit end.  Both bounds are inclusive.  */
  if (psp_not_empty)
    for (e = u_node->in; e != 0; e = e->next_in)
      {
	int v = e->src->cuid;

	if (bitmap_bit_p (sched_nodes, v))
	  {
	    /* The dependence from the scheduled predecessor V forces U to
	       start no earlier than EARLIEST; a MEM dependence also caps
	       the window so U stays within one II of V.  */
	    int p_st = SCHED_TIME (v);
	    int earliest = p_st + e->latency - (e->distance * ii);
	    int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);

	    if (dump_file)
	      {
		fprintf (dump_file, "%11s %11d %11s %11d %5d",
			 "", earliest, "", latest, p_st);
		print_ddg_edge (dump_file, e);
		fprintf (dump_file, "\n");
	      }

	    early_start = MAX (early_start, earliest);
	    end = MIN (end, latest);

	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
	      count_preds++;
	  }
      }

  /* Calculate late_start and limit start.  Both bounds are inclusive.  */
  if (pss_not_empty)
    for (e = u_node->out; e != 0; e = e->next_out)
      {
	int v = e->dest->cuid;

	if (bitmap_bit_p (sched_nodes, v))
	  {
	    /* Mirror image of the loop above, for scheduled successors.  */
	    int s_st = SCHED_TIME (v);
	    int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
	    int latest = s_st - e->latency + (e->distance * ii);

	    if (dump_file)
	      {
		fprintf (dump_file, "%11d %11s %11d %11s %5d",
			 earliest, "", latest, "", s_st);
		print_ddg_edge (dump_file, e);
		fprintf (dump_file, "\n");
	      }

	    start = MAX (start, earliest);
	    late_start = MIN (late_start, latest);

	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
	      count_succs++;
	  }
      }

  if (dump_file && (psp_not_empty || pss_not_empty))
    {
      fprintf (dump_file, "----------- ----------- ----------- -----------"
	       " -----\n");
      fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
	       start, early_start, late_start, end, "",
	       "(max, max, min, min)");
    }

  /* Get a target scheduling window no bigger than ii.  With no scheduled
     predecessors, fall back on the node's ASAP time; otherwise clamp the
     window to II consecutive cycles starting at EARLY_START.  */
  if (early_start == INT_MIN && late_start == INT_MAX)
    early_start = NODE_ASAP (u_node);
  else if (early_start == INT_MIN)
    early_start = late_start - (ii - 1);
  late_start = MIN (late_start, early_start + (ii - 1));

  /* Apply memory dependence limits.  */
  start = MAX (start, early_start);
  end = MIN (end, late_start);

  if (dump_file && (psp_not_empty || pss_not_empty))
    fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
	     "", start, end, "", "");

  /* If there are at least as many successors as predecessors, schedule the
     node close to its successors.  */
  if (pss_not_empty && count_succs >= count_preds)
    {
      std::swap (start, end);
      step = -1;
    }

  /* Now that we've finalized the window, make END an exclusive rather
     than an inclusive bound.  */
  end += step;

  *start_p = start;
  *step_p = step;
  *end_p = end;

  /* Report an empty window: iterating from START by STEP would never
     visit a cycle before reaching END.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
2021
2022 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2023 node currently been scheduled. At the end of the calculation
2024 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2025 U_NODE which are (1) already scheduled in the first/last row of
2026 U_NODE's scheduling window, (2) whose dependence inequality with U
2027 becomes an equality when U is scheduled in this same row, and (3)
2028 whose dependence latency is zero.
2029
2030 The first and last rows are calculated using the following parameters:
2031 START/END rows - The cycles that begins/ends the traversal on the window;
2032 searching for an empty cycle to schedule U_NODE.
2033 STEP - The direction in which we traverse the window.
2034 II - The initiation interval. */
2035
static void
calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
                               int step, int ii, sbitmap sched_nodes,
                               sbitmap must_precede, sbitmap must_follow)
{
  ddg_edge_ptr e;
  int first_cycle_in_window, last_cycle_in_window;

  gcc_assert (must_precede && must_follow);

  /* Consider the following scheduling window:
     {first_cycle_in_window, first_cycle_in_window+1, ...,
     last_cycle_in_window}.  If step is 1 then the following will be
     the order we traverse the window: {start=first_cycle_in_window,
     first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
     or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
     end=first_cycle_in_window-1} if step is -1.  */
  first_cycle_in_window = (step == 1) ? start : end - step;
  last_cycle_in_window = (step == 1) ? end - step : start;

  bitmap_clear (must_precede);
  bitmap_clear (must_follow);

  if (dump_file)
    fprintf (dump_file, "\nmust_precede: ");

  /* Instead of checking if:
	(SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
	&& ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
	     first_cycle_in_window)
	&& e->latency == 0
     we use the fact that latency is non-negative:
	SCHED_TIME (e->src) - (e->distance * ii) <=
	SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
	first_cycle_in_window
     and check only if
	SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
  /* Note SCHED_TIME is indexed by the node's cuid.  */
  for (e = u_node->in; e != 0; e = e->next_in)
    if (bitmap_bit_p (sched_nodes, e->src->cuid)
	&& ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
	    first_cycle_in_window))
      {
	if (dump_file)
	  fprintf (dump_file, "%d ", e->src->cuid);

	bitmap_set_bit (must_precede, e->src->cuid);
      }

  if (dump_file)
    fprintf (dump_file, "\nmust_follow: ");

  /* Instead of checking if:
	(SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
	&& ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
	     last_cycle_in_window)
	&& e->latency == 0
     we use the fact that latency is non-negative:
	SCHED_TIME (e->dest) + (e->distance * ii) >=
	SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
	last_cycle_in_window
     and check only if
	SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
  for (e = u_node->out; e != 0; e = e->next_out)
    if (bitmap_bit_p (sched_nodes, e->dest->cuid)
	&& ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
	    last_cycle_in_window))
      {
	if (dump_file)
	  fprintf (dump_file, "%d ", e->dest->cuid);

	bitmap_set_bit (must_follow, e->dest->cuid);
      }

  if (dump_file)
    fprintf (dump_file, "\n");
}
2112
2113 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2114 parameters to decide if that's possible:
2115 PS - The partial schedule.
2116 U - The serial number of U_NODE.
2117 NUM_SPLITS - The number of row splits made so far.
2118 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2119 the first row of the scheduling window)
2120 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2121 last row of the scheduling window) */
2122
2123 static bool
try_scheduling_node_in_cycle(partial_schedule_ptr ps,int u,int cycle,sbitmap sched_nodes,int * num_splits,sbitmap must_precede,sbitmap must_follow)2124 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2125 int u, int cycle, sbitmap sched_nodes,
2126 int *num_splits, sbitmap must_precede,
2127 sbitmap must_follow)
2128 {
2129 ps_insn_ptr psi;
2130 bool success = 0;
2131
2132 verify_partial_schedule (ps, sched_nodes);
2133 psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2134 if (psi)
2135 {
2136 SCHED_TIME (u) = cycle;
2137 bitmap_set_bit (sched_nodes, u);
2138 success = 1;
2139 *num_splits = 0;
2140 if (dump_file)
2141 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2142
2143 }
2144
2145 return success;
2146 }
2147
2148 /* This function implements the scheduling algorithm for SMS according to the
2149 above algorithm. */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  /* II starts at the minimum and is increased (or rows are split) until
     every node fits, or MAXII is reached.  */
  int ii = mii;
  int i, c, success, num_splits = 0;
  int flush_and_start_over = true;
  int num_nodes = g->num_nodes;
  int start, end, step; /* Place together into one struct?  */
  auto_sbitmap sched_nodes (num_nodes);
  auto_sbitmap must_precede (num_nodes);
  auto_sbitmap must_follow (num_nodes);
  auto_sbitmap tobe_scheduled (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  bitmap_ones (tobe_scheduled);
  bitmap_clear (sched_nodes);

  /* Outer loop: restart from an empty schedule whenever too many row
     splits accumulated (see MAX_SPLIT_NUM below).  */
  while (flush_and_start_over && (ii < maxii))
    {

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      flush_and_start_over = false;
      bitmap_clear (sched_nodes);

      /* Place nodes one by one in the precomputed priority order.  */
      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx_insn *insn = u_node->insn;

	  /* Debug insns are not scheduled; drop them from the set the
	     final sanity check below compares against.  */
	  if (!NONDEBUG_INSN_P (insn))
	    {
	      bitmap_clear_bit (tobe_scheduled, u);
	      continue;
	    }

	  if (bitmap_bit_p (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  */
	  success = 0;
	  if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
				&step, &end) == 0)
	    {
	      if (dump_file)
		fprintf (dump_file, "\nTrying to schedule node %d "
			 "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
			 (g->nodes[u].insn)), start, end, step);

	      gcc_assert ((step > 0 && start < end)
			  || (step < 0 && start > end));

	      calculate_must_precede_follow (u_node, start, end, step, ii,
					     sched_nodes, must_precede,
					     must_follow);

	      /* Walk the window cycle by cycle until U fits.  Note END is
		 an exclusive bound (see get_sched_window).  */
	      for (c = start; c != end; c += step)
		{
		  sbitmap tmp_precede, tmp_follow;

		  /* The precede/follow constraints only apply at the
		     first/last row of the window respectively.  */
		  set_must_precede_follow (&tmp_follow, must_follow,
					   &tmp_precede, must_precede,
					   c, start, end, step);
		  success =
		    try_scheduling_node_in_cycle (ps, u, c,
						  sched_nodes,
						  &num_splits, tmp_precede,
						  tmp_follow);
		  if (success)
		    break;
		}

	      verify_partial_schedule (ps, sched_nodes);
	    }
	  if (!success)
	    {
	      int split_row;

	      /* Give up entirely once II reaches MAXII.  */
	      if (ii++ == maxii)
		break;

	      /* Too many consecutive splits: flush the schedule and
		 restart from scratch with the (already bumped) II.  */
	      if (num_splits >= MAX_SPLIT_NUM)
		{
		  num_splits = 0;
		  flush_and_start_over = true;
		  verify_partial_schedule (ps, sched_nodes);
		  reset_partial_schedule (ps, ii);
		  verify_partial_schedule (ps, sched_nodes);
		  break;
		}

	      num_splits++;
	      /* The scheduling window is exclusive of 'end'
		 whereas compute_split_window() expects an inclusive,
		 ordered range.  */
	      if (step == 1)
		split_row = compute_split_row (sched_nodes, start, end - 1,
					       ps->ii, u_node);
	      else
		split_row = compute_split_row (sched_nodes, end + 1, start,
					       ps->ii, u_node);

	      ps_insert_empty_row (ps, split_row, sched_nodes);
	      i--;	     /* Go back and retry node i.  */

	      if (dump_file)
		fprintf (dump_file, "num_splits=%d\n", num_splits);
	    }

	  /* ??? If (success), check register pressure estimates.  */
	}			/* Continue with next node.  */
    }				/* While flush_and_start_over.  */
  /* II hit the ceiling: scheduling failed, return no schedule.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  else
    gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));

  return ps;
}
2274
2275 /* This function inserts a new empty row into PS at the position
2276 according to SPLITROW, keeping all already scheduled instructions
2277 intact and updating their SCHED_TIME and cycle accordingly. */
static void
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
                     sbitmap sched_nodes)
{
  ps_insn_ptr crr_insn;
  ps_insn_ptr *rows_new;
  int ii = ps->ii;
  int new_ii = ii + 1;
  int row;
  int *rows_length_new;

  verify_partial_schedule (ps, sched_nodes);

  /* We normalize sched_time and rotate ps to have only non-negative sched
     times, for simplicity of updating cycles after inserting new row.  */
  split_row -= ps->min_cycle;
  split_row = SMODULO (split_row, ii);
  if (dump_file)
    fprintf (dump_file, "split_row=%d\n", split_row);

  reset_sched_times (ps, PS_MIN_CYCLE (ps));
  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));

  /* Build new rows/rows_length arrays with one extra row; old insns are
     reattached below with their cycles remapped for the larger II.  */
  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
  rows_length_new = (int *) xcalloc (new_ii, sizeof (int));

  /* Rows before the split keep their index.  Each cycle T crossed the
     (not yet inserted) split row once per full II period, so its new
     value is T + T / II.  */
  for (row = 0; row < split_row; row++)
    {
      rows_new[row] = ps->rows[row];
      rows_length_new[row] = ps->rows_length[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  int u = crr_insn->id;
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}

    }

  /* The inserted row itself starts empty.  */
  rows_new[split_row] = NULL;

  /* Rows at or after the split shift down by one row, i.e. their cycles
     cross the new row one extra time: T + T / II + 1.  */
  for (row = split_row; row < ii; row++)
    {
      rows_new[row + 1] = ps->rows[row];
      rows_length_new[row + 1] = ps->rows_length[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row + 1];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  int u = crr_insn->id;
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}
    }

  /* Updating ps.  Remap the cycle bounds with the same formula used for
     the insns above.  */
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
  free (ps->rows);
  ps->rows = rows_new;
  free (ps->rows_length);
  ps->rows_length = rows_length_new;
  ps->ii = new_ii;
  gcc_assert (ps->min_cycle >= 0);

  verify_partial_schedule (ps, sched_nodes);

  if (dump_file)
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
	     ps->max_cycle);
}
2360
2361 /* Given U_NODE which is the node that failed to be scheduled; LOW and
2362 UP which are the boundaries of it's scheduling window; compute using
2363 SCHED_NODES and II a row in the partial schedule that can be split
2364 which will separate a critical predecessor from a critical successor
2365 thereby expanding the window, and return it. */
2366 static int
compute_split_row(sbitmap sched_nodes,int low,int up,int ii,ddg_node_ptr u_node)2367 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2368 ddg_node_ptr u_node)
2369 {
2370 ddg_edge_ptr e;
2371 int lower = INT_MIN, upper = INT_MAX;
2372 int crit_pred = -1;
2373 int crit_succ = -1;
2374 int crit_cycle;
2375
2376 for (e = u_node->in; e != 0; e = e->next_in)
2377 {
2378 int v = e->src->cuid;
2379
2380 if (bitmap_bit_p (sched_nodes, v)
2381 && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2382 if (SCHED_TIME (v) > lower)
2383 {
2384 crit_pred = v;
2385 lower = SCHED_TIME (v);
2386 }
2387 }
2388
2389 if (crit_pred >= 0)
2390 {
2391 crit_cycle = SCHED_TIME (crit_pred) + 1;
2392 return SMODULO (crit_cycle, ii);
2393 }
2394
2395 for (e = u_node->out; e != 0; e = e->next_out)
2396 {
2397 int v = e->dest->cuid;
2398
2399 if (bitmap_bit_p (sched_nodes, v)
2400 && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2401 if (SCHED_TIME (v) < upper)
2402 {
2403 crit_succ = v;
2404 upper = SCHED_TIME (v);
2405 }
2406 }
2407
2408 if (crit_succ >= 0)
2409 {
2410 crit_cycle = SCHED_TIME (crit_succ);
2411 return SMODULO (crit_cycle, ii);
2412 }
2413
2414 if (dump_file)
2415 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2416
2417 return SMODULO ((low + up + 1) / 2, ii);
2418 }
2419
2420 static void
verify_partial_schedule(partial_schedule_ptr ps,sbitmap sched_nodes)2421 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2422 {
2423 int row;
2424 ps_insn_ptr crr_insn;
2425
2426 for (row = 0; row < ps->ii; row++)
2427 {
2428 int length = 0;
2429
2430 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2431 {
2432 int u = crr_insn->id;
2433
2434 length++;
2435 gcc_assert (bitmap_bit_p (sched_nodes, u));
2436 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2437 popcount (sched_nodes) == number of insns in ps. */
2438 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2439 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2440 }
2441
2442 gcc_assert (ps->rows_length[row] == length);
2443 }
2444 }
2445
2446
2447 /* This page implements the algorithm for ordering the nodes of a DDG
2448 for modulo scheduling, activated through the
2449 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2450
2451 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2452 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2453 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2454 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2455 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2456 #define DEPTH(x) (ASAP ((x)))
2457
2458 typedef struct node_order_params * nopa;
2459
2460 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2461 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2462 static nopa calculate_order_params (ddg_ptr, int, int *);
2463 static int find_max_asap (ddg_ptr, sbitmap);
2464 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2465 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2466
2467 enum sms_direction {BOTTOMUP, TOPDOWN};
2468
2469 struct node_order_params
2470 {
2471 int asap;
2472 int alap;
2473 int height;
2474 };
2475
2476 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2477 static void
check_nodes_order(int * node_order,int num_nodes)2478 check_nodes_order (int *node_order, int num_nodes)
2479 {
2480 int i;
2481 auto_sbitmap tmp (num_nodes);
2482
2483 bitmap_clear (tmp);
2484
2485 if (dump_file)
2486 fprintf (dump_file, "SMS final nodes order: \n");
2487
2488 for (i = 0; i < num_nodes; i++)
2489 {
2490 int u = node_order[i];
2491
2492 if (dump_file)
2493 fprintf (dump_file, "%d ", u);
2494 gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2495
2496 bitmap_set_bit (tmp, u);
2497 }
2498
2499 if (dump_file)
2500 fprintf (dump_file, "\n");
2501 }
2502
2503 /* Order the nodes of G for scheduling and pass the result in
2504 NODE_ORDER. Also set aux.count of each node to ASAP.
2505 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2506 static int
sms_order_nodes(ddg_ptr g,int mii,int * node_order,int * pmax_asap)2507 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2508 {
2509 int i;
2510 int rec_mii = 0;
2511 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2512
2513 nopa nops = calculate_order_params (g, mii, pmax_asap);
2514
2515 if (dump_file)
2516 print_sccs (dump_file, sccs, g);
2517
2518 order_nodes_of_sccs (sccs, node_order);
2519
2520 if (sccs->num_sccs > 0)
2521 /* First SCC has the largest recurrence_length. */
2522 rec_mii = sccs->sccs[0]->recurrence_length;
2523
2524 /* Save ASAP before destroying node_order_params. */
2525 for (i = 0; i < g->num_nodes; i++)
2526 {
2527 ddg_node_ptr v = &g->nodes[i];
2528 v->aux.count = ASAP (v);
2529 }
2530
2531 free (nops);
2532 free_ddg_all_sccs (sccs);
2533 check_nodes_order (node_order, g->num_nodes);
2534
2535 return rec_mii;
2536 }
2537
2538 static void
order_nodes_of_sccs(ddg_all_sccs_ptr all_sccs,int * node_order)2539 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2540 {
2541 int i, pos = 0;
2542 ddg_ptr g = all_sccs->ddg;
2543 int num_nodes = g->num_nodes;
2544 auto_sbitmap prev_sccs (num_nodes);
2545 auto_sbitmap on_path (num_nodes);
2546 auto_sbitmap tmp (num_nodes);
2547 auto_sbitmap ones (num_nodes);
2548
2549 bitmap_clear (prev_sccs);
2550 bitmap_ones (ones);
2551
2552 /* Perform the node ordering starting from the SCC with the highest recMII.
2553 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2554 for (i = 0; i < all_sccs->num_sccs; i++)
2555 {
2556 ddg_scc_ptr scc = all_sccs->sccs[i];
2557
2558 /* Add nodes on paths from previous SCCs to the current SCC. */
2559 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2560 bitmap_ior (tmp, scc->nodes, on_path);
2561
2562 /* Add nodes on paths from the current SCC to previous SCCs. */
2563 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2564 bitmap_ior (tmp, tmp, on_path);
2565
2566 /* Remove nodes of previous SCCs from current extended SCC. */
2567 bitmap_and_compl (tmp, tmp, prev_sccs);
2568
2569 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2570 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2571 }
2572
2573 /* Handle the remaining nodes that do not belong to any scc. Each call
2574 to order_nodes_in_scc handles a single connected component. */
2575 while (pos < g->num_nodes)
2576 {
2577 bitmap_and_compl (tmp, ones, prev_sccs);
2578 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2579 }
2580 }
2581
2582 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2583 static struct node_order_params *
calculate_order_params(ddg_ptr g,int mii ATTRIBUTE_UNUSED,int * pmax_asap)2584 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2585 {
2586 int u;
2587 int max_asap;
2588 int num_nodes = g->num_nodes;
2589 ddg_edge_ptr e;
2590 /* Allocate a place to hold ordering params for each node in the DDG. */
2591 nopa node_order_params_arr;
2592
2593 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2594 node_order_params_arr = (nopa) xcalloc (num_nodes,
2595 sizeof (struct node_order_params));
2596
2597 /* Set the aux pointer of each node to point to its order_params structure. */
2598 for (u = 0; u < num_nodes; u++)
2599 g->nodes[u].aux.info = &node_order_params_arr[u];
2600
2601 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2602 calculate ASAP, ALAP, mobility, distance, and height for each node
2603 in the dependence (direct acyclic) graph. */
2604
2605 /* We assume that the nodes in the array are in topological order. */
2606
2607 max_asap = 0;
2608 for (u = 0; u < num_nodes; u++)
2609 {
2610 ddg_node_ptr u_node = &g->nodes[u];
2611
2612 ASAP (u_node) = 0;
2613 for (e = u_node->in; e; e = e->next_in)
2614 if (e->distance == 0)
2615 ASAP (u_node) = MAX (ASAP (u_node),
2616 ASAP (e->src) + e->latency);
2617 max_asap = MAX (max_asap, ASAP (u_node));
2618 }
2619
2620 for (u = num_nodes - 1; u > -1; u--)
2621 {
2622 ddg_node_ptr u_node = &g->nodes[u];
2623
2624 ALAP (u_node) = max_asap;
2625 HEIGHT (u_node) = 0;
2626 for (e = u_node->out; e; e = e->next_out)
2627 if (e->distance == 0)
2628 {
2629 ALAP (u_node) = MIN (ALAP (u_node),
2630 ALAP (e->dest) - e->latency);
2631 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2632 HEIGHT (e->dest) + e->latency);
2633 }
2634 }
2635 if (dump_file)
2636 {
2637 fprintf (dump_file, "\nOrder params\n");
2638 for (u = 0; u < num_nodes; u++)
2639 {
2640 ddg_node_ptr u_node = &g->nodes[u];
2641
2642 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2643 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2644 }
2645 }
2646
2647 *pmax_asap = max_asap;
2648 return node_order_params_arr;
2649 }
2650
2651 static int
find_max_asap(ddg_ptr g,sbitmap nodes)2652 find_max_asap (ddg_ptr g, sbitmap nodes)
2653 {
2654 unsigned int u = 0;
2655 int max_asap = -1;
2656 int result = -1;
2657 sbitmap_iterator sbi;
2658
2659 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2660 {
2661 ddg_node_ptr u_node = &g->nodes[u];
2662
2663 if (max_asap < ASAP (u_node))
2664 {
2665 max_asap = ASAP (u_node);
2666 result = u;
2667 }
2668 }
2669 return result;
2670 }
2671
2672 static int
find_max_hv_min_mob(ddg_ptr g,sbitmap nodes)2673 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2674 {
2675 unsigned int u = 0;
2676 int max_hv = -1;
2677 int min_mob = INT_MAX;
2678 int result = -1;
2679 sbitmap_iterator sbi;
2680
2681 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2682 {
2683 ddg_node_ptr u_node = &g->nodes[u];
2684
2685 if (max_hv < HEIGHT (u_node))
2686 {
2687 max_hv = HEIGHT (u_node);
2688 min_mob = MOB (u_node);
2689 result = u;
2690 }
2691 else if ((max_hv == HEIGHT (u_node))
2692 && (min_mob > MOB (u_node)))
2693 {
2694 min_mob = MOB (u_node);
2695 result = u;
2696 }
2697 }
2698 return result;
2699 }
2700
2701 static int
find_max_dv_min_mob(ddg_ptr g,sbitmap nodes)2702 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2703 {
2704 unsigned int u = 0;
2705 int max_dv = -1;
2706 int min_mob = INT_MAX;
2707 int result = -1;
2708 sbitmap_iterator sbi;
2709
2710 EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2711 {
2712 ddg_node_ptr u_node = &g->nodes[u];
2713
2714 if (max_dv < DEPTH (u_node))
2715 {
2716 max_dv = DEPTH (u_node);
2717 min_mob = MOB (u_node);
2718 result = u;
2719 }
2720 else if ((max_dv == DEPTH (u_node))
2721 && (min_mob > MOB (u_node)))
2722 {
2723 min_mob = MOB (u_node);
2724 result = u;
2725 }
2726 }
2727 return result;
2728 }
2729
2730 /* Places the nodes of SCC into the NODE_ORDER array starting
2731 at position POS, according to the SMS ordering algorithm.
2732 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2733 the NODE_ORDER array, starting from position zero. */
2734 static int
order_nodes_in_scc(ddg_ptr g,sbitmap nodes_ordered,sbitmap scc,int * node_order,int pos)2735 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2736 int * node_order, int pos)
2737 {
2738 enum sms_direction dir;
2739 int num_nodes = g->num_nodes;
2740 auto_sbitmap workset (num_nodes);
2741 auto_sbitmap tmp (num_nodes);
2742 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2743 auto_sbitmap predecessors (num_nodes);
2744 auto_sbitmap successors (num_nodes);
2745
2746 bitmap_clear (predecessors);
2747 find_predecessors (predecessors, g, nodes_ordered);
2748
2749 bitmap_clear (successors);
2750 find_successors (successors, g, nodes_ordered);
2751
2752 bitmap_clear (tmp);
2753 if (bitmap_and (tmp, predecessors, scc))
2754 {
2755 bitmap_copy (workset, tmp);
2756 dir = BOTTOMUP;
2757 }
2758 else if (bitmap_and (tmp, successors, scc))
2759 {
2760 bitmap_copy (workset, tmp);
2761 dir = TOPDOWN;
2762 }
2763 else
2764 {
2765 int u;
2766
2767 bitmap_clear (workset);
2768 if ((u = find_max_asap (g, scc)) >= 0)
2769 bitmap_set_bit (workset, u);
2770 dir = BOTTOMUP;
2771 }
2772
2773 bitmap_clear (zero_bitmap);
2774 while (!bitmap_equal_p (workset, zero_bitmap))
2775 {
2776 int v;
2777 ddg_node_ptr v_node;
2778 sbitmap v_node_preds;
2779 sbitmap v_node_succs;
2780
2781 if (dir == TOPDOWN)
2782 {
2783 while (!bitmap_equal_p (workset, zero_bitmap))
2784 {
2785 v = find_max_hv_min_mob (g, workset);
2786 v_node = &g->nodes[v];
2787 node_order[pos++] = v;
2788 v_node_succs = NODE_SUCCESSORS (v_node);
2789 bitmap_and (tmp, v_node_succs, scc);
2790
2791 /* Don't consider the already ordered successors again. */
2792 bitmap_and_compl (tmp, tmp, nodes_ordered);
2793 bitmap_ior (workset, workset, tmp);
2794 bitmap_clear_bit (workset, v);
2795 bitmap_set_bit (nodes_ordered, v);
2796 }
2797 dir = BOTTOMUP;
2798 bitmap_clear (predecessors);
2799 find_predecessors (predecessors, g, nodes_ordered);
2800 bitmap_and (workset, predecessors, scc);
2801 }
2802 else
2803 {
2804 while (!bitmap_equal_p (workset, zero_bitmap))
2805 {
2806 v = find_max_dv_min_mob (g, workset);
2807 v_node = &g->nodes[v];
2808 node_order[pos++] = v;
2809 v_node_preds = NODE_PREDECESSORS (v_node);
2810 bitmap_and (tmp, v_node_preds, scc);
2811
2812 /* Don't consider the already ordered predecessors again. */
2813 bitmap_and_compl (tmp, tmp, nodes_ordered);
2814 bitmap_ior (workset, workset, tmp);
2815 bitmap_clear_bit (workset, v);
2816 bitmap_set_bit (nodes_ordered, v);
2817 }
2818 dir = TOPDOWN;
2819 bitmap_clear (successors);
2820 find_successors (successors, g, nodes_ordered);
2821 bitmap_and (workset, successors, scc);
2822 }
2823 }
2824 sbitmap_free (zero_bitmap);
2825 return pos;
2826 }
2827
2828
2829 /* This page contains functions for manipulating partial-schedules during
2830 modulo scheduling. */
2831
2832 /* Create a partial schedule and allocate a memory to hold II rows. */
2833
2834 static partial_schedule_ptr
create_partial_schedule(int ii,ddg_ptr g,int history)2835 create_partial_schedule (int ii, ddg_ptr g, int history)
2836 {
2837 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2838 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2839 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2840 ps->reg_moves.create (0);
2841 ps->ii = ii;
2842 ps->history = history;
2843 ps->min_cycle = INT_MAX;
2844 ps->max_cycle = INT_MIN;
2845 ps->g = g;
2846
2847 return ps;
2848 }
2849
2850 /* Free the PS_INSNs in rows array of the given partial schedule.
2851 ??? Consider caching the PS_INSN's. */
2852 static void
free_ps_insns(partial_schedule_ptr ps)2853 free_ps_insns (partial_schedule_ptr ps)
2854 {
2855 int i;
2856
2857 for (i = 0; i < ps->ii; i++)
2858 {
2859 while (ps->rows[i])
2860 {
2861 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2862
2863 free (ps->rows[i]);
2864 ps->rows[i] = ps_insn;
2865 }
2866 ps->rows[i] = NULL;
2867 }
2868 }
2869
2870 /* Free all the memory allocated to the partial schedule. */
2871
2872 static void
free_partial_schedule(partial_schedule_ptr ps)2873 free_partial_schedule (partial_schedule_ptr ps)
2874 {
2875 ps_reg_move_info *move;
2876 unsigned int i;
2877
2878 if (!ps)
2879 return;
2880
2881 FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2882 sbitmap_free (move->uses);
2883 ps->reg_moves.release ();
2884
2885 free_ps_insns (ps);
2886 free (ps->rows);
2887 free (ps->rows_length);
2888 free (ps);
2889 }
2890
2891 /* Clear the rows array with its PS_INSNs, and create a new one with
2892 NEW_II rows. */
2893
2894 static void
reset_partial_schedule(partial_schedule_ptr ps,int new_ii)2895 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2896 {
2897 if (!ps)
2898 return;
2899 free_ps_insns (ps);
2900 if (new_ii == ps->ii)
2901 return;
2902 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2903 * sizeof (ps_insn_ptr));
2904 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2905 ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2906 memset (ps->rows_length, 0, new_ii * sizeof (int));
2907 ps->ii = new_ii;
2908 ps->min_cycle = INT_MAX;
2909 ps->max_cycle = INT_MIN;
2910 }
2911
2912 /* Prints the partial schedule as an ii rows array, for each rows
2913 print the ids of the insns in it. */
2914 void
print_partial_schedule(partial_schedule_ptr ps,FILE * dump)2915 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2916 {
2917 int i;
2918
2919 for (i = 0; i < ps->ii; i++)
2920 {
2921 ps_insn_ptr ps_i = ps->rows[i];
2922
2923 fprintf (dump, "\n[ROW %d ]: ", i);
2924 while (ps_i)
2925 {
2926 rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2927
2928 if (JUMP_P (insn))
2929 fprintf (dump, "%d (branch), ", INSN_UID (insn));
2930 else
2931 fprintf (dump, "%d, ", INSN_UID (insn));
2932
2933 ps_i = ps_i->next_in_row;
2934 }
2935 }
2936 }
2937
2938 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2939 static ps_insn_ptr
create_ps_insn(int id,int cycle)2940 create_ps_insn (int id, int cycle)
2941 {
2942 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2943
2944 ps_i->id = id;
2945 ps_i->next_in_row = NULL;
2946 ps_i->prev_in_row = NULL;
2947 ps_i->cycle = cycle;
2948
2949 return ps_i;
2950 }
2951
2952
2953 /* Removes the given PS_INSN from the partial schedule. */
2954 static void
remove_node_from_ps(partial_schedule_ptr ps,ps_insn_ptr ps_i)2955 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2956 {
2957 int row;
2958
2959 gcc_assert (ps && ps_i);
2960
2961 row = SMODULO (ps_i->cycle, ps->ii);
2962 if (! ps_i->prev_in_row)
2963 {
2964 gcc_assert (ps_i == ps->rows[row]);
2965 ps->rows[row] = ps_i->next_in_row;
2966 if (ps->rows[row])
2967 ps->rows[row]->prev_in_row = NULL;
2968 }
2969 else
2970 {
2971 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2972 if (ps_i->next_in_row)
2973 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2974 }
2975
2976 ps->rows_length[row] -= 1;
2977 free (ps_i);
2978 return;
2979 }
2980
2981 /* Unlike what literature describes for modulo scheduling (which focuses
2982 on VLIW machines) the order of the instructions inside a cycle is
2983 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2984 where the current instruction should go relative to the already
2985 scheduled instructions in the given cycle. Go over these
2986 instructions and find the first possible column to put it in. */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  ps_insn_ptr last_in_row = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (must_follow
	  && bitmap_bit_p (must_follow, next_ps_i->id)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
      /* The closing branch must be the last in the row.  If the row
	 already holds a branch, bail out rather than risk displacing
	 it from the last position.  */
      if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
	return false;

      last_in_row = next_ps_i;
    }

  /* The closing branch is scheduled as well.  Make sure there is no
     dependent instruction after it as the branch should be the last
     instruction in the row.  */
  if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
    {
      /* Nothing in this row may be required to follow the branch.  */
      if (first_must_follow)
	return false;
      if (last_in_row)
	{
	  /* Make the branch the last in the row.  New instructions
	     will be inserted at the beginning of the row or after the
	     last must_precede instruction thus the branch is guaranteed
	     to remain the last instruction in the row.  */
	  last_in_row->next_in_row = ps_i;
	  ps_i->prev_in_row = last_in_row;
	  ps_i->next_in_row = NULL;
	}
      else
	ps->rows[row] = ps_i;
      return true;
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No must-precede constraint: link PS_I at the head of the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Splice PS_I immediately after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
3072
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* PS_I is already last in its row: there is no column to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list:
     ... PREV <-> PS_I <-> NEXT ...  becomes
     ... PREV <-> NEXT <-> PS_I ...  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I headed the row, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
3118
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
   before/after (respectively) the node pointed to by PS_I when scheduled
   in the same cycle.  */
3124 static ps_insn_ptr
add_node_to_ps(partial_schedule_ptr ps,int id,int cycle,sbitmap must_precede,sbitmap must_follow)3125 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3126 sbitmap must_precede, sbitmap must_follow)
3127 {
3128 ps_insn_ptr ps_i;
3129 int row = SMODULO (cycle, ps->ii);
3130
3131 if (ps->rows_length[row] >= issue_rate)
3132 return NULL;
3133
3134 ps_i = create_ps_insn (id, cycle);
3135
3136 /* Finds and inserts PS_I according to MUST_FOLLOW and
3137 MUST_PRECEDE. */
3138 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3139 {
3140 free (ps_i);
3141 return NULL;
3142 }
3143
3144 ps->rows_length[row] += 1;
3145 return ps_i;
3146 }
3147
3148 /* Advance time one cycle. Assumes DFA is being used. */
3149 static void
advance_one_cycle(void)3150 advance_one_cycle (void)
3151 {
3152 if (targetm.sched.dfa_pre_cycle_insn)
3153 state_transition (curr_state,
3154 targetm.sched.dfa_pre_cycle_insn ());
3155
3156 state_transition (curr_state, NULL);
3157
3158 if (targetm.sched.dfa_post_cycle_insn)
3159 state_transition (curr_state,
3160 targetm.sched.dfa_post_cycle_insn ());
3161 }
3162
3163
3164
3165 /* Checks if PS has resource conflicts according to DFA, starting from
3166 FROM cycle to TO cycle; returns true if there are conflicts and false
3167 if there are no conflicts. Assumes DFA is being used. */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  /* Replay the scheduled insns through the DFA from a clean state.  */
  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);

	  /* Debug insns consume no machine resources.  */
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  /* Let the target decide how many slots the insn consumed,
	     if it provides the hook.  */
	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      targetm.sched.variable_issue (sched_dump, sched_verbose,
					    insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
3216
/* Checks if the given node causes resource conflicts when added to PS at
   cycle C.  If not the node is added to PS and returned; otherwise zero
   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
   cuid N must come before/after (respectively) the node pointed to by
   PS_I when scheduled in the same cycle.  */
ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
			     int c, sbitmap must_precede,
			     sbitmap must_follow)
{
  int i, first, amount, has_conflicts = 0;
  ps_insn_ptr ps_i;

  /* First add the node to the PS, if this succeeds check for
     conflicts, trying different issue slots in the same row.  */
  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
    return NULL; /* Failed to insert the node at the given cycle.  */

  while (1)
    {
      /* Cheap check first: just the cycle the node was added to.  */
      has_conflicts = ps_has_conflicts (ps, c, c);
      if (ps->history > 0 && !has_conflicts)
	{
	  /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
	     but not more than ii intervals.  */
	  first = c - ps->history;
	  amount = 2 * ps->history + 1;
	  if (amount > ps->ii)
	    amount = ps->ii;
	  for (i = first; i < first + amount; i++)
	    {
	      has_conflicts = ps_has_conflicts (ps,
						i - ps->history,
						i + ps->history);
	      if (has_conflicts)
		break;
	    }
	}
      /* Conflict-free in every checked window: the node stays put.  */
      if (!has_conflicts)
	break;
      /* Try different issue slots to find one that the given node can be
	 scheduled in without conflicts.  */
      if (! ps_insn_advance_column (ps, ps_i, must_follow))
	break;
    }

  /* No conflict-free column exists at cycle C: undo the insertion.  */
  if (has_conflicts)
    {
      remove_node_from_ps (ps, ps_i);
      return NULL;
    }

  /* Success: widen the schedule's occupied cycle range to include C.  */
  ps->min_cycle = MIN (ps->min_cycle, c);
  ps->max_cycle = MAX (ps->max_cycle, c);
  return ps_i;
}
3273
3274 /* Calculate the stage count of the partial schedule PS. The calculation
3275 takes into account the rotation amount passed in ROTATION_AMOUNT. */
3276 int
calculate_stage_count(partial_schedule_ptr ps,int rotation_amount)3277 calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3278 {
3279 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3280 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3281 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3282
3283 /* The calculation of stage count is done adding the number of stages
3284 before cycle zero and after cycle zero. */
3285 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3286
3287 return stage_count;
3288 }
3289
3290 /* Rotate the rows of PS such that insns scheduled at time
3291 START_CYCLE will appear in row 0. Updates max/min_cycles. */
3292 void
rotate_partial_schedule(partial_schedule_ptr ps,int start_cycle)3293 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3294 {
3295 int i, row, backward_rotates;
3296 int last_row = ps->ii - 1;
3297
3298 if (start_cycle == 0)
3299 return;
3300
3301 backward_rotates = SMODULO (start_cycle, ps->ii);
3302
3303 /* Revisit later and optimize this into a single loop. */
3304 for (i = 0; i < backward_rotates; i++)
3305 {
3306 ps_insn_ptr first_row = ps->rows[0];
3307 int first_row_length = ps->rows_length[0];
3308
3309 for (row = 0; row < last_row; row++)
3310 {
3311 ps->rows[row] = ps->rows[row + 1];
3312 ps->rows_length[row] = ps->rows_length[row + 1];
3313 }
3314
3315 ps->rows[last_row] = first_row;
3316 ps->rows_length[last_row] = first_row_length;
3317 }
3318
3319 ps->max_cycle -= start_cycle;
3320 ps->min_cycle -= start_cycle;
3321 }
3322
3323 #endif /* INSN_SCHEDULING */
3324
/* Run instruction scheduler.  */
/* Perform SMS modulo scheduling.  */
3327
namespace {

/* Pass descriptor for the SMS RTL pass.  */
const pass_data pass_data_sms =
{
  RTL_PASS, /* type */
  "sms", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SMS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_sms : public rtl_opt_pass
{
public:
  pass_sms (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_sms, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing and modulo scheduling was requested.  */
  virtual bool gate (function *)
  {
    return (optimize > 0 && flag_modulo_sched);
  }

  virtual unsigned int execute (function *);

}; // class pass_sms

/* Entry point of the pass: switch to cfg_layout mode, run the modulo
   scheduler, then rebuild the block chain and finalize the layout.  */
unsigned int
pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  */
  FOR_EACH_BB_FN (bb, fun)
    if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}

} // anon namespace
3384
/* Factory used by the pass manager to instantiate the SMS pass.  */
rtl_opt_pass *
make_pass_sms (gcc::context *ctxt)
{
  return new pass_sms (ctxt);
}
3390