1 /* Swing Modulo Scheduling implementation.
2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
3    Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "memmodel.h"
32 #include "optabs.h"
33 #include "regs.h"
34 #include "emit-rtl.h"
35 #include "gcov-io.h"
36 #include "profile.h"
37 #include "insn-attr.h"
38 #include "cfgrtl.h"
39 #include "sched-int.h"
40 #include "cfgloop.h"
41 #include "expr.h"
42 #include "params.h"
43 #include "ddg.h"
44 #include "tree-pass.h"
45 #include "dbgcnt.h"
46 #include "loop-unroll.h"
47 
48 #ifdef INSN_SCHEDULING
49 
50 /* This file contains the implementation of the Swing Modulo Scheduler,
51    described in the following references:
52    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
53        Lifetime--sensitive modulo scheduling in a production environment.
54        IEEE Trans. on Comps., 50(3), March 2001
55    [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
56        Swing Modulo Scheduling: A Lifetime Sensitive Approach.
57        PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
58 
59    The basic structure is:
60    1. Build a data-dependence graph (DDG) for each loop.
61    2. Use the DDG to order the insns of a loop (not in topological order
62       necessarily, but rather) trying to place each insn after all its
63       predecessors _or_ after all its successors.
64    3. Compute MII: a lower bound on the number of cycles to schedule the loop.
65    4. Use the ordering to perform list-scheduling of the loop:
66       1. Set II = MII.  We will try to schedule the loop within II cycles.
67       2. Try to schedule the insns one by one according to the ordering.
68 	 For each insn compute an interval of cycles by considering already-
69 	 scheduled preds and succs (and associated latencies); try to place
70 	 the insn in the cycles of this window checking for potential
71 	 resource conflicts (using the DFA interface).
72 	 Note: this is different from the cycle-scheduling of schedule_insns;
73 	 here the insns are not scheduled monotonically top-down (nor bottom-
74 	 up).
75       3. If failed in scheduling all insns - bump II++ and try again, unless
76 	 II reaches an upper bound MaxII, in which case report failure.
77    5. If we succeeded in scheduling the loop within II cycles, we now
78       generate prolog and epilog, decrease the counter of the loop, and
79       perform modulo variable expansion for live ranges that span more than
80       II cycles (i.e. use register copies to prevent a def from overwriting
81       itself before reaching the use).
82 
83     SMS works with countable loops (1) whose control part can be easily
84     decoupled from the rest of the loop and (2) whose loop count can
85     be easily adjusted.  This is because we peel a constant number of
86     iterations into a prologue and epilogue for which we want to avoid
87     emitting the control part, and a kernel which is to iterate that
88     constant number of iterations less than the original loop.  So the
89     control part should be a set of insns clearly identified and having
90     its own iv, not otherwise used in the loop (at-least for now), which
91     initializes a register before the loop to the number of iterations.
92     Currently SMS relies on the do-loop pattern to recognize such loops,
93     where (1) the control part comprises of all insns defining and/or
94     using a certain 'count' register and (2) the loop count can be
95     adjusted by modifying this register prior to the loop.
96     TODO: Rely on cfgloop analysis instead.  */
97 
98 /* This page defines partial-schedule structures and functions for
99    modulo scheduling.  */
100 
101 typedef struct partial_schedule *partial_schedule_ptr;
102 typedef struct ps_insn *ps_insn_ptr;
103 
104 /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
105 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
106 
107 /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
108 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
109 
110 /* Perform signed modulo, always returning a non-negative value.  */
111 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
112 
113 /* The number of different iterations the nodes in ps span, assuming
114    the stage boundaries are placed efficiently.  */
115 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
116                          + 1 + ii - 1) / ii)
117 /* The stage count of ps.  */
118 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
119 
120 /* A single instruction in the partial schedule.  */
121 struct ps_insn
122 {
123   /* Identifies the instruction to be scheduled.  Values smaller than
124      the ddg's num_nodes refer directly to ddg nodes.  A value of
125      X - num_nodes refers to register move X.  */
126   int id;
127 
128   /* The (absolute) cycle in which the PS instruction is scheduled.
129      Same as SCHED_TIME (node).  */
130   int cycle;
131 
132   /* The next/prev PS_INSN in the same row.  */
133   ps_insn_ptr next_in_row,
134 	      prev_in_row;
135 
136 };
137 
138 /* Information about a register move that has been added to a partial
139    schedule.  */
140 struct ps_reg_move_info
141 {
142   /* The source of the move is defined by the ps_insn with id DEF.
143      The destination is used by the ps_insns with the ids in USES.  */
144   int def;
145   sbitmap uses;
146 
147   /* The original form of USES' instructions used OLD_REG, but they
148      should now use NEW_REG.  */
149   rtx old_reg;
150   rtx new_reg;
151 
152   /* The number of consecutive stages that the move occupies.  */
153   int num_consecutive_stages;
154 
155   /* An instruction that sets NEW_REG to the correct value.  The first
156      move associated with DEF will have an rhs of OLD_REG; later moves
157      use the result of the previous move.  */
158   rtx_insn *insn;
159 };
160 
161 /* Holds the partial schedule as an array of II rows.  Each entry of the
162    array points to a linked list of PS_INSNs, which represents the
163    instructions that are scheduled for that row.  */
164 struct partial_schedule
165 {
166   int ii;	/* Number of rows in the partial schedule.  */
167   int history;  /* Threshold for conflict checking using DFA.  */
168 
169   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
170   ps_insn_ptr *rows;
171 
172   /* All the moves added for this partial schedule.  Index X has
173      a ps_insn id of X + g->num_nodes.  */
174   vec<ps_reg_move_info> reg_moves;
175 
176   /*  rows_length[i] holds the number of instructions in the row.
177       It is used only (as an optimization) to back off quickly from
178       trying to schedule a node in a full row; that is, to avoid running
179       through futile DFA state transitions.  */
180   int *rows_length;
181 
182   /* The earliest absolute cycle of an insn in the partial schedule.  */
183   int min_cycle;
184 
185   /* The latest absolute cycle of an insn in the partial schedule.  */
186   int max_cycle;
187 
188   ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
189 
190   int stage_count;  /* The stage count of the partial schedule.  */
191 };
192 
193 
194 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
195 static void free_partial_schedule (partial_schedule_ptr);
196 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
197 void print_partial_schedule (partial_schedule_ptr, FILE *);
198 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
199 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
200 						int, int, sbitmap, sbitmap);
201 static void rotate_partial_schedule (partial_schedule_ptr, int);
202 void set_row_column_for_ps (partial_schedule_ptr);
203 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
204 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
205 
206 
207 /* This page defines constants and structures for the modulo scheduling
208    driver.  */
209 
210 static int sms_order_nodes (ddg_ptr, int, int *, int *);
211 static void set_node_sched_params (ddg_ptr);
212 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
213 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
214 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
215                                     rtx, rtx);
216 static int calculate_stage_count (partial_schedule_ptr, int);
217 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
218 					   int, int, sbitmap, sbitmap, sbitmap);
219 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
220 			     sbitmap, int, int *, int *, int *);
221 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
222 					  sbitmap, int *, sbitmap, sbitmap);
223 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
224 
225 #define NODE_ASAP(node) ((node)->aux.count)
226 
227 #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
228 #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
229 #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
230 #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
231 #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
232 
233 /* The scheduling parameters held for each node.  */
234 typedef struct node_sched_params
235 {
236   int time;	/* The absolute scheduling cycle.  */
237 
238   int row;    /* Holds time % ii.  */
239   int stage;  /* Holds time / ii.  */
240 
241   /* The column of a node inside the ps.  If nodes u, v are on the same row,
242      u will precede v if column (u) < column (v).  */
243   int column;
244 } *node_sched_params_ptr;
245 
246 /* The following three functions are copied from the current scheduler
247    code in order to use sched_analyze() for computing the dependencies.
248    They are used when initializing the sched_info structure.  */
249 static const char *
250 sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
251 {
252   static char tmp[80];
253 
254   sprintf (tmp, "i%4d", INSN_UID (insn));
255   return tmp;
256 }
257 
258 static void
259 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
260 			       regset used ATTRIBUTE_UNUSED)
261 {
262 }
263 
264 static struct common_sched_info_def sms_common_sched_info;
265 
266 static struct sched_deps_info_def sms_sched_deps_info =
267   {
268     compute_jump_reg_dependencies,
269     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
270     NULL,
271     0, 0, 0
272   };
273 
274 static struct haifa_sched_info sms_sched_info =
275 {
276   NULL,
277   NULL,
278   NULL,
279   NULL,
280   NULL,
281   sms_print_insn,
282   NULL,
283   NULL, /* insn_finishes_block_p */
284   NULL, NULL,
285   NULL, NULL,
286   0, 0,
287 
288   NULL, NULL, NULL, NULL,
289   NULL, NULL,
290   0
291 };
292 
293 /* Partial schedule instruction ID in PS is a register move.  Return
294    information about it.  */
295 static struct ps_reg_move_info *
296 ps_reg_move (partial_schedule_ptr ps, int id)
297 {
298   gcc_checking_assert (id >= ps->g->num_nodes);
299   return &ps->reg_moves[id - ps->g->num_nodes];
300 }
301 
302 /* Return the rtl instruction that is being scheduled by partial schedule
303    instruction ID, which belongs to schedule PS.  */
304 static rtx_insn *
305 ps_rtl_insn (partial_schedule_ptr ps, int id)
306 {
307   if (id < ps->g->num_nodes)
308     return ps->g->nodes[id].insn;
309   else
310     return ps_reg_move (ps, id)->insn;
311 }
312 
313 /* Partial schedule instruction ID, which belongs to PS, occurred in
314    the original (unscheduled) loop.  Return the first instruction
315    in the loop that was associated with ps_rtl_insn (PS, ID).
316    If the instruction had some notes before it, this is the first
317    of those notes.  */
318 static rtx_insn *
319 ps_first_note (partial_schedule_ptr ps, int id)
320 {
321   gcc_assert (id < ps->g->num_nodes);
322   return ps->g->nodes[id].first_note;
323 }
324 
325 /* Return the number of consecutive stages that are occupied by
326    partial schedule instruction ID in PS.  */
327 static int
328 ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
329 {
330   if (id < ps->g->num_nodes)
331     return 1;
332   else
333     return ps_reg_move (ps, id)->num_consecutive_stages;
334 }
335 
336 /* Given HEAD and TAIL which are the first and last insns in a loop;
337    return the register which controls the loop.  Return zero if it has
338    more than one occurrence in the loop besides the control part or the
339    do-loop pattern is not of the form we expect.  */
340 static rtx
341 doloop_register_get (rtx_insn *head, rtx_insn *tail)
342 {
343   rtx reg, condition;
344   rtx_insn *insn, *first_insn_not_to_check;
345 
346   if (!JUMP_P (tail))
347     return NULL_RTX;
348 
349   if (!targetm.code_for_doloop_end)
350     return NULL_RTX;
351 
352   /* TODO: Free SMS's dependence on doloop_condition_get.  */
353   condition = doloop_condition_get (tail);
354   if (! condition)
355     return NULL_RTX;
356 
357   if (REG_P (XEXP (condition, 0)))
358     reg = XEXP (condition, 0);
359   else if (GET_CODE (XEXP (condition, 0)) == PLUS
360 	   && REG_P (XEXP (XEXP (condition, 0), 0)))
361     reg = XEXP (XEXP (condition, 0), 0);
362   else
363     gcc_unreachable ();
364 
365   /* Check that the COUNT_REG has no other occurrences in the loop
366      until the decrement.  We assume the control part consists of
367      either a single (parallel) branch-on-count or a (non-parallel)
368      branch immediately preceded by a single (decrement) insn.  */
369   first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
370                              : prev_nondebug_insn (tail));
371 
372   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
373     if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
374       {
375         if (dump_file)
376         {
377           fprintf (dump_file, "SMS count_reg found ");
378           print_rtl_single (dump_file, reg);
379           fprintf (dump_file, " outside control in insn:\n");
380           print_rtl_single (dump_file, insn);
381         }
382 
383         return NULL_RTX;
384       }
385 
386   return reg;
387 }
388 
389 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
390    that the number of iterations is a compile-time constant.  If so,
391    return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
392    this constant.  Otherwise return 0.  */
393 static rtx_insn *
394 const_iteration_count (rtx count_reg, basic_block pre_header,
395 		       int64_t * count)
396 {
397   rtx_insn *insn;
398   rtx_insn *head, *tail;
399 
400   if (! pre_header)
401     return NULL;
402 
403   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
404 
405   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
406     if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
407 	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
408       {
409 	rtx pat = single_set (insn);
410 
411 	if (CONST_INT_P (SET_SRC (pat)))
412 	  {
413 	    *count = INTVAL (SET_SRC (pat));
414 	    return insn;
415 	  }
416 
417 	return NULL;
418       }
419 
420   return NULL;
421 }
422 
423 /* A very simple resource-based lower bound on the initiation interval.
424    ??? Improve the accuracy of this bound by considering the
425    utilization of various units.  */
426 static int
427 res_MII (ddg_ptr g)
428 {
429   if (targetm.sched.sms_res_mii)
430     return targetm.sched.sms_res_mii (g);
431 
432   return ((g->num_nodes - g->num_debug) / issue_rate);
433 }
434 
435 
436 /* A vector that contains the sched data for each ps_insn.  */
437 static vec<node_sched_params> node_sched_param_vec;
438 
439 /* Allocate sched_params for each node and initialize it.  */
440 static void
441 set_node_sched_params (ddg_ptr g)
442 {
443   node_sched_param_vec.truncate (0);
444   node_sched_param_vec.safe_grow_cleared (g->num_nodes);
445 }
446 
447 /* Make sure that node_sched_param_vec has an entry for every move in PS.  */
448 static void
449 extend_node_sched_params (partial_schedule_ptr ps)
450 {
451   node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
452 					  + ps->reg_moves.length ());
453 }
454 
455 /* Update the sched_params (time, row and stage) for node U using the II,
456    the CYCLE of U and MIN_CYCLE.
457    We're not simply taking the following
458    SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
459    because the stages may not be aligned on cycle 0.  */
460 static void
461 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
462 {
463   int sc_until_cycle_zero;
464   int stage;
465 
466   SCHED_TIME (u) = cycle;
467   SCHED_ROW (u) = SMODULO (cycle, ii);
468 
469   /* The calculation of stage count is done adding the number
470      of stages before cycle zero and after cycle zero.  */
471   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
472 
473   if (SCHED_TIME (u) < 0)
474     {
475       stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
476       SCHED_STAGE (u) = sc_until_cycle_zero - stage;
477     }
478   else
479     {
480       stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
481       SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
482     }
483 }
484 
485 static void
486 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
487 {
488   int i;
489 
490   if (! file)
491     return;
492   for (i = 0; i < num_nodes; i++)
493     {
494       node_sched_params_ptr nsp = SCHED_PARAMS (i);
495 
496       fprintf (file, "Node = %d; INSN = %d\n", i,
497 	       INSN_UID (ps_rtl_insn (ps, i)));
498       fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
499       fprintf (file, " time = %d:\n", nsp->time);
500       fprintf (file, " stage = %d:\n", nsp->stage);
501     }
502 }
503 
504 /* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
505 static void
506 set_columns_for_row (partial_schedule_ptr ps, int row)
507 {
508   ps_insn_ptr cur_insn;
509   int column;
510 
511   column = 0;
512   for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
513     SCHED_COLUMN (cur_insn->id) = column++;
514 }
515 
516 /* Set SCHED_COLUMN for each instruction in PS.  */
517 static void
518 set_columns_for_ps (partial_schedule_ptr ps)
519 {
520   int row;
521 
522   for (row = 0; row < ps->ii; row++)
523     set_columns_for_row (ps, row);
524 }
525 
526 /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
527    Its single predecessor has already been scheduled, as has its
528    ddg node successors.  (The move may have also another move as its
529    successor, in which case that successor will be scheduled later.)
530 
531    The move is part of a chain that satisfies register dependencies
532    between a producing ddg node and various consuming ddg nodes.
533    If some of these dependencies have a distance of 1 (meaning that
534    the use is upward-exposed) then DISTANCE1_USES is nonnull and
535    contains the set of uses with distance-1 dependencies.
536    DISTANCE1_USES is null otherwise.
537 
538    MUST_FOLLOW is a scratch bitmap that is big enough to hold
539    all current ps_insn ids.
540 
541    Return true on success.  */
542 static bool
543 schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
544 		   sbitmap distance1_uses, sbitmap must_follow)
545 {
546   unsigned int u;
547   int this_time, this_distance, this_start, this_end, this_latency;
548   int start, end, c, ii;
549   sbitmap_iterator sbi;
550   ps_reg_move_info *move;
551   rtx_insn *this_insn;
552   ps_insn_ptr psi;
553 
554   move = ps_reg_move (ps, i_reg_move);
555   ii = ps->ii;
556   if (dump_file)
557     {
558       fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
559 	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
560 	       PS_MIN_CYCLE (ps));
561       print_rtl_single (dump_file, move->insn);
562       fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
563       fprintf (dump_file, "=========== =========== =====\n");
564     }
565 
566   start = INT_MIN;
567   end = INT_MAX;
568 
569   /* For dependencies of distance 1 between a producer ddg node A
570      and consumer ddg node B, we have a chain of dependencies:
571 
572         A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
573 
574      where Mi is the ith move.  For dependencies of distance 0 between
575      a producer ddg node A and consumer ddg node C, we have a chain of
576      dependencies:
577 
578         A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
579 
580      where Mi' occupies the same position as Mi but occurs a stage later.
581      We can only schedule each move once, so if we have both types of
582      chain, we model the second as:
583 
584         A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
585 
586      First handle the dependencies between the previously-scheduled
587      predecessor and the move.  */
588   this_insn = ps_rtl_insn (ps, move->def);
589   this_latency = insn_latency (this_insn, move->insn);
590   this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
591   this_time = SCHED_TIME (move->def) - this_distance * ii;
592   this_start = this_time + this_latency;
593   this_end = this_time + ii;
594   if (dump_file)
595     fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
596 	     this_start, this_end, SCHED_TIME (move->def),
597 	     INSN_UID (this_insn), this_latency, this_distance,
598 	     INSN_UID (move->insn));
599 
600   if (start < this_start)
601     start = this_start;
602   if (end > this_end)
603     end = this_end;
604 
605   /* Handle the dependencies between the move and previously-scheduled
606      successors.  */
607   EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
608     {
609       this_insn = ps_rtl_insn (ps, u);
610       this_latency = insn_latency (move->insn, this_insn);
611       if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
612 	this_distance = -1;
613       else
614 	this_distance = 0;
615       this_time = SCHED_TIME (u) + this_distance * ii;
616       this_start = this_time - ii;
617       this_end = this_time - this_latency;
618       if (dump_file)
619 	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
620 		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
621 		 this_latency, this_distance, INSN_UID (this_insn));
622 
623       if (start < this_start)
624 	start = this_start;
625       if (end > this_end)
626 	end = this_end;
627     }
628 
629   if (dump_file)
630     {
631       fprintf (dump_file, "----------- ----------- -----\n");
632       fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
633     }
634 
635   bitmap_clear (must_follow);
636   bitmap_set_bit (must_follow, move->def);
637 
638   start = MAX (start, end - (ii - 1));
639   for (c = end; c >= start; c--)
640     {
641       psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
642 					 move->uses, must_follow);
643       if (psi)
644 	{
645 	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
646 	  if (dump_file)
647 	    fprintf (dump_file, "\nScheduled register move INSN %d at"
648 		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
649 		     SCHED_ROW (i_reg_move));
650 	  return true;
651 	}
652     }
653 
654   if (dump_file)
655     fprintf (dump_file, "\nNo available slot\n\n");
656 
657   return false;
658 }
659 
660 /*
661    Breaking intra-loop register anti-dependences:
662    Each intra-loop register anti-dependence implies a cross-iteration true
663    dependence of distance 1. Therefore, we can remove such false dependencies
664    and figure out if the partial schedule broke them by checking if (for a
665    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
666    if so generate a register move.   The number of such moves is equal to:
667               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
668    nreg_moves = ----------------------------------- + 1 - {   dependence.
669                             ii                          { 1 if not.
670 */
671 static bool
672 schedule_reg_moves (partial_schedule_ptr ps)
673 {
674   ddg_ptr g = ps->g;
675   int ii = ps->ii;
676   int i;
677 
678   for (i = 0; i < g->num_nodes; i++)
679     {
680       ddg_node_ptr u = &g->nodes[i];
681       ddg_edge_ptr e;
682       int nreg_moves = 0, i_reg_move;
683       rtx prev_reg, old_reg;
684       int first_move;
685       int distances[2];
686       sbitmap distance1_uses;
687       rtx set = single_set (u->insn);
688 
689       /* Skip instructions that do not set a register.  */
690       if (set && !REG_P (SET_DEST (set)))
691         continue;
692 
693       /* Compute the number of reg_moves needed for u, by looking at life
694 	 ranges started at u (excluding self-loops).  */
695       distances[0] = distances[1] = false;
696       for (e = u->out; e; e = e->next_out)
697 	if (e->type == TRUE_DEP && e->dest != e->src)
698 	  {
699 	    int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
700 				- SCHED_TIME (e->src->cuid)) / ii;
701 
702             if (e->distance == 1)
703               nreg_moves4e = (SCHED_TIME (e->dest->cuid)
704 			      - SCHED_TIME (e->src->cuid) + ii) / ii;
705 
706 	    /* If dest precedes src in the schedule of the kernel, then dest
707 	       will read before src writes and we can save one reg_copy.  */
708 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
709 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
710 	      nreg_moves4e--;
711 
712             if (nreg_moves4e >= 1)
713 	      {
714 		/* !single_set instructions are not supported yet and
715 		   thus we do not except to encounter them in the loop
716 		   except from the doloop part.  For the latter case
717 		   we assume no regmoves are generated as the doloop
718 		   instructions are tied to the branch with an edge.  */
719 		gcc_assert (set);
720 		/* If the instruction contains auto-inc register then
721 		   validate that the regmov is being generated for the
722 		   target regsiter rather then the inc'ed register.	*/
723 		gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
724 	      }
725 
726 	    if (nreg_moves4e)
727 	      {
728 		gcc_assert (e->distance < 2);
729 		distances[e->distance] = true;
730 	      }
731 	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
732 	  }
733 
734       if (nreg_moves == 0)
735 	continue;
736 
737       /* Create NREG_MOVES register moves.  */
738       first_move = ps->reg_moves.length ();
739       ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
740       extend_node_sched_params (ps);
741 
742       /* Record the moves associated with this node.  */
743       first_move += ps->g->num_nodes;
744 
745       /* Generate each move.  */
746       old_reg = prev_reg = SET_DEST (set);
747       if (HARD_REGISTER_P (old_reg))
748 	return false;
749 
750       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
751 	{
752 	  ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
753 
754 	  move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
755 	  move->uses = sbitmap_alloc (first_move + nreg_moves);
756 	  move->old_reg = old_reg;
757 	  move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
758 	  move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
759 	  move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
760 	  bitmap_clear (move->uses);
761 
762 	  prev_reg = move->new_reg;
763 	}
764 
765       distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
766 
767       if (distance1_uses)
768 	bitmap_clear (distance1_uses);
769 
770       /* Every use of the register defined by node may require a different
771 	 copy of this register, depending on the time the use is scheduled.
772 	 Record which uses require which move results.  */
773       for (e = u->out; e; e = e->next_out)
774 	if (e->type == TRUE_DEP && e->dest != e->src)
775 	  {
776 	    int dest_copy = (SCHED_TIME (e->dest->cuid)
777 			     - SCHED_TIME (e->src->cuid)) / ii;
778 
779 	    if (e->distance == 1)
780 	      dest_copy = (SCHED_TIME (e->dest->cuid)
781 			   - SCHED_TIME (e->src->cuid) + ii) / ii;
782 
783 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
784 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
785 	      dest_copy--;
786 
787 	    if (dest_copy)
788 	      {
789 		ps_reg_move_info *move;
790 
791 		move = ps_reg_move (ps, first_move + dest_copy - 1);
792 		bitmap_set_bit (move->uses, e->dest->cuid);
793 		if (e->distance == 1)
794 		  bitmap_set_bit (distance1_uses, e->dest->cuid);
795 	      }
796 	  }
797 
798       auto_sbitmap must_follow (first_move + nreg_moves);
799       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
800 	if (!schedule_reg_move (ps, first_move + i_reg_move,
801 				distance1_uses, must_follow))
802 	  break;
803       if (distance1_uses)
804 	sbitmap_free (distance1_uses);
805       if (i_reg_move < nreg_moves)
806 	return false;
807     }
808   return true;
809 }
810 
811 /* Emit the moves associated with PS.  Apply the substitutions
812    associated with them.  */
813 static void
814 apply_reg_moves (partial_schedule_ptr ps)
815 {
816   ps_reg_move_info *move;
817   int i;
818 
819   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
820     {
821       unsigned int i_use;
822       sbitmap_iterator sbi;
823 
824       EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
825 	{
826 	  replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
827 	  df_insn_rescan (ps->g->nodes[i_use].insn);
828 	}
829     }
830 }
831 
832 /* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
833    SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
834    will move to cycle zero.  */
835 static void
836 reset_sched_times (partial_schedule_ptr ps, int amount)
837 {
838   int row;
839   int ii = ps->ii;
840   ps_insn_ptr crr_insn;
841 
842   for (row = 0; row < ii; row++)
843     for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
844       {
845 	int u = crr_insn->id;
846 	int normalized_time = SCHED_TIME (u) - amount;
847 	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
848 
849         if (dump_file)
850           {
851             /* Print the scheduling times after the rotation.  */
852 	    rtx_insn *insn = ps_rtl_insn (ps, u);
853 
854             fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
855                      "crr_insn->cycle=%d, min_cycle=%d", u,
856                      INSN_UID (insn), normalized_time, new_min_cycle);
857             if (JUMP_P (insn))
858               fprintf (dump_file, " (branch)");
859             fprintf (dump_file, "\n");
860           }
861 
862 	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
863 	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
864 
865 	crr_insn->cycle = normalized_time;
866 	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
867       }
868 }
869 
870 /* Permute the insns according to their order in PS, from row 0 to
871    row ii-1, and position them right before LAST.  This schedules
872    the insns of the loop kernel.  */
873 static void
874 permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
875 {
876   int ii = ps->ii;
877   int row;
878   ps_insn_ptr ps_ij;
879 
880   for (row = 0; row < ii ; row++)
881     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
882       {
883 	rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
884 
885 	if (PREV_INSN (last) != insn)
886 	  {
887 	    if (ps_ij->id < ps->g->num_nodes)
888 	      reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
889 				  PREV_INSN (last));
890 	    else
891 	      add_insn_before (insn, last, NULL);
892 	  }
893       }
894 }
895 
896 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
897    respectively only if cycle C falls on the border of the scheduling
898    window boundaries marked by START and END cycles.  STEP is the
899    direction of the window.  */
900 static inline void
901 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
902 			 sbitmap *tmp_precede, sbitmap must_precede, int c,
903 			 int start, int end, int step)
904 {
905   *tmp_precede = NULL;
906   *tmp_follow = NULL;
907 
908   if (c == start)
909     {
910       if (step == 1)
911 	*tmp_precede = must_precede;
912       else			/* step == -1.  */
913 	*tmp_follow = must_follow;
914     }
915   if (c == end - step)
916     {
917       if (step == 1)
918 	*tmp_follow = must_follow;
919       else			/* step == -1.  */
920 	*tmp_precede = must_precede;
921     }
922 
923 }
924 
925 /* Return True if the branch can be moved to row ii-1 while
926    normalizing the partial schedule PS to start from cycle zero and thus
927    optimize the SC.  Otherwise return False.  */
928 static bool
929 optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
930 {
931   int amount = PS_MIN_CYCLE (ps);
932   int start, end, step;
933   int ii = ps->ii;
934   bool ok = false;
935   int stage_count, stage_count_curr;
936 
937   /* Compare the SC after normalization and SC after bringing the branch
938      to row ii-1.  If they are equal just bail out.  */
939   stage_count = calculate_stage_count (ps, amount);
940   stage_count_curr =
941     calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
942 
943   if (stage_count == stage_count_curr)
944     {
945       if (dump_file)
946 	fprintf (dump_file, "SMS SC already optimized.\n");
947 
948       return false;
949     }
950 
951   if (dump_file)
952     {
953       fprintf (dump_file, "SMS Trying to optimize branch location\n");
954       fprintf (dump_file, "SMS partial schedule before trial:\n");
955       print_partial_schedule (ps, dump_file);
956     }
957 
958   /* First, normalize the partial scheduling.  */
959   reset_sched_times (ps, amount);
960   rotate_partial_schedule (ps, amount);
961   if (dump_file)
962     {
963       fprintf (dump_file,
964 	       "SMS partial schedule after normalization (ii, %d, SC %d):\n",
965 	       ii, stage_count);
966       print_partial_schedule (ps, dump_file);
967     }
968 
969   if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
970     return true;
971 
972   auto_sbitmap sched_nodes (g->num_nodes);
973   bitmap_ones (sched_nodes);
974 
975   /* Calculate the new placement of the branch.  It should be in row
976      ii-1 and fall into it's scheduling window.  */
977   if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
978 			&step, &end) == 0)
979     {
980       bool success;
981       ps_insn_ptr next_ps_i;
982       int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
983       int row = SMODULO (branch_cycle, ps->ii);
984       int num_splits = 0;
985       sbitmap tmp_precede, tmp_follow;
986       int min_cycle, c;
987 
988       if (dump_file)
989 	fprintf (dump_file, "\nTrying to schedule node %d "
990 		 "INSN = %d  in (%d .. %d) step %d\n",
991 		 g->closing_branch->cuid,
992 		 (INSN_UID (g->closing_branch->insn)), start, end, step);
993 
994       gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
995       if (step == 1)
996 	{
997 	  c = start + ii - SMODULO (start, ii) - 1;
998 	  gcc_assert (c >= start);
999 	  if (c >= end)
1000 	    {
1001 	      if (dump_file)
1002 		fprintf (dump_file,
1003 			 "SMS failed to schedule branch at cycle: %d\n", c);
1004 	      return false;
1005 	    }
1006 	}
1007       else
1008 	{
1009 	  c = start - SMODULO (start, ii) - 1;
1010 	  gcc_assert (c <= start);
1011 
1012 	  if (c <= end)
1013 	    {
1014 	      if (dump_file)
1015 		fprintf (dump_file,
1016 			 "SMS failed to schedule branch at cycle: %d\n", c);
1017 	      return false;
1018 	    }
1019 	}
1020 
1021       auto_sbitmap must_precede (g->num_nodes);
1022       auto_sbitmap must_follow (g->num_nodes);
1023 
1024       /* Try to schedule the branch is it's new cycle.  */
1025       calculate_must_precede_follow (g->closing_branch, start, end,
1026 				     step, ii, sched_nodes,
1027 				     must_precede, must_follow);
1028 
1029       set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1030 			       must_precede, c, start, end, step);
1031 
1032       /* Find the element in the partial schedule related to the closing
1033          branch so we can remove it from it's current cycle.  */
1034       for (next_ps_i = ps->rows[row];
1035 	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
1036 	if (next_ps_i->id == g->closing_branch->cuid)
1037 	  break;
1038 
1039       min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1040       remove_node_from_ps (ps, next_ps_i);
1041       success =
1042 	try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1043 				      sched_nodes, &num_splits,
1044 				      tmp_precede, tmp_follow);
1045       gcc_assert (num_splits == 0);
1046       if (!success)
1047 	{
1048 	  if (dump_file)
1049 	    fprintf (dump_file,
1050 		     "SMS failed to schedule branch at cycle: %d, "
1051 		     "bringing it back to cycle %d\n", c, branch_cycle);
1052 
1053 	  /* The branch was failed to be placed in row ii - 1.
1054 	     Put it back in it's original place in the partial
1055 	     schedualing.  */
1056 	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1057 				   must_precede, branch_cycle, start, end,
1058 				   step);
1059 	  success =
1060 	    try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1061 					  branch_cycle, sched_nodes,
1062 					  &num_splits, tmp_precede,
1063 					  tmp_follow);
1064 	  gcc_assert (success && (num_splits == 0));
1065 	  ok = false;
1066 	}
1067       else
1068 	{
1069 	  /* The branch is placed in row ii - 1.  */
1070 	  if (dump_file)
1071 	    fprintf (dump_file,
1072 		     "SMS success in moving branch to cycle %d\n", c);
1073 
1074 	  update_node_sched_params (g->closing_branch->cuid, ii, c,
1075 				    PS_MIN_CYCLE (ps));
1076 	  ok = true;
1077 	}
1078 
1079       /* This might have been added to a new first stage.  */
1080       if (PS_MIN_CYCLE (ps) < min_cycle)
1081 	reset_sched_times (ps, 0);
1082     }
1083 
1084   return ok;
1085 }
1086 
1087 static void
1088 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1089 			   int to_stage, rtx count_reg)
1090 {
1091   int row;
1092   ps_insn_ptr ps_ij;
1093 
1094   for (row = 0; row < ps->ii; row++)
1095     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1096       {
1097 	int u = ps_ij->id;
1098 	int first_u, last_u;
1099 	rtx_insn *u_insn;
1100 
1101         /* Do not duplicate any insn which refers to count_reg as it
1102            belongs to the control part.
1103            The closing branch is scheduled as well and thus should
1104            be ignored.
1105            TODO: This should be done by analyzing the control part of
1106            the loop.  */
1107 	u_insn = ps_rtl_insn (ps, u);
1108         if (reg_mentioned_p (count_reg, u_insn)
1109             || JUMP_P (u_insn))
1110           continue;
1111 
1112 	first_u = SCHED_STAGE (u);
1113 	last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1114 	if (from_stage <= last_u && to_stage >= first_u)
1115 	  {
1116 	    if (u < ps->g->num_nodes)
1117 	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1118 	    else
1119 	      emit_insn (copy_rtx (PATTERN (u_insn)));
1120 	  }
1121       }
1122 }
1123 
1124 
1125 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
1126 static void
1127 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1128                         rtx count_reg, rtx count_init)
1129 {
1130   int i;
1131   int last_stage = PS_STAGE_COUNT (ps) - 1;
1132   edge e;
1133 
1134   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
1135   start_sequence ();
1136 
1137   if (!count_init)
1138     {
1139       /* Generate instructions at the beginning of the prolog to
1140          adjust the loop count by STAGE_COUNT.  If loop count is constant
1141          (count_init), this constant is adjusted by STAGE_COUNT in
1142          generate_prolog_epilog function.  */
1143       rtx sub_reg = NULL_RTX;
1144 
1145       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1146 				     gen_int_mode (last_stage,
1147 						   GET_MODE (count_reg)),
1148                                      count_reg, 1, OPTAB_DIRECT);
1149       gcc_assert (REG_P (sub_reg));
1150       if (REGNO (sub_reg) != REGNO (count_reg))
1151         emit_move_insn (count_reg, sub_reg);
1152     }
1153 
1154   for (i = 0; i < last_stage; i++)
1155     duplicate_insns_of_cycles (ps, 0, i, count_reg);
1156 
1157   /* Put the prolog on the entry edge.  */
1158   e = loop_preheader_edge (loop);
1159   split_edge_and_insert (e, get_insns ());
1160   if (!flag_resched_modulo_sched)
1161     e->dest->flags |= BB_DISABLE_SCHEDULE;
1162 
1163   end_sequence ();
1164 
1165   /* Generate the epilog, inserting its insns on the loop-exit edge.  */
1166   start_sequence ();
1167 
1168   for (i = 0; i < last_stage; i++)
1169     duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1170 
1171   /* Put the epilogue on the exit edge.  */
1172   gcc_assert (single_exit (loop));
1173   e = single_exit (loop);
1174   split_edge_and_insert (e, get_insns ());
1175   if (!flag_resched_modulo_sched)
1176     e->dest->flags |= BB_DISABLE_SCHEDULE;
1177 
1178   end_sequence ();
1179 }
1180 
1181 /* Mark LOOP as software pipelined so the later
1182    scheduling passes don't touch it.  */
1183 static void
1184 mark_loop_unsched (struct loop *loop)
1185 {
1186   unsigned i;
1187   basic_block *bbs = get_loop_body (loop);
1188 
1189   for (i = 0; i < loop->num_nodes; i++)
1190     bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1191 
1192   free (bbs);
1193 }
1194 
1195 /* Return true if all the BBs of the loop are empty except the
1196    loop header.  */
1197 static bool
1198 loop_single_full_bb_p (struct loop *loop)
1199 {
1200   unsigned i;
1201   basic_block *bbs = get_loop_body (loop);
1202 
1203   for (i = 0; i < loop->num_nodes ; i++)
1204     {
1205       rtx_insn *head, *tail;
1206       bool empty_bb = true;
1207 
1208       if (bbs[i] == loop->header)
1209         continue;
1210 
1211       /* Make sure that basic blocks other than the header
1212          have only notes labels or jumps.  */
1213       get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1214       for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1215         {
1216           if (NOTE_P (head) || LABEL_P (head)
1217  	      || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1218  	    continue;
1219  	  empty_bb = false;
1220  	  break;
1221         }
1222 
1223       if (! empty_bb)
1224         {
1225           free (bbs);
1226           return false;
1227         }
1228     }
1229   free (bbs);
1230   return true;
1231 }
1232 
1233 /* Dump file:line from INSN's location info to dump_file.  */
1234 
1235 static void
1236 dump_insn_location (rtx_insn *insn)
1237 {
1238   if (dump_file && INSN_HAS_LOCATION (insn))
1239     {
1240       expanded_location xloc = insn_location (insn);
1241       fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1242     }
1243 }
1244 
1245 /* A simple loop from SMS point of view; it is a loop that is composed of
1246    either a single basic block or two BBs - a header and a latch.  */
1247 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
1248 				  && (EDGE_COUNT (loop->latch->preds) == 1) \
1249                                   && (EDGE_COUNT (loop->latch->succs) == 1))
1250 
1251 /* Return true if the loop is in its canonical form and false if not.
1252    i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
1253 static bool
1254 loop_canon_p (struct loop *loop)
1255 {
1256 
1257   if (loop->inner || !loop_outer (loop))
1258   {
1259     if (dump_file)
1260       fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1261     return false;
1262   }
1263 
1264   if (!single_exit (loop))
1265     {
1266       if (dump_file)
1267 	{
1268 	  rtx_insn *insn = BB_END (loop->header);
1269 
1270 	  fprintf (dump_file, "SMS loop many exits");
1271 	  dump_insn_location (insn);
1272 	  fprintf (dump_file, "\n");
1273 	}
1274       return false;
1275     }
1276 
1277   if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1278     {
1279       if (dump_file)
1280 	{
1281 	  rtx_insn *insn = BB_END (loop->header);
1282 
1283 	  fprintf (dump_file, "SMS loop many BBs.");
1284 	  dump_insn_location (insn);
1285 	  fprintf (dump_file, "\n");
1286 	}
1287       return false;
1288     }
1289 
1290     return true;
1291 }
1292 
1293 /* If there are more than one entry for the loop,
1294    make it one by splitting the first entry edge and
1295    redirecting the others to the new BB.  */
1296 static void
1297 canon_loop (struct loop *loop)
1298 {
1299   edge e;
1300   edge_iterator i;
1301 
1302   /* Avoid annoying special cases of edges going to exit
1303      block.  */
1304   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1305     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1306       split_edge (e);
1307 
1308   if (loop->latch == loop->header
1309       || EDGE_COUNT (loop->latch->succs) > 1)
1310     {
1311       FOR_EACH_EDGE (e, i, loop->header->preds)
1312         if (e->src == loop->latch)
1313           break;
1314       split_edge (e);
1315     }
1316 }
1317 
1318 /* Setup infos.  */
1319 static void
1320 setup_sched_infos (void)
1321 {
1322   memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1323 	  sizeof (sms_common_sched_info));
1324   sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1325   common_sched_info = &sms_common_sched_info;
1326 
1327   sched_deps_info = &sms_sched_deps_info;
1328   current_sched_info = &sms_sched_info;
1329 }
1330 
1331 /* Probability in % that the sms-ed loop rolls enough so that optimized
1332    version may be entered.  Just a guess.  */
1333 #define PROB_SMS_ENOUGH_ITERATIONS 80
1334 
1335 /* Used to calculate the upper bound of ii.  */
1336 #define MAXII_FACTOR 2
1337 
1338 /* Main entry point, perform SMS scheduling on the loops of the function
1339    that consist of single basic blocks.  */
1340 static void
1341 sms_schedule (void)
1342 {
1343   rtx_insn *insn;
1344   ddg_ptr *g_arr, g;
1345   int * node_order;
1346   int maxii, max_asap;
1347   partial_schedule_ptr ps;
1348   basic_block bb = NULL;
1349   struct loop *loop;
1350   basic_block condition_bb = NULL;
1351   edge latch_edge;
1352   HOST_WIDE_INT trip_count, max_trip_count;
1353 
1354   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1355 		       | LOOPS_HAVE_RECORDED_EXITS);
1356   if (number_of_loops (cfun) <= 1)
1357     {
1358       loop_optimizer_finalize ();
1359       return;  /* There are no loops to schedule.  */
1360     }
1361 
1362   /* Initialize issue_rate.  */
1363   if (targetm.sched.issue_rate)
1364     {
1365       int temp = reload_completed;
1366 
1367       reload_completed = 1;
1368       issue_rate = targetm.sched.issue_rate ();
1369       reload_completed = temp;
1370     }
1371   else
1372     issue_rate = 1;
1373 
1374   /* Initialize the scheduler.  */
1375   setup_sched_infos ();
1376   haifa_sched_init ();
1377 
1378   /* Allocate memory to hold the DDG array one entry for each loop.
1379      We use loop->num as index into this array.  */
1380   g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1381 
1382   if (dump_file)
1383   {
1384     fprintf (dump_file, "\n\nSMS analysis phase\n");
1385     fprintf (dump_file, "===================\n\n");
1386   }
1387 
1388   /* Build DDGs for all the relevant loops and hold them in G_ARR
1389      indexed by the loop index.  */
1390   FOR_EACH_LOOP (loop, 0)
1391     {
1392       rtx_insn *head, *tail;
1393       rtx count_reg;
1394 
1395       /* For debugging.  */
1396       if (dbg_cnt (sms_sched_loop) == false)
1397         {
1398           if (dump_file)
1399             fprintf (dump_file, "SMS reached max limit... \n");
1400 
1401 	  break;
1402         }
1403 
1404       if (dump_file)
1405 	{
1406 	  rtx_insn *insn = BB_END (loop->header);
1407 
1408 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1409 	  dump_insn_location (insn);
1410 	  fprintf (dump_file, "\n");
1411 	}
1412 
1413       if (! loop_canon_p (loop))
1414         continue;
1415 
1416       if (! loop_single_full_bb_p (loop))
1417       {
1418         if (dump_file)
1419           fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1420 	continue;
1421       }
1422 
1423       bb = loop->header;
1424 
1425       get_ebb_head_tail (bb, bb, &head, &tail);
1426       latch_edge = loop_latch_edge (loop);
1427       gcc_assert (single_exit (loop));
1428       trip_count = get_estimated_loop_iterations_int (loop);
1429       max_trip_count = get_max_loop_iterations_int (loop);
1430 
1431       /* Perform SMS only on loops that their average count is above threshold.  */
1432 
1433       if ( latch_edge->count () > profile_count::zero ()
1434           && (latch_edge->count()
1435 	      < single_exit (loop)->count ().apply_scale
1436 				 (SMS_LOOP_AVERAGE_COUNT_THRESHOLD, 1)))
1437 	{
1438 	  if (dump_file)
1439 	    {
1440 	      dump_insn_location (tail);
1441 	      fprintf (dump_file, "\nSMS single-bb-loop\n");
1442 	      if (profile_info && flag_branch_probabilities)
1443 	    	{
1444 	      	  fprintf (dump_file, "SMS loop-count ");
1445 	      	  fprintf (dump_file, "%" PRId64,
1446 	             	   (int64_t) bb->count.to_gcov_type ());
1447 	      	  fprintf (dump_file, "\n");
1448                   fprintf (dump_file, "SMS trip-count ");
1449                   fprintf (dump_file, "%" PRId64 "max %" PRId64,
1450                            (int64_t) trip_count, (int64_t) max_trip_count);
1451                   fprintf (dump_file, "\n");
1452 	      	  fprintf (dump_file, "SMS profile-sum-max ");
1453 	      	  fprintf (dump_file, "%" PRId64,
1454 	          	   (int64_t) profile_info->sum_max);
1455 	      	  fprintf (dump_file, "\n");
1456 	    	}
1457 	    }
1458           continue;
1459         }
1460 
1461       /* Make sure this is a doloop.  */
1462       if ( !(count_reg = doloop_register_get (head, tail)))
1463       {
1464         if (dump_file)
1465           fprintf (dump_file, "SMS doloop_register_get failed\n");
1466 	continue;
1467       }
1468 
1469       /* Don't handle BBs with calls or barriers
1470 	 or !single_set with the exception of instructions that include
1471 	 count_reg---these instructions are part of the control part
1472 	 that do-loop recognizes.
1473          ??? Should handle insns defining subregs.  */
1474      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1475       {
1476          rtx set;
1477 
1478         if (CALL_P (insn)
1479             || BARRIER_P (insn)
1480             || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1481                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1482                 && !reg_mentioned_p (count_reg, insn))
1483             || (INSN_P (insn) && (set = single_set (insn))
1484                 && GET_CODE (SET_DEST (set)) == SUBREG))
1485         break;
1486       }
1487 
1488       if (insn != NEXT_INSN (tail))
1489 	{
1490 	  if (dump_file)
1491 	    {
1492 	      if (CALL_P (insn))
1493 		fprintf (dump_file, "SMS loop-with-call\n");
1494 	      else if (BARRIER_P (insn))
1495 		fprintf (dump_file, "SMS loop-with-barrier\n");
1496               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1497                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1498                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1499               else
1500                fprintf (dump_file, "SMS loop with subreg in lhs\n");
1501 	      print_rtl_single (dump_file, insn);
1502 	    }
1503 
1504 	  continue;
1505 	}
1506 
1507       /* Always schedule the closing branch with the rest of the
1508          instructions. The branch is rotated to be in row ii-1 at the
1509          end of the scheduling procedure to make sure it's the last
1510          instruction in the iteration.  */
1511       if (! (g = create_ddg (bb, 1)))
1512         {
1513           if (dump_file)
1514 	    fprintf (dump_file, "SMS create_ddg failed\n");
1515 	  continue;
1516         }
1517 
1518       g_arr[loop->num] = g;
1519       if (dump_file)
1520         fprintf (dump_file, "...OK\n");
1521 
1522     }
1523   if (dump_file)
1524   {
1525     fprintf (dump_file, "\nSMS transformation phase\n");
1526     fprintf (dump_file, "=========================\n\n");
1527   }
1528 
1529   /* We don't want to perform SMS on new loops - created by versioning.  */
1530   FOR_EACH_LOOP (loop, 0)
1531     {
1532       rtx_insn *head, *tail;
1533       rtx count_reg;
1534       rtx_insn *count_init;
1535       int mii, rec_mii, stage_count, min_cycle;
1536       int64_t loop_count = 0;
1537       bool opt_sc_p;
1538 
1539       if (! (g = g_arr[loop->num]))
1540         continue;
1541 
1542       if (dump_file)
1543 	{
1544 	  rtx_insn *insn = BB_END (loop->header);
1545 
1546 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1547 	  dump_insn_location (insn);
1548 	  fprintf (dump_file, "\n");
1549 
1550 	  print_ddg (dump_file, g);
1551 	}
1552 
1553       get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1554 
1555       latch_edge = loop_latch_edge (loop);
1556       gcc_assert (single_exit (loop));
1557       trip_count = get_estimated_loop_iterations_int (loop);
1558       max_trip_count = get_max_loop_iterations_int (loop);
1559 
1560       if (dump_file)
1561 	{
1562 	  dump_insn_location (tail);
1563 	  fprintf (dump_file, "\nSMS single-bb-loop\n");
1564 	  if (profile_info && flag_branch_probabilities)
1565 	    {
1566 	      fprintf (dump_file, "SMS loop-count ");
1567 	      fprintf (dump_file, "%" PRId64,
1568 	               (int64_t) bb->count.to_gcov_type ());
1569 	      fprintf (dump_file, "\n");
1570 	      fprintf (dump_file, "SMS profile-sum-max ");
1571 	      fprintf (dump_file, "%" PRId64,
1572 	               (int64_t) profile_info->sum_max);
1573 	      fprintf (dump_file, "\n");
1574 	    }
1575 	  fprintf (dump_file, "SMS doloop\n");
1576 	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1577           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1578           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1579 	}
1580 
1581 
1582       /* In case of th loop have doloop register it gets special
1583 	 handling.  */
1584       count_init = NULL;
1585       if ((count_reg = doloop_register_get (head, tail)))
1586 	{
1587 	  basic_block pre_header;
1588 
1589 	  pre_header = loop_preheader_edge (loop)->src;
1590 	  count_init = const_iteration_count (count_reg, pre_header,
1591 					      &loop_count);
1592 	}
1593       gcc_assert (count_reg);
1594 
1595       if (dump_file && count_init)
1596         {
1597           fprintf (dump_file, "SMS const-doloop ");
1598           fprintf (dump_file, "%" PRId64,
1599 		     loop_count);
1600           fprintf (dump_file, "\n");
1601         }
1602 
1603       node_order = XNEWVEC (int, g->num_nodes);
1604 
1605       mii = 1; /* Need to pass some estimate of mii.  */
1606       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1607       mii = MAX (res_MII (g), rec_mii);
1608       maxii = MAX (max_asap, MAXII_FACTOR * mii);
1609 
1610       if (dump_file)
1611 	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1612 		 rec_mii, mii, maxii);
1613 
1614       for (;;)
1615 	{
1616 	  set_node_sched_params (g);
1617 
1618 	  stage_count = 0;
1619 	  opt_sc_p = false;
1620 	  ps = sms_schedule_by_order (g, mii, maxii, node_order);
1621 
1622 	  if (ps)
1623 	    {
1624 	      /* Try to achieve optimized SC by normalizing the partial
1625 		 schedule (having the cycles start from cycle zero).
1626 		 The branch location must be placed in row ii-1 in the
1627 		 final scheduling.	If failed, shift all instructions to
1628 		 position the branch in row ii-1.  */
1629 	      opt_sc_p = optimize_sc (ps, g);
1630 	      if (opt_sc_p)
1631 		stage_count = calculate_stage_count (ps, 0);
1632 	      else
1633 		{
1634 		  /* Bring the branch to cycle ii-1.  */
1635 		  int amount = (SCHED_TIME (g->closing_branch->cuid)
1636 				- (ps->ii - 1));
1637 
1638 		  if (dump_file)
1639 		    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1640 
1641 		  stage_count = calculate_stage_count (ps, amount);
1642 		}
1643 
1644 	      gcc_assert (stage_count >= 1);
1645 	    }
1646 
1647 	  /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1648 	     1 means that there is no interleaving between iterations thus
1649 	     we let the scheduling passes do the job in this case.  */
1650 	  if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1651 	      || (count_init && (loop_count <= stage_count))
1652 	      || (max_trip_count >= 0 && max_trip_count <= stage_count)
1653 	      || (trip_count >= 0 && trip_count <= stage_count))
1654 	    {
1655 	      if (dump_file)
1656 		{
1657 		  fprintf (dump_file, "SMS failed... \n");
1658 		  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1659 			   " loop-count=", stage_count);
1660 		  fprintf (dump_file, "%" PRId64, loop_count);
1661 		  fprintf (dump_file, ", trip-count=");
1662 		  fprintf (dump_file, "%" PRId64 "max %" PRId64,
1663 			   (int64_t) trip_count, (int64_t) max_trip_count);
1664 		  fprintf (dump_file, ")\n");
1665 		}
1666 	      break;
1667 	    }
1668 
1669           if (!opt_sc_p)
1670             {
1671 	      /* Rotate the partial schedule to have the branch in row ii-1.  */
1672               int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1673 
1674               reset_sched_times (ps, amount);
1675               rotate_partial_schedule (ps, amount);
1676             }
1677 
1678 	  set_columns_for_ps (ps);
1679 
1680 	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1681 	  if (!schedule_reg_moves (ps))
1682 	    {
1683 	      mii = ps->ii + 1;
1684 	      free_partial_schedule (ps);
1685 	      continue;
1686 	    }
1687 
1688 	  /* Moves that handle incoming values might have been added
1689 	     to a new first stage.  Bump the stage count if so.
1690 
1691 	     ??? Perhaps we could consider rotating the schedule here
1692 	     instead?  */
1693 	  if (PS_MIN_CYCLE (ps) < min_cycle)
1694 	    {
1695 	      reset_sched_times (ps, 0);
1696 	      stage_count++;
1697 	    }
1698 
1699 	  /* The stage count should now be correct without rotation.  */
1700 	  gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1701 	  PS_STAGE_COUNT (ps) = stage_count;
1702 
1703 	  canon_loop (loop);
1704 
1705           if (dump_file)
1706             {
1707 	      dump_insn_location (tail);
1708 	      fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1709 		       ps->ii, stage_count);
1710 	      print_partial_schedule (ps, dump_file);
1711 	    }
1712 
1713           /* case the BCT count is not known , Do loop-versioning */
1714 	  if (count_reg && ! count_init)
1715             {
1716 	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1717 					 gen_int_mode (stage_count,
1718 						       GET_MODE (count_reg)));
1719 	      profile_probability prob = profile_probability::guessed_always ()
1720 				.apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);
1721 
1722 	      loop_version (loop, comp_rtx, &condition_bb,
1723 	  		    prob, prob.invert (),
1724 			    prob, prob.invert (), true);
1725 	     }
1726 
1727 	  /* Set new iteration count of loop kernel.  */
1728           if (count_reg && count_init)
1729 	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1730 						     - stage_count + 1);
1731 
1732 	  /* Now apply the scheduled kernel to the RTL of the loop.  */
1733 	  permute_partial_schedule (ps, g->closing_branch->first_note);
1734 
1735           /* Mark this loop as software pipelined so the later
1736 	     scheduling passes don't touch it.  */
1737 	  if (! flag_resched_modulo_sched)
1738 	    mark_loop_unsched (loop);
1739 
1740 	  /* The life-info is not valid any more.  */
1741 	  df_set_bb_dirty (g->bb);
1742 
1743 	  apply_reg_moves (ps);
1744 	  if (dump_file)
1745 	    print_node_sched_params (dump_file, g->num_nodes, ps);
1746 	  /* Generate prolog and epilog.  */
1747           generate_prolog_epilog (ps, loop, count_reg, count_init);
1748 	  break;
1749 	}
1750 
1751       free_partial_schedule (ps);
1752       node_sched_param_vec.release ();
1753       free (node_order);
1754       free_ddg (g);
1755     }
1756 
1757   free (g_arr);
1758 
1759   /* Release scheduler data, needed until now because of DFA.  */
1760   haifa_sched_finish ();
1761   loop_optimizer_finalize ();
1762 }
1763 
1764 /* The SMS scheduling algorithm itself
1765    -----------------------------------
1766    Input: 'O' an ordered list of insns of a loop.
1767    Output: A scheduling of the loop - kernel, prolog, and epilogue.
1768 
1769    'Q' is the empty Set
1770    'PS' is the partial schedule; it holds the currently scheduled nodes with
1771 	their cycle/slot.
1772    'PSP' previously scheduled predecessors.
1773    'PSS' previously scheduled successors.
1774    't(u)' the cycle where u is scheduled.
1775    'l(u)' is the latency of u.
1776    'd(v,u)' is the dependence distance from v to u.
1777    'ASAP(u)' the earliest time at which u could be scheduled as computed in
1778 	     the node ordering phase.
1779    'check_hardware_resources_conflicts(u, PS, c)'
1780 			     run a trace around cycle/slot through DFA model
1781 			     to check resource conflicts involving instruction u
1782 			     at cycle c given the partial schedule PS.
1783    'add_to_partial_schedule_at_time(u, PS, c)'
1784 			     Add the node/instruction u to the partial schedule
1785 			     PS at time c.
1786    'calculate_register_pressure(PS)'
1787 			     Given a schedule of instructions, calculate the register
1788 			     pressure it implies.  One implementation could be the
1789 			     maximum number of overlapping live ranges.
1790    'maxRP' The maximum allowed register pressure, it is usually derived from the number
1791 	   registers available in the hardware.
1792 
1793    1. II = MII.
1794    2. PS = empty list
1795    3. for each node u in O in pre-computed order
1796    4.   if (PSP(u) != Q && PSS(u) == Q) then
1797    5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1798    6.     start = Early_start; end = Early_start + II - 1; step = 1
1799    11.  else if (PSP(u) == Q && PSS(u) != Q) then
1800    12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1801    13.     start = Late_start; end = Late_start - II + 1; step = -1
1802    14.  else if (PSP(u) != Q && PSS(u) != Q) then
1803    15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1804    16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1805    17.     start = Early_start;
1806    18.     end = min(Early_start + II - 1 , Late_start);
1807    19.     step = 1
1808    20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1809    21.	  start = ASAP(u); end = start + II - 1; step = 1
1810    22.  endif
1811 
1812    23.  success = false
1813    24.  for (c = start ; c != end ; c += step)
1814    25.     if check_hardware_resources_conflicts(u, PS, c) then
1815    26.       add_to_partial_schedule_at_time(u, PS, c)
1816    27.       success = true
1817    28.       break
1818    29.     endif
1819    30.  endfor
1820    31.  if (success == false) then
1821    32.    II = II + 1
1822    33.    if (II > maxII) then
1823    34.       finish - failed to schedule
1824    35.	 endif
1825    36.    goto 2.
1826    37.  endif
1827    38. endfor
1828    39. if (calculate_register_pressure(PS) > maxRP) then
1829    40.    goto 32.
1830    41. endif
1831    42. compute epilogue & prologue
1832    43. finish - succeeded to schedule
1833 
1834    ??? The algorithm restricts the scheduling window to II cycles.
1835    In rare cases, it may be better to allow windows of II+1 cycles.
1836    The window would then start and end on the same row, but with
1837    different "must precede" and "must follow" requirements.  */
1838 
1839 /* A limit on the number of cycles that resource conflicts can span.  ??? Should
1840    be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1841    set to 0 to save compile time.  */
1842 #define DFA_HISTORY SMS_DFA_HISTORY
1843 
1844 /* A threshold for the number of repeated unsuccessful attempts to insert
1845    an empty row, before we flush the partial schedule and start over.  */
1846 #define MAX_SPLIT_NUM 10
1847 /* Given the partial schedule PS, this function calculates and returns the
1848    cycles in which we can schedule the node with the given index I.
1849    NOTE: Here we do the backtracking in SMS, in some special cases. We have
1850    noticed that there are several cases in which we fail    to SMS the loop
1851    because the sched window of a node is empty    due to tight data-deps. In
1852    such cases we want to unschedule    some of the predecessors/successors
1853    until we get non-empty    scheduling window.  It returns -1 if the
1854    scheduling window is empty and zero otherwise.  */
1855 
1856 static int
1857 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1858 		  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1859 		  int *end_p)
1860 {
1861   int start, step, end;
1862   int early_start, late_start;
1863   ddg_edge_ptr e;
1864   auto_sbitmap psp (ps->g->num_nodes);
1865   auto_sbitmap pss (ps->g->num_nodes);
1866   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1867   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1868   int psp_not_empty;
1869   int pss_not_empty;
1870   int count_preds;
1871   int count_succs;
1872 
1873   /* 1. compute sched window for u (start, end, step).  */
1874   bitmap_clear (psp);
1875   bitmap_clear (pss);
1876   psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1877   pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1878 
1879   /* We first compute a forward range (start <= end), then decide whether
1880      to reverse it.  */
1881   early_start = INT_MIN;
1882   late_start = INT_MAX;
1883   start = INT_MIN;
1884   end = INT_MAX;
1885   step = 1;
1886 
1887   count_preds = 0;
1888   count_succs = 0;
1889 
1890   if (dump_file && (psp_not_empty || pss_not_empty))
1891     {
1892       fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1893 	       "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1894       fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1895 	       "start", "early start", "late start", "end", "time");
1896       fprintf (dump_file, "=========== =========== =========== ==========="
1897 	       " =====\n");
1898     }
1899   /* Calculate early_start and limit end.  Both bounds are inclusive.  */
1900   if (psp_not_empty)
1901     for (e = u_node->in; e != 0; e = e->next_in)
1902       {
1903 	int v = e->src->cuid;
1904 
1905 	if (bitmap_bit_p (sched_nodes, v))
1906 	  {
1907 	    int p_st = SCHED_TIME (v);
1908 	    int earliest = p_st + e->latency - (e->distance * ii);
1909 	    int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1910 
1911 	    if (dump_file)
1912 	      {
1913 		fprintf (dump_file, "%11s %11d %11s %11d %5d",
1914 			 "", earliest, "", latest, p_st);
1915 		print_ddg_edge (dump_file, e);
1916 		fprintf (dump_file, "\n");
1917 	      }
1918 
1919 	    early_start = MAX (early_start, earliest);
1920 	    end = MIN (end, latest);
1921 
1922 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1923 	      count_preds++;
1924 	  }
1925       }
1926 
1927   /* Calculate late_start and limit start.  Both bounds are inclusive.  */
1928   if (pss_not_empty)
1929     for (e = u_node->out; e != 0; e = e->next_out)
1930       {
1931 	int v = e->dest->cuid;
1932 
1933 	if (bitmap_bit_p (sched_nodes, v))
1934 	  {
1935 	    int s_st = SCHED_TIME (v);
1936 	    int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1937 	    int latest = s_st - e->latency + (e->distance * ii);
1938 
1939 	    if (dump_file)
1940 	      {
1941 		fprintf (dump_file, "%11d %11s %11d %11s %5d",
1942 			 earliest, "", latest, "", s_st);
1943 		print_ddg_edge (dump_file, e);
1944 		fprintf (dump_file, "\n");
1945 	      }
1946 
1947 	    start = MAX (start, earliest);
1948 	    late_start = MIN (late_start, latest);
1949 
1950 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1951 	      count_succs++;
1952 	  }
1953       }
1954 
1955   if (dump_file && (psp_not_empty || pss_not_empty))
1956     {
1957       fprintf (dump_file, "----------- ----------- ----------- -----------"
1958 	       " -----\n");
1959       fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1960 	       start, early_start, late_start, end, "",
1961 	       "(max, max, min, min)");
1962     }
1963 
1964   /* Get a target scheduling window no bigger than ii.  */
1965   if (early_start == INT_MIN && late_start == INT_MAX)
1966     early_start = NODE_ASAP (u_node);
1967   else if (early_start == INT_MIN)
1968     early_start = late_start - (ii - 1);
1969   late_start = MIN (late_start, early_start + (ii - 1));
1970 
1971   /* Apply memory dependence limits.  */
1972   start = MAX (start, early_start);
1973   end = MIN (end, late_start);
1974 
1975   if (dump_file && (psp_not_empty || pss_not_empty))
1976     fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1977 	     "", start, end, "", "");
1978 
1979   /* If there are at least as many successors as predecessors, schedule the
1980      node close to its successors.  */
1981   if (pss_not_empty && count_succs >= count_preds)
1982     {
1983       std::swap (start, end);
1984       step = -1;
1985     }
1986 
1987   /* Now that we've finalized the window, make END an exclusive rather
1988      than an inclusive bound.  */
1989   end += step;
1990 
1991   *start_p = start;
1992   *step_p = step;
1993   *end_p = end;
1994 
1995   if ((start >= end && step == 1) || (start <= end && step == -1))
1996     {
1997       if (dump_file)
1998 	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1999 		 start, end, step);
2000       return -1;
2001     }
2002 
2003   return 0;
2004 }
2005 
2006 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2007    node currently been scheduled.  At the end of the calculation
2008    MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2009    U_NODE which are (1) already scheduled in the first/last row of
2010    U_NODE's scheduling window, (2) whose dependence inequality with U
2011    becomes an equality when U is scheduled in this same row, and (3)
2012    whose dependence latency is zero.
2013 
2014    The first and last rows are calculated using the following parameters:
2015    START/END rows - The cycles that begins/ends the traversal on the window;
2016    searching for an empty cycle to schedule U_NODE.
2017    STEP - The direction in which we traverse the window.
2018    II - The initiation interval.  */
2019 
2020 static void
2021 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2022 			       int step, int ii, sbitmap sched_nodes,
2023 			       sbitmap must_precede, sbitmap must_follow)
2024 {
2025   ddg_edge_ptr e;
2026   int first_cycle_in_window, last_cycle_in_window;
2027 
2028   gcc_assert (must_precede && must_follow);
2029 
2030   /* Consider the following scheduling window:
2031      {first_cycle_in_window, first_cycle_in_window+1, ...,
2032      last_cycle_in_window}.  If step is 1 then the following will be
2033      the order we traverse the window: {start=first_cycle_in_window,
2034      first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2035      or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2036      end=first_cycle_in_window-1} if step is -1.  */
2037   first_cycle_in_window = (step == 1) ? start : end - step;
2038   last_cycle_in_window = (step == 1) ? end - step : start;
2039 
2040   bitmap_clear (must_precede);
2041   bitmap_clear (must_follow);
2042 
2043   if (dump_file)
2044     fprintf (dump_file, "\nmust_precede: ");
2045 
2046   /* Instead of checking if:
2047       (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2048       && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2049              first_cycle_in_window)
2050       && e->latency == 0
2051      we use the fact that latency is non-negative:
2052       SCHED_TIME (e->src) - (e->distance * ii) <=
2053       SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2054       first_cycle_in_window
2055      and check only if
2056       SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
2057   for (e = u_node->in; e != 0; e = e->next_in)
2058     if (bitmap_bit_p (sched_nodes, e->src->cuid)
2059 	&& ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2060              first_cycle_in_window))
2061       {
2062 	if (dump_file)
2063 	  fprintf (dump_file, "%d ", e->src->cuid);
2064 
2065 	bitmap_set_bit (must_precede, e->src->cuid);
2066       }
2067 
2068   if (dump_file)
2069     fprintf (dump_file, "\nmust_follow: ");
2070 
2071   /* Instead of checking if:
2072       (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2073       && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2074              last_cycle_in_window)
2075       && e->latency == 0
2076      we use the fact that latency is non-negative:
2077       SCHED_TIME (e->dest) + (e->distance * ii) >=
2078       SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2079       last_cycle_in_window
2080      and check only if
2081       SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
2082   for (e = u_node->out; e != 0; e = e->next_out)
2083     if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2084 	&& ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2085              last_cycle_in_window))
2086       {
2087 	if (dump_file)
2088 	  fprintf (dump_file, "%d ", e->dest->cuid);
2089 
2090 	bitmap_set_bit (must_follow, e->dest->cuid);
2091       }
2092 
2093   if (dump_file)
2094     fprintf (dump_file, "\n");
2095 }
2096 
2097 /* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
2098    parameters to decide if that's possible:
2099    PS - The partial schedule.
2100    U - The serial number of U_NODE.
2101    NUM_SPLITS - The number of row splits made so far.
2102    MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2103    the first row of the scheduling window)
2104    MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2105    last row of the scheduling window)  */
2106 
2107 static bool
2108 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2109 			      int u, int cycle, sbitmap sched_nodes,
2110 			      int *num_splits, sbitmap must_precede,
2111 			      sbitmap must_follow)
2112 {
2113   ps_insn_ptr psi;
2114   bool success = 0;
2115 
2116   verify_partial_schedule (ps, sched_nodes);
2117   psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2118   if (psi)
2119     {
2120       SCHED_TIME (u) = cycle;
2121       bitmap_set_bit (sched_nodes, u);
2122       success = 1;
2123       *num_splits = 0;
2124       if (dump_file)
2125 	fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2126 
2127     }
2128 
2129   return success;
2130 }
2131 
2132 /* This function implements the scheduling algorithm for SMS according to the
2133    above algorithm.  */
2134 static partial_schedule_ptr
2135 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2136 {
2137   int ii = mii;
2138   int i, c, success, num_splits = 0;
2139   int flush_and_start_over = true;
2140   int num_nodes = g->num_nodes;
2141   int start, end, step; /* Place together into one struct?  */
2142   auto_sbitmap sched_nodes (num_nodes);
2143   auto_sbitmap must_precede (num_nodes);
2144   auto_sbitmap must_follow (num_nodes);
2145   auto_sbitmap tobe_scheduled (num_nodes);
2146 
2147   partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2148 
2149   bitmap_ones (tobe_scheduled);
2150   bitmap_clear (sched_nodes);
2151 
2152   while (flush_and_start_over && (ii < maxii))
2153     {
2154 
2155       if (dump_file)
2156 	fprintf (dump_file, "Starting with ii=%d\n", ii);
2157       flush_and_start_over = false;
2158       bitmap_clear (sched_nodes);
2159 
2160       for (i = 0; i < num_nodes; i++)
2161 	{
2162 	  int u = nodes_order[i];
2163   	  ddg_node_ptr u_node = &ps->g->nodes[u];
2164 	  rtx_insn *insn = u_node->insn;
2165 
2166 	  if (!NONDEBUG_INSN_P (insn))
2167 	    {
2168 	      bitmap_clear_bit (tobe_scheduled, u);
2169 	      continue;
2170 	    }
2171 
2172 	  if (bitmap_bit_p (sched_nodes, u))
2173 	    continue;
2174 
2175 	  /* Try to get non-empty scheduling window.  */
2176 	 success = 0;
2177          if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2178                                 &step, &end) == 0)
2179             {
2180               if (dump_file)
2181                 fprintf (dump_file, "\nTrying to schedule node %d "
2182 			 "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
2183                         (g->nodes[u].insn)), start, end, step);
2184 
2185               gcc_assert ((step > 0 && start < end)
2186                           || (step < 0 && start > end));
2187 
2188               calculate_must_precede_follow (u_node, start, end, step, ii,
2189                                              sched_nodes, must_precede,
2190                                              must_follow);
2191 
2192               for (c = start; c != end; c += step)
2193                 {
2194 		  sbitmap tmp_precede, tmp_follow;
2195 
2196                   set_must_precede_follow (&tmp_follow, must_follow,
2197 		                           &tmp_precede, must_precede,
2198                                            c, start, end, step);
2199                   success =
2200                     try_scheduling_node_in_cycle (ps, u, c,
2201                                                   sched_nodes,
2202                                                   &num_splits, tmp_precede,
2203                                                   tmp_follow);
2204                   if (success)
2205                     break;
2206                 }
2207 
2208               verify_partial_schedule (ps, sched_nodes);
2209             }
2210             if (!success)
2211             {
2212               int split_row;
2213 
2214               if (ii++ == maxii)
2215                 break;
2216 
2217               if (num_splits >= MAX_SPLIT_NUM)
2218                 {
2219                   num_splits = 0;
2220                   flush_and_start_over = true;
2221                   verify_partial_schedule (ps, sched_nodes);
2222                   reset_partial_schedule (ps, ii);
2223                   verify_partial_schedule (ps, sched_nodes);
2224                   break;
2225                 }
2226 
2227               num_splits++;
2228               /* The scheduling window is exclusive of 'end'
2229                  whereas compute_split_window() expects an inclusive,
2230                  ordered range.  */
2231               if (step == 1)
2232                 split_row = compute_split_row (sched_nodes, start, end - 1,
2233                                                ps->ii, u_node);
2234               else
2235                 split_row = compute_split_row (sched_nodes, end + 1, start,
2236                                                ps->ii, u_node);
2237 
2238               ps_insert_empty_row (ps, split_row, sched_nodes);
2239               i--;              /* Go back and retry node i.  */
2240 
2241               if (dump_file)
2242                 fprintf (dump_file, "num_splits=%d\n", num_splits);
2243             }
2244 
2245           /* ??? If (success), check register pressure estimates.  */
2246         }                       /* Continue with next node.  */
2247     }                           /* While flush_and_start_over.  */
2248   if (ii >= maxii)
2249     {
2250       free_partial_schedule (ps);
2251       ps = NULL;
2252     }
2253   else
2254     gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2255 
2256   return ps;
2257 }
2258 
2259 /* This function inserts a new empty row into PS at the position
2260    according to SPLITROW, keeping all already scheduled instructions
2261    intact and updating their SCHED_TIME and cycle accordingly.  */
2262 static void
2263 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2264 		     sbitmap sched_nodes)
2265 {
2266   ps_insn_ptr crr_insn;
2267   ps_insn_ptr *rows_new;
2268   int ii = ps->ii;
2269   int new_ii = ii + 1;
2270   int row;
2271   int *rows_length_new;
2272 
2273   verify_partial_schedule (ps, sched_nodes);
2274 
2275   /* We normalize sched_time and rotate ps to have only non-negative sched
2276      times, for simplicity of updating cycles after inserting new row.  */
2277   split_row -= ps->min_cycle;
2278   split_row = SMODULO (split_row, ii);
2279   if (dump_file)
2280     fprintf (dump_file, "split_row=%d\n", split_row);
2281 
2282   reset_sched_times (ps, PS_MIN_CYCLE (ps));
2283   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2284 
2285   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2286   rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2287   for (row = 0; row < split_row; row++)
2288     {
2289       rows_new[row] = ps->rows[row];
2290       rows_length_new[row] = ps->rows_length[row];
2291       ps->rows[row] = NULL;
2292       for (crr_insn = rows_new[row];
2293 	   crr_insn; crr_insn = crr_insn->next_in_row)
2294 	{
2295 	  int u = crr_insn->id;
2296 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2297 
2298 	  SCHED_TIME (u) = new_time;
2299 	  crr_insn->cycle = new_time;
2300 	  SCHED_ROW (u) = new_time % new_ii;
2301 	  SCHED_STAGE (u) = new_time / new_ii;
2302 	}
2303 
2304     }
2305 
2306   rows_new[split_row] = NULL;
2307 
2308   for (row = split_row; row < ii; row++)
2309     {
2310       rows_new[row + 1] = ps->rows[row];
2311       rows_length_new[row + 1] = ps->rows_length[row];
2312       ps->rows[row] = NULL;
2313       for (crr_insn = rows_new[row + 1];
2314 	   crr_insn; crr_insn = crr_insn->next_in_row)
2315 	{
2316 	  int u = crr_insn->id;
2317 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2318 
2319 	  SCHED_TIME (u) = new_time;
2320 	  crr_insn->cycle = new_time;
2321 	  SCHED_ROW (u) = new_time % new_ii;
2322 	  SCHED_STAGE (u) = new_time / new_ii;
2323 	}
2324     }
2325 
2326   /* Updating ps.  */
2327   ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2328     + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2329   ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2330     + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2331   free (ps->rows);
2332   ps->rows = rows_new;
2333   free (ps->rows_length);
2334   ps->rows_length = rows_length_new;
2335   ps->ii = new_ii;
2336   gcc_assert (ps->min_cycle >= 0);
2337 
2338   verify_partial_schedule (ps, sched_nodes);
2339 
2340   if (dump_file)
2341     fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2342 	     ps->max_cycle);
2343 }
2344 
2345 /* Given U_NODE which is the node that failed to be scheduled; LOW and
2346    UP which are the boundaries of it's scheduling window; compute using
2347    SCHED_NODES and II a row in the partial schedule that can be split
2348    which will separate a critical predecessor from a critical successor
2349    thereby expanding the window, and return it.  */
2350 static int
2351 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2352 		   ddg_node_ptr u_node)
2353 {
2354   ddg_edge_ptr e;
2355   int lower = INT_MIN, upper = INT_MAX;
2356   int crit_pred = -1;
2357   int crit_succ = -1;
2358   int crit_cycle;
2359 
2360   for (e = u_node->in; e != 0; e = e->next_in)
2361     {
2362       int v = e->src->cuid;
2363 
2364       if (bitmap_bit_p (sched_nodes, v)
2365 	  && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2366 	if (SCHED_TIME (v) > lower)
2367 	  {
2368 	    crit_pred = v;
2369 	    lower = SCHED_TIME (v);
2370 	  }
2371     }
2372 
2373   if (crit_pred >= 0)
2374     {
2375       crit_cycle = SCHED_TIME (crit_pred) + 1;
2376       return SMODULO (crit_cycle, ii);
2377     }
2378 
2379   for (e = u_node->out; e != 0; e = e->next_out)
2380     {
2381       int v = e->dest->cuid;
2382 
2383       if (bitmap_bit_p (sched_nodes, v)
2384 	  && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2385 	if (SCHED_TIME (v) < upper)
2386 	  {
2387 	    crit_succ = v;
2388 	    upper = SCHED_TIME (v);
2389 	  }
2390     }
2391 
2392   if (crit_succ >= 0)
2393     {
2394       crit_cycle = SCHED_TIME (crit_succ);
2395       return SMODULO (crit_cycle, ii);
2396     }
2397 
2398   if (dump_file)
2399     fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2400 
2401   return SMODULO ((low + up + 1) / 2, ii);
2402 }
2403 
2404 static void
2405 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2406 {
2407   int row;
2408   ps_insn_ptr crr_insn;
2409 
2410   for (row = 0; row < ps->ii; row++)
2411     {
2412       int length = 0;
2413 
2414       for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2415 	{
2416 	  int u = crr_insn->id;
2417 
2418 	  length++;
2419 	  gcc_assert (bitmap_bit_p (sched_nodes, u));
2420 	  /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2421 	     popcount (sched_nodes) == number of insns in ps.  */
2422 	  gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2423 	  gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2424 	}
2425 
2426       gcc_assert (ps->rows_length[row] == length);
2427     }
2428 }
2429 
2430 
2431 /* This page implements the algorithm for ordering the nodes of a DDG
2432    for modulo scheduling, activated through the
2433    "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
2434 
2435 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2436 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2437 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2438 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2439 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2440 #define DEPTH(x) (ASAP ((x)))
2441 
2442 typedef struct node_order_params * nopa;
2443 
2444 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2445 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2446 static nopa  calculate_order_params (ddg_ptr, int, int *);
2447 static int find_max_asap (ddg_ptr, sbitmap);
2448 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2449 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2450 
2451 enum sms_direction {BOTTOMUP, TOPDOWN};
2452 
2453 struct node_order_params
2454 {
2455   int asap;
2456   int alap;
2457   int height;
2458 };
2459 
2460 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
2461 static void
2462 check_nodes_order (int *node_order, int num_nodes)
2463 {
2464   int i;
2465   auto_sbitmap tmp (num_nodes);
2466 
2467   bitmap_clear (tmp);
2468 
2469   if (dump_file)
2470     fprintf (dump_file, "SMS final nodes order: \n");
2471 
2472   for (i = 0; i < num_nodes; i++)
2473     {
2474       int u = node_order[i];
2475 
2476       if (dump_file)
2477         fprintf (dump_file, "%d ", u);
2478       gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2479 
2480       bitmap_set_bit (tmp, u);
2481     }
2482 
2483   if (dump_file)
2484     fprintf (dump_file, "\n");
2485 }
2486 
2487 /* Order the nodes of G for scheduling and pass the result in
2488    NODE_ORDER.  Also set aux.count of each node to ASAP.
2489    Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2490 static int
2491 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2492 {
2493   int i;
2494   int rec_mii = 0;
2495   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2496 
2497   nopa nops = calculate_order_params (g, mii, pmax_asap);
2498 
2499   if (dump_file)
2500     print_sccs (dump_file, sccs, g);
2501 
2502   order_nodes_of_sccs (sccs, node_order);
2503 
2504   if (sccs->num_sccs > 0)
2505     /* First SCC has the largest recurrence_length.  */
2506     rec_mii = sccs->sccs[0]->recurrence_length;
2507 
2508   /* Save ASAP before destroying node_order_params.  */
2509   for (i = 0; i < g->num_nodes; i++)
2510     {
2511       ddg_node_ptr v = &g->nodes[i];
2512       v->aux.count = ASAP (v);
2513     }
2514 
2515   free (nops);
2516   free_ddg_all_sccs (sccs);
2517   check_nodes_order (node_order, g->num_nodes);
2518 
2519   return rec_mii;
2520 }
2521 
2522 static void
2523 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2524 {
2525   int i, pos = 0;
2526   ddg_ptr g = all_sccs->ddg;
2527   int num_nodes = g->num_nodes;
2528   auto_sbitmap prev_sccs (num_nodes);
2529   auto_sbitmap on_path (num_nodes);
2530   auto_sbitmap tmp (num_nodes);
2531   auto_sbitmap ones (num_nodes);
2532 
2533   bitmap_clear (prev_sccs);
2534   bitmap_ones (ones);
2535 
2536   /* Perform the node ordering starting from the SCC with the highest recMII.
2537      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2538   for (i = 0; i < all_sccs->num_sccs; i++)
2539     {
2540       ddg_scc_ptr scc = all_sccs->sccs[i];
2541 
2542       /* Add nodes on paths from previous SCCs to the current SCC.  */
2543       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2544       bitmap_ior (tmp, scc->nodes, on_path);
2545 
2546       /* Add nodes on paths from the current SCC to previous SCCs.  */
2547       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2548       bitmap_ior (tmp, tmp, on_path);
2549 
2550       /* Remove nodes of previous SCCs from current extended SCC.  */
2551       bitmap_and_compl (tmp, tmp, prev_sccs);
2552 
2553       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2554       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2555     }
2556 
2557   /* Handle the remaining nodes that do not belong to any scc.  Each call
2558      to order_nodes_in_scc handles a single connected component.  */
2559   while (pos < g->num_nodes)
2560     {
2561       bitmap_and_compl (tmp, ones, prev_sccs);
2562       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2563     }
2564 }
2565 
2566 /* MII is needed if we consider backarcs (that do not close recursive cycles).  */
2567 static struct node_order_params *
2568 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2569 {
2570   int u;
2571   int max_asap;
2572   int num_nodes = g->num_nodes;
2573   ddg_edge_ptr e;
2574   /* Allocate a place to hold ordering params for each node in the DDG.  */
2575   nopa node_order_params_arr;
2576 
2577   /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
2578   node_order_params_arr = (nopa) xcalloc (num_nodes,
2579 					  sizeof (struct node_order_params));
2580 
2581   /* Set the aux pointer of each node to point to its order_params structure.  */
2582   for (u = 0; u < num_nodes; u++)
2583     g->nodes[u].aux.info = &node_order_params_arr[u];
2584 
2585   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2586      calculate ASAP, ALAP, mobility, distance, and height for each node
2587      in the dependence (direct acyclic) graph.  */
2588 
2589   /* We assume that the nodes in the array are in topological order.  */
2590 
2591   max_asap = 0;
2592   for (u = 0; u < num_nodes; u++)
2593     {
2594       ddg_node_ptr u_node = &g->nodes[u];
2595 
2596       ASAP (u_node) = 0;
2597       for (e = u_node->in; e; e = e->next_in)
2598 	if (e->distance == 0)
2599 	  ASAP (u_node) = MAX (ASAP (u_node),
2600 			       ASAP (e->src) + e->latency);
2601       max_asap = MAX (max_asap, ASAP (u_node));
2602     }
2603 
2604   for (u = num_nodes - 1; u > -1; u--)
2605     {
2606       ddg_node_ptr u_node = &g->nodes[u];
2607 
2608       ALAP (u_node) = max_asap;
2609       HEIGHT (u_node) = 0;
2610       for (e = u_node->out; e; e = e->next_out)
2611 	if (e->distance == 0)
2612 	  {
2613 	    ALAP (u_node) = MIN (ALAP (u_node),
2614 				 ALAP (e->dest) - e->latency);
2615 	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
2616 				   HEIGHT (e->dest) + e->latency);
2617 	  }
2618     }
2619   if (dump_file)
2620   {
2621     fprintf (dump_file, "\nOrder params\n");
2622     for (u = 0; u < num_nodes; u++)
2623       {
2624         ddg_node_ptr u_node = &g->nodes[u];
2625 
2626         fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2627                  ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2628       }
2629   }
2630 
2631   *pmax_asap = max_asap;
2632   return node_order_params_arr;
2633 }
2634 
2635 static int
2636 find_max_asap (ddg_ptr g, sbitmap nodes)
2637 {
2638   unsigned int u = 0;
2639   int max_asap = -1;
2640   int result = -1;
2641   sbitmap_iterator sbi;
2642 
2643   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2644     {
2645       ddg_node_ptr u_node = &g->nodes[u];
2646 
2647       if (max_asap < ASAP (u_node))
2648 	{
2649 	  max_asap = ASAP (u_node);
2650 	  result = u;
2651 	}
2652     }
2653   return result;
2654 }
2655 
2656 static int
2657 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2658 {
2659   unsigned int u = 0;
2660   int max_hv = -1;
2661   int min_mob = INT_MAX;
2662   int result = -1;
2663   sbitmap_iterator sbi;
2664 
2665   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2666     {
2667       ddg_node_ptr u_node = &g->nodes[u];
2668 
2669       if (max_hv < HEIGHT (u_node))
2670 	{
2671 	  max_hv = HEIGHT (u_node);
2672 	  min_mob = MOB (u_node);
2673 	  result = u;
2674 	}
2675       else if ((max_hv == HEIGHT (u_node))
2676 	       && (min_mob > MOB (u_node)))
2677 	{
2678 	  min_mob = MOB (u_node);
2679 	  result = u;
2680 	}
2681     }
2682   return result;
2683 }
2684 
2685 static int
2686 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2687 {
2688   unsigned int u = 0;
2689   int max_dv = -1;
2690   int min_mob = INT_MAX;
2691   int result = -1;
2692   sbitmap_iterator sbi;
2693 
2694   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2695     {
2696       ddg_node_ptr u_node = &g->nodes[u];
2697 
2698       if (max_dv < DEPTH (u_node))
2699 	{
2700 	  max_dv = DEPTH (u_node);
2701 	  min_mob = MOB (u_node);
2702 	  result = u;
2703 	}
2704       else if ((max_dv == DEPTH (u_node))
2705 	       && (min_mob > MOB (u_node)))
2706 	{
2707 	  min_mob = MOB (u_node);
2708 	  result = u;
2709 	}
2710     }
2711   return result;
2712 }
2713 
2714 /* Places the nodes of SCC into the NODE_ORDER array starting
2715    at position POS, according to the SMS ordering algorithm.
2716    NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2717    the NODE_ORDER array, starting from position zero.  */
2718 static int
2719 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2720 		    int * node_order, int pos)
2721 {
2722   enum sms_direction dir;
2723   int num_nodes = g->num_nodes;
2724   auto_sbitmap workset (num_nodes);
2725   auto_sbitmap tmp (num_nodes);
2726   sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2727   auto_sbitmap predecessors (num_nodes);
2728   auto_sbitmap successors (num_nodes);
2729 
2730   bitmap_clear (predecessors);
2731   find_predecessors (predecessors, g, nodes_ordered);
2732 
2733   bitmap_clear (successors);
2734   find_successors (successors, g, nodes_ordered);
2735 
2736   bitmap_clear (tmp);
2737   if (bitmap_and (tmp, predecessors, scc))
2738     {
2739       bitmap_copy (workset, tmp);
2740       dir = BOTTOMUP;
2741     }
2742   else if (bitmap_and (tmp, successors, scc))
2743     {
2744       bitmap_copy (workset, tmp);
2745       dir = TOPDOWN;
2746     }
2747   else
2748     {
2749       int u;
2750 
2751       bitmap_clear (workset);
2752       if ((u = find_max_asap (g, scc)) >= 0)
2753 	bitmap_set_bit (workset, u);
2754       dir = BOTTOMUP;
2755     }
2756 
2757   bitmap_clear (zero_bitmap);
2758   while (!bitmap_equal_p (workset, zero_bitmap))
2759     {
2760       int v;
2761       ddg_node_ptr v_node;
2762       sbitmap v_node_preds;
2763       sbitmap v_node_succs;
2764 
2765       if (dir == TOPDOWN)
2766 	{
2767 	  while (!bitmap_equal_p (workset, zero_bitmap))
2768 	    {
2769 	      v = find_max_hv_min_mob (g, workset);
2770 	      v_node = &g->nodes[v];
2771 	      node_order[pos++] = v;
2772 	      v_node_succs = NODE_SUCCESSORS (v_node);
2773 	      bitmap_and (tmp, v_node_succs, scc);
2774 
2775 	      /* Don't consider the already ordered successors again.  */
2776 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2777 	      bitmap_ior (workset, workset, tmp);
2778 	      bitmap_clear_bit (workset, v);
2779 	      bitmap_set_bit (nodes_ordered, v);
2780 	    }
2781 	  dir = BOTTOMUP;
2782 	  bitmap_clear (predecessors);
2783 	  find_predecessors (predecessors, g, nodes_ordered);
2784 	  bitmap_and (workset, predecessors, scc);
2785 	}
2786       else
2787 	{
2788 	  while (!bitmap_equal_p (workset, zero_bitmap))
2789 	    {
2790 	      v = find_max_dv_min_mob (g, workset);
2791 	      v_node = &g->nodes[v];
2792 	      node_order[pos++] = v;
2793 	      v_node_preds = NODE_PREDECESSORS (v_node);
2794 	      bitmap_and (tmp, v_node_preds, scc);
2795 
2796 	      /* Don't consider the already ordered predecessors again.  */
2797 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2798 	      bitmap_ior (workset, workset, tmp);
2799 	      bitmap_clear_bit (workset, v);
2800 	      bitmap_set_bit (nodes_ordered, v);
2801 	    }
2802 	  dir = TOPDOWN;
2803 	  bitmap_clear (successors);
2804 	  find_successors (successors, g, nodes_ordered);
2805 	  bitmap_and (workset, successors, scc);
2806 	}
2807     }
2808   sbitmap_free (zero_bitmap);
2809   return pos;
2810 }
2811 
2812 
2813 /* This page contains functions for manipulating partial-schedules during
2814    modulo scheduling.  */
2815 
2816 /* Create a partial schedule and allocate a memory to hold II rows.  */
2817 
2818 static partial_schedule_ptr
2819 create_partial_schedule (int ii, ddg_ptr g, int history)
2820 {
2821   partial_schedule_ptr ps = XNEW (struct partial_schedule);
2822   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2823   ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2824   ps->reg_moves.create (0);
2825   ps->ii = ii;
2826   ps->history = history;
2827   ps->min_cycle = INT_MAX;
2828   ps->max_cycle = INT_MIN;
2829   ps->g = g;
2830 
2831   return ps;
2832 }
2833 
2834 /* Free the PS_INSNs in rows array of the given partial schedule.
2835    ??? Consider caching the PS_INSN's.  */
2836 static void
2837 free_ps_insns (partial_schedule_ptr ps)
2838 {
2839   int i;
2840 
2841   for (i = 0; i < ps->ii; i++)
2842     {
2843       while (ps->rows[i])
2844 	{
2845 	  ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2846 
2847 	  free (ps->rows[i]);
2848 	  ps->rows[i] = ps_insn;
2849 	}
2850       ps->rows[i] = NULL;
2851     }
2852 }
2853 
2854 /* Free all the memory allocated to the partial schedule.  */
2855 
2856 static void
2857 free_partial_schedule (partial_schedule_ptr ps)
2858 {
2859   ps_reg_move_info *move;
2860   unsigned int i;
2861 
2862   if (!ps)
2863     return;
2864 
2865   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2866     sbitmap_free (move->uses);
2867   ps->reg_moves.release ();
2868 
2869   free_ps_insns (ps);
2870   free (ps->rows);
2871   free (ps->rows_length);
2872   free (ps);
2873 }
2874 
2875 /* Clear the rows array with its PS_INSNs, and create a new one with
2876    NEW_II rows.  */
2877 
2878 static void
2879 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2880 {
2881   if (!ps)
2882     return;
2883   free_ps_insns (ps);
2884   if (new_ii == ps->ii)
2885     return;
2886   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2887 						 * sizeof (ps_insn_ptr));
2888   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2889   ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2890   memset (ps->rows_length, 0, new_ii * sizeof (int));
2891   ps->ii = new_ii;
2892   ps->min_cycle = INT_MAX;
2893   ps->max_cycle = INT_MIN;
2894 }
2895 
2896 /* Prints the partial schedule as an ii rows array, for each rows
2897    print the ids of the insns in it.  */
2898 void
2899 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2900 {
2901   int i;
2902 
2903   for (i = 0; i < ps->ii; i++)
2904     {
2905       ps_insn_ptr ps_i = ps->rows[i];
2906 
2907       fprintf (dump, "\n[ROW %d ]: ", i);
2908       while (ps_i)
2909 	{
2910 	  rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2911 
2912 	  if (JUMP_P (insn))
2913 	    fprintf (dump, "%d (branch), ", INSN_UID (insn));
2914 	  else
2915 	    fprintf (dump, "%d, ", INSN_UID (insn));
2916 
2917 	  ps_i = ps_i->next_in_row;
2918 	}
2919     }
2920 }
2921 
2922 /* Creates an object of PS_INSN and initializes it to the given parameters.  */
2923 static ps_insn_ptr
2924 create_ps_insn (int id, int cycle)
2925 {
2926   ps_insn_ptr ps_i = XNEW (struct ps_insn);
2927 
2928   ps_i->id = id;
2929   ps_i->next_in_row = NULL;
2930   ps_i->prev_in_row = NULL;
2931   ps_i->cycle = cycle;
2932 
2933   return ps_i;
2934 }
2935 
2936 
2937 /* Removes the given PS_INSN from the partial schedule.  */
2938 static void
2939 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2940 {
2941   int row;
2942 
2943   gcc_assert (ps && ps_i);
2944 
2945   row = SMODULO (ps_i->cycle, ps->ii);
2946   if (! ps_i->prev_in_row)
2947     {
2948       gcc_assert (ps_i == ps->rows[row]);
2949       ps->rows[row] = ps_i->next_in_row;
2950       if (ps->rows[row])
2951 	ps->rows[row]->prev_in_row = NULL;
2952     }
2953   else
2954     {
2955       ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2956       if (ps_i->next_in_row)
2957 	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2958     }
2959 
2960   ps->rows_length[row] -= 1;
2961   free (ps_i);
2962   return;
2963 }
2964 
2965 /* Unlike what literature describes for modulo scheduling (which focuses
2966    on VLIW machines) the order of the instructions inside a cycle is
2967    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2968    where the current instruction should go relative to the already
2969    scheduled instructions in the given cycle.  Go over these
2970    instructions and find the first possible column to put it in.  */
2971 static bool
2972 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2973 		     sbitmap must_precede, sbitmap must_follow)
2974 {
2975   ps_insn_ptr next_ps_i;
2976   ps_insn_ptr first_must_follow = NULL;
2977   ps_insn_ptr last_must_precede = NULL;
2978   ps_insn_ptr last_in_row = NULL;
2979   int row;
2980 
2981   if (! ps_i)
2982     return false;
2983 
2984   row = SMODULO (ps_i->cycle, ps->ii);
2985 
2986   /* Find the first must follow and the last must precede
2987      and insert the node immediately after the must precede
2988      but make sure that it there is no must follow after it.  */
2989   for (next_ps_i = ps->rows[row];
2990        next_ps_i;
2991        next_ps_i = next_ps_i->next_in_row)
2992     {
2993       if (must_follow
2994 	  && bitmap_bit_p (must_follow, next_ps_i->id)
2995 	  && ! first_must_follow)
2996         first_must_follow = next_ps_i;
2997       if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
2998         {
2999           /* If we have already met a node that must follow, then
3000 	     there is no possible column.  */
3001   	  if (first_must_follow)
3002             return false;
3003 	  else
3004             last_must_precede = next_ps_i;
3005         }
3006       /* The closing branch must be the last in the row.  */
3007       if (must_precede
3008 	  && bitmap_bit_p (must_precede, next_ps_i->id)
3009 	  && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3010 	return false;
3011 
3012        last_in_row = next_ps_i;
3013     }
3014 
3015   /* The closing branch is scheduled as well.  Make sure there is no
3016      dependent instruction after it as the branch should be the last
3017      instruction in the row.  */
3018   if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3019     {
3020       if (first_must_follow)
3021 	return false;
3022       if (last_in_row)
3023 	{
3024 	  /* Make the branch the last in the row.  New instructions
3025 	     will be inserted at the beginning of the row or after the
3026 	     last must_precede instruction thus the branch is guaranteed
3027 	     to remain the last instruction in the row.  */
3028 	  last_in_row->next_in_row = ps_i;
3029 	  ps_i->prev_in_row = last_in_row;
3030 	  ps_i->next_in_row = NULL;
3031 	}
3032       else
3033 	ps->rows[row] = ps_i;
3034       return true;
3035     }
3036 
3037   /* Now insert the node after INSERT_AFTER_PSI.  */
3038 
3039   if (! last_must_precede)
3040     {
3041       ps_i->next_in_row = ps->rows[row];
3042       ps_i->prev_in_row = NULL;
3043       if (ps_i->next_in_row)
3044     	ps_i->next_in_row->prev_in_row = ps_i;
3045       ps->rows[row] = ps_i;
3046     }
3047   else
3048     {
3049       ps_i->next_in_row = last_must_precede->next_in_row;
3050       last_must_precede->next_in_row = ps_i;
3051       ps_i->prev_in_row = last_must_precede;
3052       if (ps_i->next_in_row)
3053         ps_i->next_in_row->prev_in_row = ps_i;
3054     }
3055 
3056   return true;
3057 }
3058 
3059 /* Advances the PS_INSN one column in its current row; returns false
3060    in failure and true in success.  Bit N is set in MUST_FOLLOW if
3061    the node with cuid N must be come after the node pointed to by
3062    PS_I when scheduled in the same cycle.  */
3063 static int
3064 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3065 			sbitmap must_follow)
3066 {
3067   ps_insn_ptr prev, next;
3068   int row;
3069 
3070   if (!ps || !ps_i)
3071     return false;
3072 
3073   row = SMODULO (ps_i->cycle, ps->ii);
3074 
3075   if (! ps_i->next_in_row)
3076     return false;
3077 
3078   /* Check if next_in_row is dependent on ps_i, both having same sched
3079      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
3080   if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3081     return false;
3082 
3083   /* Advance PS_I over its next_in_row in the doubly linked list.  */
3084   prev = ps_i->prev_in_row;
3085   next = ps_i->next_in_row;
3086 
3087   if (ps_i == ps->rows[row])
3088     ps->rows[row] = next;
3089 
3090   ps_i->next_in_row = next->next_in_row;
3091 
3092   if (next->next_in_row)
3093     next->next_in_row->prev_in_row = ps_i;
3094 
3095   next->next_in_row = ps_i;
3096   ps_i->prev_in_row = next;
3097 
3098   next->prev_in_row = prev;
3099   if (prev)
3100     prev->next_in_row = next;
3101 
3102   return true;
3103 }
3104 
3105 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3106    Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
3107    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3108    before/after (respectively) the node pointed to by PS_I when scheduled
3109    in the same cycle.  */
3110 static ps_insn_ptr
3111 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3112 		sbitmap must_precede, sbitmap must_follow)
3113 {
3114   ps_insn_ptr ps_i;
3115   int row = SMODULO (cycle, ps->ii);
3116 
3117   if (ps->rows_length[row] >= issue_rate)
3118     return NULL;
3119 
3120   ps_i = create_ps_insn (id, cycle);
3121 
3122   /* Finds and inserts PS_I according to MUST_FOLLOW and
3123      MUST_PRECEDE.  */
3124   if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3125     {
3126       free (ps_i);
3127       return NULL;
3128     }
3129 
3130   ps->rows_length[row] += 1;
3131   return ps_i;
3132 }
3133 
3134 /* Advance time one cycle.  Assumes DFA is being used.  */
3135 static void
3136 advance_one_cycle (void)
3137 {
3138   if (targetm.sched.dfa_pre_cycle_insn)
3139     state_transition (curr_state,
3140 		      targetm.sched.dfa_pre_cycle_insn ());
3141 
3142   state_transition (curr_state, NULL);
3143 
3144   if (targetm.sched.dfa_post_cycle_insn)
3145     state_transition (curr_state,
3146 		      targetm.sched.dfa_post_cycle_insn ());
3147 }
3148 
3149 
3150 
3151 /* Checks if PS has resource conflicts according to DFA, starting from
3152    FROM cycle to TO cycle; returns true if there are conflicts and false
3153    if there are no conflicts.  Assumes DFA is being used.  */
3154 static int
3155 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3156 {
3157   int cycle;
3158 
3159   state_reset (curr_state);
3160 
3161   for (cycle = from; cycle <= to; cycle++)
3162     {
3163       ps_insn_ptr crr_insn;
3164       /* Holds the remaining issue slots in the current row.  */
3165       int can_issue_more = issue_rate;
3166 
3167       /* Walk through the DFA for the current row.  */
3168       for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3169 	   crr_insn;
3170 	   crr_insn = crr_insn->next_in_row)
3171 	{
3172 	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3173 
3174 	  if (!NONDEBUG_INSN_P (insn))
3175 	    continue;
3176 
3177 	  /* Check if there is room for the current insn.  */
3178 	  if (!can_issue_more || state_dead_lock_p (curr_state))
3179 	    return true;
3180 
3181 	  /* Update the DFA state and return with failure if the DFA found
3182 	     resource conflicts.  */
3183 	  if (state_transition (curr_state, insn) >= 0)
3184 	    return true;
3185 
3186 	  if (targetm.sched.variable_issue)
3187 	    can_issue_more =
3188 	      targetm.sched.variable_issue (sched_dump, sched_verbose,
3189 					    insn, can_issue_more);
3190 	  /* A naked CLOBBER or USE generates no instruction, so don't
3191 	     let them consume issue slots.  */
3192 	  else if (GET_CODE (PATTERN (insn)) != USE
3193 		   && GET_CODE (PATTERN (insn)) != CLOBBER)
3194 	    can_issue_more--;
3195 	}
3196 
3197       /* Advance the DFA to the next cycle.  */
3198       advance_one_cycle ();
3199     }
3200   return false;
3201 }
3202 
3203 /* Checks if the given node causes resource conflicts when added to PS at
3204    cycle C.  If not the node is added to PS and returned; otherwise zero
3205    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3206    cuid N must be come before/after (respectively) the node pointed to by
3207    PS_I when scheduled in the same cycle.  */
3208 ps_insn_ptr
3209 ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3210    			     int c, sbitmap must_precede,
3211 			     sbitmap must_follow)
3212 {
3213   int has_conflicts = 0;
3214   ps_insn_ptr ps_i;
3215 
3216   /* First add the node to the PS, if this succeeds check for
3217      conflicts, trying different issue slots in the same row.  */
3218   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3219     return NULL; /* Failed to insert the node at the given cycle.  */
3220 
3221   has_conflicts = ps_has_conflicts (ps, c, c)
3222 		  || (ps->history > 0
3223 		      && ps_has_conflicts (ps,
3224 					   c - ps->history,
3225 					   c + ps->history));
3226 
3227   /* Try different issue slots to find one that the given node can be
3228      scheduled in without conflicts.  */
3229   while (has_conflicts)
3230     {
3231       if (! ps_insn_advance_column (ps, ps_i, must_follow))
3232 	break;
3233       has_conflicts = ps_has_conflicts (ps, c, c)
3234 		      || (ps->history > 0
3235 			  && ps_has_conflicts (ps,
3236 					       c - ps->history,
3237 					       c + ps->history));
3238     }
3239 
3240   if (has_conflicts)
3241     {
3242       remove_node_from_ps (ps, ps_i);
3243       return NULL;
3244     }
3245 
3246   ps->min_cycle = MIN (ps->min_cycle, c);
3247   ps->max_cycle = MAX (ps->max_cycle, c);
3248   return ps_i;
3249 }
3250 
3251 /* Calculate the stage count of the partial schedule PS.  The calculation
3252    takes into account the rotation amount passed in ROTATION_AMOUNT.  */
3253 int
3254 calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3255 {
3256   int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3257   int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3258   int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3259 
3260   /* The calculation of stage count is done adding the number of stages
3261      before cycle zero and after cycle zero.  */
3262   stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3263 
3264   return stage_count;
3265 }
3266 
3267 /* Rotate the rows of PS such that insns scheduled at time
3268    START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
3269 void
3270 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3271 {
3272   int i, row, backward_rotates;
3273   int last_row = ps->ii - 1;
3274 
3275   if (start_cycle == 0)
3276     return;
3277 
3278   backward_rotates = SMODULO (start_cycle, ps->ii);
3279 
3280   /* Revisit later and optimize this into a single loop.  */
3281   for (i = 0; i < backward_rotates; i++)
3282     {
3283       ps_insn_ptr first_row = ps->rows[0];
3284       int first_row_length = ps->rows_length[0];
3285 
3286       for (row = 0; row < last_row; row++)
3287 	{
3288 	  ps->rows[row] = ps->rows[row + 1];
3289 	  ps->rows_length[row] = ps->rows_length[row + 1];
3290 	}
3291 
3292       ps->rows[last_row] = first_row;
3293       ps->rows_length[last_row] = first_row_length;
3294     }
3295 
3296   ps->max_cycle -= start_cycle;
3297   ps->min_cycle -= start_cycle;
3298 }
3299 
3300 #endif /* INSN_SCHEDULING */
3301 
3302 /* Run instruction scheduler.  */
3303 /* Perform SMS module scheduling.  */
3304 
3305 namespace {
3306 
3307 const pass_data pass_data_sms =
3308 {
3309   RTL_PASS, /* type */
3310   "sms", /* name */
3311   OPTGROUP_NONE, /* optinfo_flags */
3312   TV_SMS, /* tv_id */
3313   0, /* properties_required */
3314   0, /* properties_provided */
3315   0, /* properties_destroyed */
3316   0, /* todo_flags_start */
3317   TODO_df_finish, /* todo_flags_finish */
3318 };
3319 
3320 class pass_sms : public rtl_opt_pass
3321 {
3322 public:
3323   pass_sms (gcc::context *ctxt)
3324     : rtl_opt_pass (pass_data_sms, ctxt)
3325   {}
3326 
3327   /* opt_pass methods: */
3328   virtual bool gate (function *)
3329 {
3330   return (optimize > 0 && flag_modulo_sched);
3331 }
3332 
3333   virtual unsigned int execute (function *);
3334 
3335 }; // class pass_sms
3336 
3337 unsigned int
3338 pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3339 {
3340 #ifdef INSN_SCHEDULING
3341   basic_block bb;
3342 
3343   /* Collect loop information to be used in SMS.  */
3344   cfg_layout_initialize (0);
3345   sms_schedule ();
3346 
3347   /* Update the life information, because we add pseudos.  */
3348   max_regno = max_reg_num ();
3349 
3350   /* Finalize layout changes.  */
3351   FOR_EACH_BB_FN (bb, fun)
3352     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3353       bb->aux = bb->next_bb;
3354   free_dominance_info (CDI_DOMINATORS);
3355   cfg_layout_finalize ();
3356 #endif /* INSN_SCHEDULING */
3357   return 0;
3358 }
3359 
3360 } // anon namespace
3361 
3362 rtl_opt_pass *
3363 make_pass_sms (gcc::context *ctxt)
3364 {
3365   return new pass_sms (ctxt);
3366 }
3367