1 /* Early (pre-RA) rematerialization
2    Copyright (C) 2017-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "df.h"
26 #include "tree-pass.h"
27 #include "memmodel.h"
28 #include "emit-rtl.h"
29 #include "insn-config.h"
30 #include "recog.h"
31 /* FIXME: The next two are only needed for gen_move_insn.  */
32 #include "tree.h"
33 #include "expr.h"
34 #include "target.h"
35 #include "inchash.h"
36 #include "rtlhash.h"
37 #include "print-rtl.h"
38 #include "rtl-iter.h"
39 #include "regs.h"
40 #include "function-abi.h"
41 
42 /* This pass runs before register allocation and implements an aggressive
43    form of rematerialization.  It looks for pseudo registers R of mode M
44    for which:
45 
46      (a) there are no call-preserved registers of mode M; and
47      (b) spilling R to the stack is expensive.
48 
49    The assumption is that it's better to recompute R after each call instead
50    of spilling it, even if this extends the live ranges of other registers.
51 
52    The motivating example for which these conditions hold are AArch64 SVE
53    vectors and predicates.  Spilling them to the stack makes the frame
54    variable-sized, which we'd like to avoid if possible.  It's also very
55    rare for SVE values to be "naturally" live across a call: usually this
56    happens as a result of CSE or other code motion.
57 
58    The pass is split into the following phases:
59 
60    Collection phase
61    ================
62 
63    First we go through all pseudo registers looking for any that meet
64    the conditions above.  For each such register R, we go through each
65    instruction that defines R to see whether any of them are suitable
66    rematerialization candidates.  If at least one is, we treat all the
67    instructions that define R as candidates, but record which ones are
68    not in fact suitable.  These unsuitable candidates exist only for the
69    sake of calculating reaching definitions (see below).
70 
71    A "candidate" is a single instruction that we want to rematerialize
72    and a "candidate register" is a register that is set by at least one
73    candidate.
74 
75    Candidate sorting
76    =================
77 
78    Next we sort the candidates based on the cfg postorder, so that if
79    candidate C1 uses candidate C2, C1 has a lower index than C2.
80    This is useful when iterating through candidate bitmaps.
81 
82    Reaching definition calculation
83    ===============================
84 
85    We then compute standard reaching-definition sets for each candidate.
86    Each set specifies which candidates might provide the current definition
87    of a live candidate register.
88 
89    From here on, a candidate C is "live" at a point P if the candidate
90    register defined by C is live at P and if C's definition reaches P.
91    An instruction I "uses" a candidate C if I takes the register defined by
92    C as input and if C is one of the reaching definitions of that register.
93 
94    Candidate validation and value numbering
95    ========================================
96 
97    Next we simultaneously decide which candidates are valid and look
98    for candidates that are equivalent to each other, assigning numbers
99    to each unique candidate value.  A candidate C is invalid if:
100 
101      (a) C uses an invalid candidate;
102 
103      (b) there is a cycle of candidate uses involving C; or
104 
105      (c) C takes a candidate register R as input and the reaching
106          definitions of R do not have the same value number.
107 
108    We assign a "representative" candidate C to each value number and from
109    here on replace references to other candidates with that value number
110    with references to C.  It is then only possible to rematerialize a
111    register R at point P if (after this replacement) there is a single
112    reaching definition of R at P.
113 
114    Local phase
115    ===========
116 
117    During this phase we go through each block and look for cases in which:
118 
119      (a) an instruction I comes between two call instructions CI1 and CI2;
120 
121      (b) I uses a candidate register R;
122 
123      (c) a candidate C provides the only reaching definition of R; and
124 
125      (d) C does not come between CI1 and I.
126 
127    We then emit a copy of C after CI1, as well as the transitive closure
128    TC of the candidates used by C.  The copies of TC might use the original
129    candidate registers or new temporary registers, depending on circumstances.
130 
131    For example, if elsewhere we have:
132 
133        C3: R3 <- f3 (...)
134 	   ...
135        C2: R2 <- f2 (...)
136 	   ...
137        C1: R1 <- f1 (R2, R3, ...)  // uses C2 and C3
138 
139    then for a block containing:
140 
141       CI1: call
142 	   ...
143 	I: use R1  // uses C1
144 	   ...
145       CI2: call
146 
147    we would emit:
148 
149       CI1: call
150       C3': R3' <- f3 (...)
151       C2': R2' <- f2 (...)
152       C1': R1 <- f1 (R2', R3', ...)
153 	   ...
154 	I: use R1
155 	   ...
156       CI2: call
157 
158    where R2' and R3' might be fresh registers.  If instead we had:
159 
160       CI1: call
161 	   ...
162        I1: use R1  // uses C1
163 	   ...
164        I2: use R3  // uses C3
165 	   ...
166       CI2: call
167 
168    we would keep the original R3:
169 
170       CI1: call
171       C3': R3 <- f3 (...)
172       C2': R2' <- f2 (...)
173       C1': R1 <- f1 (R2', R3, ...)
174 	   ...
175        I1: use R1  // uses C1
176 	   ...
177        I2: use R3  // uses C3
178 	   ...
179       CI2: call
180 
181    We also record the last call in each block (if any) and compute:
182 
183      rd_after_call:
184        The set of candidates that either (a) are defined outside the block
185        and are live after the last call or (b) are defined within the block
186        and reach the end of the last call.  (We don't track whether the
187        latter values are live or not.)
188 
189      required_after_call:
190        The set of candidates that need to be rematerialized after the
191        last call in order to satisfy uses in the block itself.
192 
193      required_in:
194        The set of candidates that are live on entry to the block and are
195        used without an intervening call.
196 
197    In addition, we compute the initial values of the sets required by
198    the global phase below.
199 
200    Global phase
201    ============
202 
203    We next compute a maximal solution to the following availability
204    problem:
205 
206      available_in:
207        The set of candidates that are live on entry to a block and can
208        be used at that point without rematerialization.
209 
210      available_out:
211        The set of candidates that are live on exit from a block and can
212        be used at that point without rematerialization.
213 
214      available_locally:
215        The subset of available_out that is due to code in the block itself.
216        It contains candidates that are defined or used in the block and
217        not invalidated by a later call.
218 
219    We then go through each block B and look for an appropriate place
220    to insert copies of required_in - available_in.  Conceptually we
221    start by placing the copies at the head of B, but then move the
222    copy of a candidate C to predecessors if:
223 
224      (a) that seems cheaper;
225 
226      (b) there is more than one reaching definition of C's register at
227 	 the head of B; or
228 
229      (c) copying C would clobber a hard register that is live on entry to B.
230 
231    Moving a copy of C to a predecessor block PB involves:
232 
233      (1) adding C to PB's required_after_call, if PB contains a call; or
234 
235      (2) adding C PB's required_in otherwise.
236 
237    C is then available on output from each PB and on input to B.
238 
239    Once all this is done, we emit instructions for the final required_in
240    and required_after_call sets.  */
241 
242 namespace {
243 
244 /* An invalid candidate index, used to indicate that there is more than
245    one reaching definition.  */
246 const unsigned int MULTIPLE_CANDIDATES = -1U;
247 
248 /* Pass-specific information about one basic block.  */
249 struct remat_block_info {
250   /* The last call instruction in the block.  */
251   rtx_insn *last_call;
252 
253   /* The set of candidates that are live on entry to the block.  NULL is
254      equivalent to an empty set.  */
255   bitmap rd_in;
256 
257   /* The set of candidates that are live on exit from the block.  This might
258      reuse rd_in.  NULL is equivalent to an empty set.  */
259   bitmap rd_out;
260 
261   /* The subset of RD_OUT that comes from local definitions.  NULL is
262      equivalent to an empty set.  */
263   bitmap rd_gen;
264 
265   /* The set of candidates that the block invalidates (because it defines
266      the register to something else, or because the register's value is
267      no longer important).  NULL is equivalent to an empty set.  */
268   bitmap rd_kill;
269 
270   /* The set of candidates that either (a) are defined outside the block
271      and are live after LAST_CALL or (b) are defined within the block
272      and reach the instruction after LAST_CALL.  (We don't track whether
273      the latter values are live or not.)
274 
275      Only used if LAST_CALL is nonnull.  NULL is equivalent to an
276      empty set.  */
277   bitmap rd_after_call;
278 
279   /* Candidates that are live and available without rematerialization
280      on entry to the block.  NULL is equivalent to an empty set.  */
281   bitmap available_in;
282 
283   /* Candidates that become available without rematerialization within the
284      block, and remain so on exit.  NULL is equivalent to an empty set.  */
285   bitmap available_locally;
286 
287   /* Candidates that are available without rematerialization on exit from
288      the block.  This might reuse available_in or available_locally.  */
289   bitmap available_out;
290 
291   /* Candidates that need to be rematerialized either at the start of the
292      block or before entering the block.  */
293   bitmap required_in;
294 
295   /* Candidates that need to be rematerialized after LAST_CALL.
296      Only used if LAST_CALL is nonnull.  */
297   bitmap required_after_call;
298 
299   /* The number of candidates in the block.  */
300   unsigned int num_candidates;
301 
302   /* The earliest candidate in the block (i.e. the one with the
303      highest index).  Only valid if NUM_CANDIDATES is nonzero.  */
304   unsigned int first_candidate;
305 
306   /* The best (lowest) execution frequency for rematerializing REQUIRED_IN.
307      This is the execution frequency of the block if LOCAL_REMAT_CHEAPER_P,
308      otherwise it is the sum of the execution frequencies of whichever
309      predecessor blocks would do the rematerialization.  */
310   int remat_frequency;
311 
312   /* True if the block ends with an abnormal call.  */
313   unsigned int abnormal_call_p : 1;
314 
315   /* Used to record whether a graph traversal has visited this block.  */
316   unsigned int visited_p : 1;
317 
318   /* True if we have calculated REMAT_FREQUENCY.  */
319   unsigned int remat_frequency_valid_p : 1;
320 
321   /* True if it is cheaper to rematerialize candidates at the start of
322      the block, rather than moving them to predecessor blocks.  */
323   unsigned int local_remat_cheaper_p : 1;
324 };
325 
326 /* Information about a group of candidates with the same value number.  */
327 struct remat_equiv_class {
328   /* The candidates that have the same value number.  */
329   bitmap members;
330 
331   /* The candidate that was first added to MEMBERS.  */
332   unsigned int earliest;
333 
334   /* The candidate that represents the others.  This is always the one
335      with the highest index.  */
336   unsigned int representative;
337 };
338 
339 /* Information about an instruction that we might want to rematerialize.  */
340 struct remat_candidate {
341   /* The pseudo register that the instruction sets.  */
342   unsigned int regno;
343 
344   /* A temporary register used when rematerializing uses of this candidate,
345      if REGNO doesn't have the right value or isn't worth using.  */
346   unsigned int copy_regno;
347 
348   /* True if we intend to rematerialize this instruction by emitting
349      a move of a constant into REGNO, false if we intend to emit a
350      copy of the original instruction.  */
351   unsigned int constant_p : 1;
352 
353   /* True if we still think it's possible to rematerialize INSN.  */
354   unsigned int can_copy_p : 1;
355 
356   /* Used to record whether a graph traversal has visited this candidate.  */
357   unsigned int visited_p : 1;
358 
359   /* True if we have verified that it's possible to rematerialize INSN.
360      Once this is true, both it and CAN_COPY_P remain true.  */
361   unsigned int validated_p : 1;
362 
363   /* True if we have "stabilized" INSN, i.e. ensured that all non-candidate
364      registers read by INSN will have the same value when rematerializing INSN.
365      Only ever true if CAN_COPY_P.  */
366   unsigned int stabilized_p : 1;
367 
368   /* Hash value used for value numbering.  */
369   hashval_t hash;
370 
371   /* The instruction that sets REGNO.  */
372   rtx_insn *insn;
373 
374   /* If CONSTANT_P, the value that should be moved into REGNO when
375      rematerializing, otherwise the pattern of the instruction that
376      should be used.  */
377   rtx remat_rtx;
378 
379   /* The set of candidates that INSN takes as input.  NULL is equivalent
380      to the empty set.  All candidates in this set have a higher index
381      than the current candidate.  */
382   bitmap uses;
383 
384   /* The set of hard registers that would be clobbered by rematerializing
385      the candidate, including (transitively) all those that would be
386      clobbered by rematerializing USES.  */
387   bitmap clobbers;
388 
389   /* The equivalence class to which the candidate belongs, or null if none.  */
390   remat_equiv_class *equiv_class;
391 };
392 
393 /* Hash functions used for value numbering.  */
394 struct remat_candidate_hasher : nofree_ptr_hash <remat_candidate>
395 {
396   typedef value_type compare_type;
397   static hashval_t hash (const remat_candidate *);
398   static bool equal (const remat_candidate *, const remat_candidate *);
399 };
400 
401 /* Main class for this pass.  */
402 class early_remat {
403 public:
404   early_remat (function *, sbitmap);
405   ~early_remat ();
406 
407   void run (void);
408 
409 private:
410   bitmap alloc_bitmap (void);
411   bitmap get_bitmap (bitmap *);
412   void init_temp_bitmap (bitmap *);
413   void copy_temp_bitmap (bitmap *, bitmap *);
414 
415   void dump_insn_id (rtx_insn *);
416   void dump_candidate_bitmap (bitmap);
417   void dump_all_candidates (void);
418   void dump_edge_list (basic_block, bool);
419   void dump_block_info (basic_block);
420   void dump_all_blocks (void);
421 
422   bool interesting_regno_p (unsigned int);
423   remat_candidate *add_candidate (rtx_insn *, unsigned int, bool);
424   bool maybe_add_candidate (rtx_insn *, unsigned int);
425   bool collect_candidates (void);
426   void init_block_info (void);
427   void sort_candidates (void);
428   void finalize_candidate_indices (void);
429   void record_equiv_candidates (unsigned int, unsigned int);
430   static bool rd_confluence_n (edge);
431   static bool rd_transfer (int);
432   void compute_rd (void);
433   unsigned int canon_candidate (unsigned int);
434   void canon_bitmap (bitmap *);
435   unsigned int resolve_reaching_def (bitmap);
436   bool check_candidate_uses (unsigned int);
437   void compute_clobbers (unsigned int);
438   void assign_value_number (unsigned int);
439   void decide_candidate_validity (void);
440   void restrict_remat_for_unavail_regs (bitmap, const_bitmap);
441   void restrict_remat_for_call (bitmap, rtx_insn *);
442   bool stable_use_p (unsigned int);
443   void emit_copy_before (unsigned int, rtx, rtx);
444   void stabilize_pattern (unsigned int);
445   void replace_dest_with_copy (unsigned int);
446   void stabilize_candidate_uses (unsigned int, bitmap, bitmap, bitmap,
447 				 bitmap);
448   void emit_remat_insns (bitmap, bitmap, bitmap, rtx_insn *);
449   bool set_available_out (remat_block_info *);
450   void process_block (basic_block);
451   void local_phase (void);
452   static bool avail_confluence_n (edge);
453   static bool avail_transfer (int);
454   void compute_availability (void);
455   void unshare_available_sets (remat_block_info *);
456   bool can_move_across_edge_p (edge);
457   bool local_remat_cheaper_p (unsigned int);
458   bool need_to_move_candidate_p (unsigned int, unsigned int);
459   void compute_minimum_move_set (unsigned int, bitmap);
460   void move_to_predecessors (unsigned int, bitmap, bitmap);
461   void choose_rematerialization_points (void);
462   void emit_remat_insns_for_block (basic_block);
463   void global_phase (void);
464 
465   /* The function that we're optimizing.  */
466   function *m_fn;
467 
468   /* The modes that we want to rematerialize.  */
469   sbitmap m_selected_modes;
470 
471   /* All rematerialization candidates, identified by their index into the
472      vector.  */
473   auto_vec<remat_candidate> m_candidates;
474 
475   /* The set of candidate registers.  */
476   bitmap_head m_candidate_regnos;
477 
478   /* Temporary sets.  */
479   bitmap_head m_tmp_bitmap;
480   bitmap m_available;
481   bitmap m_required;
482 
483   /* Information about each basic block.  */
484   auto_vec<remat_block_info> m_block_info;
485 
486   /* A mapping from register numbers to the set of associated candidates.
487      Only valid for registers in M_CANDIDATE_REGNOS.  */
488   auto_vec<bitmap> m_regno_to_candidates;
489 
490   /* An obstack used for allocating bitmaps, so that we can free them all
491      in one go.  */
492   bitmap_obstack m_obstack;
493 
494   /* A hash table of candidates used for value numbering.  If a candidate
495      in the table is in an equivalence class, the candidate is marked as
496      the earliest member of the class.  */
497   hash_table<remat_candidate_hasher> m_value_table;
498 
499   /* Used temporarily by callback functions.  */
500   static early_remat *er;
501 };
502 
503 }
504 
505 early_remat *early_remat::er;
506 
507 /* rtx_equal_p_cb callback that treats any two SCRATCHes as equal.
508    This allows us to compare two copies of a pattern, even though their
509    SCRATCHes are always distinct.  */
510 
511 static int
scratch_equal(const_rtx * x,const_rtx * y,rtx * nx,rtx * ny)512 scratch_equal (const_rtx *x, const_rtx *y, rtx *nx, rtx *ny)
513 {
514   if (GET_CODE (*x) == SCRATCH && GET_CODE (*y) == SCRATCH)
515     {
516       *nx = const0_rtx;
517       *ny = const0_rtx;
518       return 1;
519     }
520   return 0;
521 }
522 
523 /* Hash callback functions for remat_candidate.  */
524 
525 hashval_t
hash(const remat_candidate * cand)526 remat_candidate_hasher::hash (const remat_candidate *cand)
527 {
528   return cand->hash;
529 }
530 
531 bool
equal(const remat_candidate * cand1,const remat_candidate * cand2)532 remat_candidate_hasher::equal (const remat_candidate *cand1,
533 			       const remat_candidate *cand2)
534 {
535   return (cand1->regno == cand2->regno
536 	  && cand1->constant_p == cand2->constant_p
537 	  && (cand1->constant_p
538 	      ? rtx_equal_p (cand1->remat_rtx, cand2->remat_rtx)
539 	      : rtx_equal_p_cb (cand1->remat_rtx, cand2->remat_rtx,
540 				scratch_equal))
541 	  && (!cand1->uses || bitmap_equal_p (cand1->uses, cand2->uses)));
542 }
543 
544 /* Return true if B is null or empty.  */
545 
546 inline bool
empty_p(bitmap b)547 empty_p (bitmap b)
548 {
549   return !b || bitmap_empty_p (b);
550 }
551 
552 /* Allocate a new bitmap.  It will be automatically freed at the end of
553    the pass.  */
554 
555 inline bitmap
alloc_bitmap(void)556 early_remat::alloc_bitmap (void)
557 {
558   return bitmap_alloc (&m_obstack);
559 }
560 
561 /* Initialize *PTR to an empty bitmap if it is currently null.  */
562 
563 inline bitmap
get_bitmap(bitmap * ptr)564 early_remat::get_bitmap (bitmap *ptr)
565 {
566   if (!*ptr)
567     *ptr = alloc_bitmap ();
568   return *ptr;
569 }
570 
571 /* *PTR is either null or empty.  If it is null, initialize it to an
572    empty bitmap.  */
573 
574 inline void
init_temp_bitmap(bitmap * ptr)575 early_remat::init_temp_bitmap (bitmap *ptr)
576 {
577   if (!*ptr)
578     *ptr = alloc_bitmap ();
579   else
580     gcc_checking_assert (bitmap_empty_p (*ptr));
581 }
582 
583 /* Move *SRC to *DEST and leave *SRC empty.  */
584 
585 inline void
copy_temp_bitmap(bitmap * dest,bitmap * src)586 early_remat::copy_temp_bitmap (bitmap *dest, bitmap *src)
587 {
588   if (!empty_p (*src))
589     {
590       *dest = *src;
591       *src = NULL;
592     }
593   else
594     *dest = NULL;
595 }
596 
597 /* Print INSN's identifier to the dump file.  */
598 
599 void
dump_insn_id(rtx_insn * insn)600 early_remat::dump_insn_id (rtx_insn *insn)
601 {
602   fprintf (dump_file, "%d[bb:%d]", INSN_UID (insn),
603 	   BLOCK_FOR_INSN (insn)->index);
604 }
605 
606 /* Print candidate set CANDIDATES to the dump file, with a leading space.  */
607 
608 void
dump_candidate_bitmap(bitmap candidates)609 early_remat::dump_candidate_bitmap (bitmap candidates)
610 {
611   if (empty_p (candidates))
612     {
613       fprintf (dump_file, " none");
614       return;
615     }
616 
617   unsigned int cand_index;
618   bitmap_iterator bi;
619   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
620     fprintf (dump_file, " %d", cand_index);
621 }
622 
623 /* Print information about all candidates to the dump file.  */
624 
625 void
dump_all_candidates(void)626 early_remat::dump_all_candidates (void)
627 {
628   fprintf (dump_file, "\n;; Candidates:\n;;\n");
629   fprintf (dump_file, ";; %5s %5s %8s %s\n", "#", "reg", "mode", "insn");
630   fprintf (dump_file, ";; %5s %5s %8s %s\n", "=", "===", "====", "====");
631   unsigned int cand_index;
632   remat_candidate *cand;
633   FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
634     {
635       fprintf (dump_file, ";; %5d %5d %8s ", cand_index, cand->regno,
636 	       GET_MODE_NAME (GET_MODE (regno_reg_rtx[cand->regno])));
637       dump_insn_id (cand->insn);
638       if (!cand->can_copy_p)
639 	fprintf (dump_file, "   -- can't copy");
640       fprintf (dump_file, "\n");
641     }
642 
643   fprintf (dump_file, "\n;; Register-to-candidate mapping:\n;;\n");
644   unsigned int regno;
645   bitmap_iterator bi;
646   EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
647     {
648       fprintf (dump_file, ";; %5d:", regno);
649       dump_candidate_bitmap (m_regno_to_candidates[regno]);
650       fprintf (dump_file, "\n");
651     }
652 }
653 
654 /* Print the predecessors or successors of BB to the dump file, with a
655    leading space.  DO_SUCC is true to print successors and false to print
656    predecessors.  */
657 
658 void
dump_edge_list(basic_block bb,bool do_succ)659 early_remat::dump_edge_list (basic_block bb, bool do_succ)
660 {
661   edge e;
662   edge_iterator ei;
663   FOR_EACH_EDGE (e, ei, do_succ ? bb->succs : bb->preds)
664     dump_edge_info (dump_file, e, TDF_NONE, do_succ);
665 }
666 
667 /* Print information about basic block BB to the dump file.  */
668 
669 void
dump_block_info(basic_block bb)670 early_remat::dump_block_info (basic_block bb)
671 {
672   remat_block_info *info = &m_block_info[bb->index];
673   fprintf (dump_file, ";;\n;; Block %d:", bb->index);
674   int width = 25;
675 
676   fprintf (dump_file, "\n;;%*s:", width, "predecessors");
677   dump_edge_list (bb, false);
678 
679   fprintf (dump_file, "\n;;%*s:", width, "successors");
680   dump_edge_list (bb, true);
681 
682   fprintf (dump_file, "\n;;%*s: %d", width, "frequency",
683 	   bb->count.to_frequency (m_fn));
684 
685   if (info->last_call)
686     fprintf (dump_file, "\n;;%*s: %d", width, "last call",
687 	     INSN_UID (info->last_call));
688 
689   if (!empty_p (info->rd_in))
690     {
691       fprintf (dump_file, "\n;;%*s:", width, "RD in");
692       dump_candidate_bitmap (info->rd_in);
693     }
694   if (!empty_p (info->rd_kill))
695     {
696       fprintf (dump_file, "\n;;%*s:", width, "RD kill");
697       dump_candidate_bitmap (info->rd_kill);
698     }
699   if (!empty_p (info->rd_gen))
700     {
701       fprintf (dump_file, "\n;;%*s:", width, "RD gen");
702       dump_candidate_bitmap (info->rd_gen);
703     }
704   if (!empty_p (info->rd_after_call))
705     {
706       fprintf (dump_file, "\n;;%*s:", width, "RD after call");
707       dump_candidate_bitmap (info->rd_after_call);
708     }
709   if (!empty_p (info->rd_out))
710     {
711       fprintf (dump_file, "\n;;%*s:", width, "RD out");
712       if (info->rd_in == info->rd_out)
713 	fprintf (dump_file, " RD in");
714       else
715 	dump_candidate_bitmap (info->rd_out);
716     }
717   if (!empty_p (info->available_in))
718     {
719       fprintf (dump_file, "\n;;%*s:", width, "available in");
720       dump_candidate_bitmap (info->available_in);
721     }
722   if (!empty_p (info->available_locally))
723     {
724       fprintf (dump_file, "\n;;%*s:", width, "available locally");
725       dump_candidate_bitmap (info->available_locally);
726     }
727   if (!empty_p (info->available_out))
728     {
729       fprintf (dump_file, "\n;;%*s:", width, "available out");
730       if (info->available_in == info->available_out)
731 	fprintf (dump_file, " available in");
732       else if (info->available_locally == info->available_out)
733 	fprintf (dump_file, " available locally");
734       else
735 	dump_candidate_bitmap (info->available_out);
736     }
737   if (!empty_p (info->required_in))
738     {
739       fprintf (dump_file, "\n;;%*s:", width, "required in");
740       dump_candidate_bitmap (info->required_in);
741     }
742   if (!empty_p (info->required_after_call))
743     {
744       fprintf (dump_file, "\n;;%*s:", width, "required after call");
745       dump_candidate_bitmap (info->required_after_call);
746     }
747   fprintf (dump_file, "\n");
748 }
749 
750 /* Print information about all basic blocks to the dump file.  */
751 
752 void
dump_all_blocks(void)753 early_remat::dump_all_blocks (void)
754 {
755   basic_block bb;
756   FOR_EACH_BB_FN (bb, m_fn)
757     dump_block_info (bb);
758 }
759 
760 /* Return true if REGNO is worth rematerializing.  */
761 
762 bool
interesting_regno_p(unsigned int regno)763 early_remat::interesting_regno_p (unsigned int regno)
764 {
765   /* Ignore unused registers.  */
766   rtx reg = regno_reg_rtx[regno];
767   if (!reg || DF_REG_DEF_COUNT (regno) == 0)
768     return false;
769 
770   /* Make sure the register has a mode that we want to rematerialize.  */
771   if (!bitmap_bit_p (m_selected_modes, GET_MODE (reg)))
772     return false;
773 
774   /* Ignore values that might sometimes be used uninitialized.  We could
775      instead add dummy candidates for the entry block definition, and so
776      handle uses that are definitely not uninitialized, but the combination
777      of the two should be rare in practice.  */
778   if (bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
779     return false;
780 
781   return true;
782 }
783 
784 /* Record the set of register REGNO in instruction INSN as a
785    rematerialization candidate.  CAN_COPY_P is true unless we already
786    know that rematerialization is impossible (in which case the candidate
787    only exists for the reaching definition calculation).
788 
789    The candidate's index is not fixed at this stage.  */
790 
791 remat_candidate *
add_candidate(rtx_insn * insn,unsigned int regno,bool can_copy_p)792 early_remat::add_candidate (rtx_insn *insn, unsigned int regno,
793 			    bool can_copy_p)
794 {
795   remat_candidate cand;
796   memset (&cand, 0, sizeof (cand));
797   cand.regno = regno;
798   cand.insn = insn;
799   cand.remat_rtx = PATTERN (insn);
800   cand.can_copy_p = can_copy_p;
801   m_candidates.safe_push (cand);
802 
803   bitmap_set_bit (&m_candidate_regnos, regno);
804 
805   return &m_candidates.last ();
806 }
807 
808 /* Return true if we can rematerialize the set of register REGNO in
809    instruction INSN, and add it as a candidate if so.  When returning
810    false, print the reason to the dump file.  */
811 
812 bool
maybe_add_candidate(rtx_insn * insn,unsigned int regno)813 early_remat::maybe_add_candidate (rtx_insn *insn, unsigned int regno)
814 {
815 #define FAILURE_FORMAT ";; Can't rematerialize set of reg %d in %d[bb:%d]: "
816 #define FAILURE_ARGS regno, INSN_UID (insn), BLOCK_FOR_INSN (insn)->index
817 
818   /* The definition must come from an ordinary instruction.  */
819   basic_block bb = BLOCK_FOR_INSN (insn);
820   if (!NONJUMP_INSN_P (insn)
821       || (insn == BB_END (bb)
822 	  && has_abnormal_or_eh_outgoing_edge_p (bb)))
823     {
824       if (dump_file)
825 	fprintf (dump_file, FAILURE_FORMAT "insn alters control flow\n",
826 		 FAILURE_ARGS);
827       return false;
828     }
829 
830   /* Prefer to rematerialize constants directly -- it's much easier.  */
831   machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
832   if (rtx note = find_reg_equal_equiv_note (insn))
833     {
834       rtx val = XEXP (note, 0);
835       if (CONSTANT_P (val)
836 	  && targetm.legitimate_constant_p (mode, val))
837 	{
838 	  remat_candidate *cand = add_candidate (insn, regno, true);
839 	  cand->constant_p = true;
840 	  cand->remat_rtx = val;
841 	  return true;
842 	}
843     }
844 
845   /* See whether the target has reasons to prevent a copy.  */
846   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (insn))
847     {
848       if (dump_file)
849 	fprintf (dump_file, FAILURE_FORMAT "target forbids copying\n",
850 		 FAILURE_ARGS);
851       return false;
852     }
853 
854   /* We can't copy trapping instructions.  */
855   rtx pat = PATTERN (insn);
856   if (may_trap_p (pat))
857     {
858       if (dump_file)
859 	fprintf (dump_file, FAILURE_FORMAT "insn might trap\n", FAILURE_ARGS);
860       return false;
861     }
862 
863   /* We can't copy instructions that read memory, unless we know that
864      the contents never change.  */
865   subrtx_iterator::array_type array;
866   FOR_EACH_SUBRTX (iter, array, pat, ALL)
867     if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
868       {
869 	if (dump_file)
870 	  fprintf (dump_file, FAILURE_FORMAT "insn references non-constant"
871 		   " memory\n", FAILURE_ARGS);
872 	return false;
873       }
874 
875   /* Check each defined register.  */
876   df_ref ref;
877   FOR_EACH_INSN_DEF (ref, insn)
878     {
879       unsigned int def_regno = DF_REF_REGNO (ref);
880       if (def_regno == regno)
881 	{
882 	  /* Make sure the definition is write-only.  (Partial definitions,
883 	     such as setting the low part and clobbering the high part,
884 	     are otherwise OK.)  */
885 	  if (DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE))
886 	    {
887 	      if (dump_file)
888 		fprintf (dump_file, FAILURE_FORMAT "destination is"
889 			 " read-modify-write\n", FAILURE_ARGS);
890 	      return false;
891 	    }
892 	}
893       else
894 	{
895 	  /* The instruction can set additional registers, provided that
896 	     they're hard registers.  This is useful for instructions
897 	     that alter the condition codes.  */
898 	  if (!HARD_REGISTER_NUM_P (def_regno))
899 	    {
900 	      if (dump_file)
901 		fprintf (dump_file, FAILURE_FORMAT "insn also sets"
902 			 " pseudo reg %d\n", FAILURE_ARGS, def_regno);
903 	      return false;
904 	    }
905 	}
906     }
907 
908   /* If the instruction uses fixed hard registers, check that those
909      registers have the same value throughout the function.  If the
910      instruction uses non-fixed hard registers, check that we can
911      replace them with pseudos.  */
912   FOR_EACH_INSN_USE (ref, insn)
913     {
914       unsigned int use_regno = DF_REF_REGNO (ref);
915       if (HARD_REGISTER_NUM_P (use_regno) && fixed_regs[use_regno])
916 	{
917 	  if (rtx_unstable_p (DF_REF_REAL_REG (ref)))
918 	    {
919 	      if (dump_file)
920 		fprintf (dump_file, FAILURE_FORMAT "insn uses fixed hard reg"
921 			 " %d\n", FAILURE_ARGS, use_regno);
922 	      return false;
923 	    }
924 	}
925       else if (HARD_REGISTER_NUM_P (use_regno))
926 	{
927 	  /* Allocate a dummy pseudo register and temporarily install it.
928 	     Make the register number depend on the mode, which should
929 	     provide enough sharing for match_dup while also weeding
930 	     out cases in which operands with different modes are
931 	     explicitly tied.  */
932 	  rtx *loc = DF_REF_REAL_LOC (ref);
933 	  unsigned int size = RTX_CODE_SIZE (REG);
934 	  rtx new_reg = (rtx) alloca (size);
935 	  memset (new_reg, 0, size);
936 	  PUT_CODE (new_reg, REG);
937 	  set_mode_and_regno (new_reg, GET_MODE (*loc),
938 			      LAST_VIRTUAL_REGISTER + 1 + GET_MODE (*loc));
939 	  validate_change (insn, loc, new_reg, 1);
940 	}
941     }
942   bool ok_p = verify_changes (0);
943   cancel_changes (0);
944   if (!ok_p)
945     {
946       if (dump_file)
947 	fprintf (dump_file, FAILURE_FORMAT "insn does not allow hard"
948 		 " register inputs to be replaced\n", FAILURE_ARGS);
949       return false;
950     }
951 
952 #undef FAILURE_ARGS
953 #undef FAILURE_FORMAT
954 
955   add_candidate (insn, regno, true);
956   return true;
957 }
958 
959 /* Calculate the set of rematerialization candidates.  Return true if
960    we find at least one.  */
961 
962 bool
collect_candidates(void)963 early_remat::collect_candidates (void)
964 {
965   unsigned int nregs = DF_REG_SIZE (df);
966   for (unsigned int regno = FIRST_PSEUDO_REGISTER; regno < nregs; ++regno)
967     if (interesting_regno_p (regno))
968       {
969 	/* Create candidates for all suitable definitions.  */
970 	bitmap_clear (&m_tmp_bitmap);
971 	unsigned int bad = 0;
972 	unsigned int id = 0;
973 	for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
974 	     ref = DF_REF_NEXT_REG (ref))
975 	  {
976 	    rtx_insn *insn = DF_REF_INSN (ref);
977 	    if (maybe_add_candidate (insn, regno))
978 	      bitmap_set_bit (&m_tmp_bitmap, id);
979 	    else
980 	      bad += 1;
981 	    id += 1;
982 	  }
983 
984 	/* If we found at least one suitable definition, add dummy
985 	   candidates for the rest, so that we can see which definitions
986 	   are live where.  */
987 	if (!bitmap_empty_p (&m_tmp_bitmap) && bad)
988 	  {
989 	    id = 0;
990 	    for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
991 		 ref = DF_REF_NEXT_REG (ref))
992 	      {
993 		if (!bitmap_bit_p (&m_tmp_bitmap, id))
994 		  add_candidate (DF_REF_INSN (ref), regno, false);
995 		id += 1;
996 	      }
997 	  }
998       }
999 
1000 
1001   return !m_candidates.is_empty ();
1002 }
1003 
1004 /* Initialize the m_block_info array.  */
1005 
1006 void
init_block_info(void)1007 early_remat::init_block_info (void)
1008 {
1009   unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1010   m_block_info.safe_grow_cleared (n_blocks, true);
1011 }
1012 
1013 /* Maps basic block indices to their position in the post order.  */
1014 static unsigned int *postorder_index;
1015 
1016 /* Order remat_candidates X_IN and Y_IN according to the cfg postorder.  */
1017 
1018 static int
compare_candidates(const void * x_in,const void * y_in)1019 compare_candidates (const void *x_in, const void *y_in)
1020 {
1021   const remat_candidate *x = (const remat_candidate *) x_in;
1022   const remat_candidate *y = (const remat_candidate *) y_in;
1023   basic_block x_bb = BLOCK_FOR_INSN (x->insn);
1024   basic_block y_bb = BLOCK_FOR_INSN (y->insn);
1025   if (x_bb != y_bb)
1026     /* Make X and Y follow block postorder.  */
1027     return postorder_index[x_bb->index] - postorder_index[y_bb->index];
1028 
1029   /* Make X and Y follow a backward traversal of the containing block.  */
1030   return DF_INSN_LUID (y->insn) - DF_INSN_LUID (x->insn);
1031 }
1032 
1033 /* Sort the collected rematerialization candidates so that they follow
1034    cfg postorder.  */
1035 
1036 void
sort_candidates(void)1037 early_remat::sort_candidates (void)
1038 {
1039   /* Make sure the DF LUIDs are up-to-date for all the blocks we
1040      care about.  */
1041   bitmap_clear (&m_tmp_bitmap);
1042   unsigned int cand_index;
1043   remat_candidate *cand;
1044   FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1045     {
1046       basic_block bb = BLOCK_FOR_INSN (cand->insn);
1047       if (bitmap_set_bit (&m_tmp_bitmap, bb->index))
1048 	df_recompute_luids (bb);
1049     }
1050 
1051   /* Create a mapping from block numbers to their position in the
1052      postorder.  */
1053   unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1054   int *postorder = df_get_postorder (DF_BACKWARD);
1055   unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
1056   postorder_index = new unsigned int[n_blocks];
1057   for (unsigned int i = 0; i < postorder_len; ++i)
1058     postorder_index[postorder[i]] = i;
1059 
1060   m_candidates.qsort (compare_candidates);
1061 
1062   delete[] postorder_index;
1063 }
1064 
1065 /* Commit to the current candidate indices and initialize cross-references.  */
1066 
1067 void
finalize_candidate_indices(void)1068 early_remat::finalize_candidate_indices (void)
1069 {
1070   /* Create a bitmap for each candidate register.  */
1071   m_regno_to_candidates.safe_grow (max_reg_num (), true);
1072   unsigned int regno;
1073   bitmap_iterator bi;
1074   EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
1075     m_regno_to_candidates[regno] = alloc_bitmap ();
1076 
1077   /* Go through each candidate and record its index.  */
1078   unsigned int cand_index;
1079   remat_candidate *cand;
1080   FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1081     {
1082       basic_block bb = BLOCK_FOR_INSN (cand->insn);
1083       remat_block_info *info = &m_block_info[bb->index];
1084       info->num_candidates += 1;
1085       info->first_candidate = cand_index;
1086       bitmap_set_bit (m_regno_to_candidates[cand->regno], cand_index);
1087     }
1088 }
1089 
1090 /* Record that candidates CAND1_INDEX and CAND2_INDEX are equivalent.
1091    CAND1_INDEX might already have an equivalence class, but CAND2_INDEX
1092    doesn't.  */
1093 
1094 void
record_equiv_candidates(unsigned int cand1_index,unsigned int cand2_index)1095 early_remat::record_equiv_candidates (unsigned int cand1_index,
1096 				      unsigned int cand2_index)
1097 {
1098   if (dump_file)
1099     fprintf (dump_file, ";; Candidate %d is equivalent to candidate %d\n",
1100 	     cand2_index, cand1_index);
1101 
1102   remat_candidate *cand1 = &m_candidates[cand1_index];
1103   remat_candidate *cand2 = &m_candidates[cand2_index];
1104   gcc_checking_assert (!cand2->equiv_class);
1105 
1106   remat_equiv_class *ec = cand1->equiv_class;
1107   if (!ec)
1108     {
1109       ec = XOBNEW (&m_obstack.obstack, remat_equiv_class);
1110       ec->members = alloc_bitmap ();
1111       bitmap_set_bit (ec->members, cand1_index);
1112       ec->earliest = cand1_index;
1113       ec->representative = cand1_index;
1114       cand1->equiv_class = ec;
1115     }
1116   cand2->equiv_class = ec;
1117   bitmap_set_bit (ec->members, cand2_index);
1118   if (cand2_index > ec->representative)
1119     ec->representative = cand2_index;
1120 }
1121 
1122 /* Propagate information from the rd_out set of E->src to the rd_in set
1123    of E->dest, when computing global reaching definitions.  Return true
1124    if something changed.  */
1125 
1126 bool
rd_confluence_n(edge e)1127 early_remat::rd_confluence_n (edge e)
1128 {
1129   remat_block_info *src = &er->m_block_info[e->src->index];
1130   remat_block_info *dest = &er->m_block_info[e->dest->index];
1131 
1132   /* available_in temporarily contains the set of candidates whose
1133      registers are live on entry.  */
1134   if (empty_p (src->rd_out) || empty_p (dest->available_in))
1135     return false;
1136 
1137   return bitmap_ior_and_into (er->get_bitmap (&dest->rd_in),
1138 			      src->rd_out, dest->available_in);
1139 }
1140 
1141 /* Propagate information from the rd_in set of block BB_INDEX to rd_out.
1142    Return true if something changed.  */
1143 
1144 bool
rd_transfer(int bb_index)1145 early_remat::rd_transfer (int bb_index)
1146 {
1147   remat_block_info *info = &er->m_block_info[bb_index];
1148 
1149   if (empty_p (info->rd_in))
1150     return false;
1151 
1152   if (empty_p (info->rd_kill))
1153     {
1154       gcc_checking_assert (empty_p (info->rd_gen));
1155       if (!info->rd_out)
1156 	info->rd_out = info->rd_in;
1157       else
1158 	gcc_checking_assert (info->rd_out == info->rd_in);
1159       /* Assume that we only get called if something changed.  */
1160       return true;
1161     }
1162 
1163   if (empty_p (info->rd_gen))
1164     return bitmap_and_compl (er->get_bitmap (&info->rd_out),
1165 			     info->rd_in, info->rd_kill);
1166 
1167   return bitmap_ior_and_compl (er->get_bitmap (&info->rd_out), info->rd_gen,
1168 			       info->rd_in, info->rd_kill);
1169 }
1170 
1171 /* Calculate the rd_* sets for each block.  */
1172 
1173 void
compute_rd(void)1174 early_remat::compute_rd (void)
1175 {
1176   /* First calculate the rd_kill and rd_gen sets, using the fact
1177      that m_candidates is sorted in order of decreasing LUID.  */
1178   unsigned int cand_index;
1179   remat_candidate *cand;
1180   FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1181     {
1182       rtx_insn *insn = cand->insn;
1183       basic_block bb = BLOCK_FOR_INSN (insn);
1184       remat_block_info *info = &m_block_info[bb->index];
1185       bitmap kill = m_regno_to_candidates[cand->regno];
1186       bitmap_ior_into (get_bitmap (&info->rd_kill), kill);
1187       if (bitmap_bit_p (DF_LR_OUT (bb), cand->regno))
1188 	{
1189 	  bitmap_and_compl_into (get_bitmap (&info->rd_gen), kill);
1190 	  bitmap_set_bit (info->rd_gen, cand_index);
1191 	}
1192     }
1193 
1194   /* Set up the initial values of the other sets.  */
1195   basic_block bb;
1196   FOR_EACH_BB_FN (bb, m_fn)
1197     {
1198       remat_block_info *info = &m_block_info[bb->index];
1199       unsigned int regno;
1200       bitmap_iterator bi;
1201       EXECUTE_IF_AND_IN_BITMAP (DF_LR_IN (bb), &m_candidate_regnos,
1202 				0, regno, bi)
1203 	{
1204 	  /* Use available_in to record the set of candidates whose
1205 	     registers are live on entry (i.e. a maximum bound on rd_in).  */
1206 	  bitmap_ior_into (get_bitmap (&info->available_in),
1207 			   m_regno_to_candidates[regno]);
1208 
1209 	  /* Add registers that die in a block to the block's kill set,
1210 	     so that we don't needlessly propagate them through the rest
1211 	     of the function.  */
1212 	  if (!bitmap_bit_p (DF_LR_OUT (bb), regno))
1213 	    bitmap_ior_into (get_bitmap (&info->rd_kill),
1214 			     m_regno_to_candidates[regno]);
1215 	}
1216 
1217       /* Initialize each block's rd_out to the minimal set (the set of
1218 	 local definitions).  */
1219       if (!empty_p (info->rd_gen))
1220 	bitmap_copy (get_bitmap (&info->rd_out), info->rd_gen);
1221     }
1222 
1223   /* Iterate until we reach a fixed point.  */
1224   er = this;
1225   bitmap_clear (&m_tmp_bitmap);
1226   bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
1227   df_simple_dataflow (DF_FORWARD, NULL, NULL, rd_confluence_n, rd_transfer,
1228 		      &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
1229 		      df_get_n_blocks (DF_FORWARD));
1230   er = 0;
1231 
1232   /* Work out which definitions reach which candidates, again taking
1233      advantage of the candidate order.  */
1234   bitmap_head reaching;
1235   bitmap_initialize (&reaching, &m_obstack);
1236   basic_block old_bb = NULL;
1237   FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1238     {
1239       bb = BLOCK_FOR_INSN (cand->insn);
1240       if (bb != old_bb)
1241 	{
1242 	  /* Get the definitions that reach the start of the new block.  */
1243 	  remat_block_info *info = &m_block_info[bb->index];
1244 	  if (info->rd_in)
1245 	    bitmap_copy (&reaching, info->rd_in);
1246 	  else
1247 	    bitmap_clear (&reaching);
1248 	  old_bb = bb;
1249 	}
1250       else
1251 	{
1252 	  /* Process the definitions of the previous instruction.  */
1253 	  bitmap kill = m_regno_to_candidates[cand[1].regno];
1254 	  bitmap_and_compl_into (&reaching, kill);
1255 	  bitmap_set_bit (&reaching, cand_index + 1);
1256 	}
1257 
1258       if (cand->can_copy_p && !cand->constant_p)
1259 	{
1260 	  df_ref ref;
1261 	  FOR_EACH_INSN_USE (ref, cand->insn)
1262 	    {
1263 	      unsigned int regno = DF_REF_REGNO (ref);
1264 	      if (bitmap_bit_p (&m_candidate_regnos, regno))
1265 		{
1266 		  bitmap defs = m_regno_to_candidates[regno];
1267 		  bitmap_and (&m_tmp_bitmap, defs, &reaching);
1268 		  bitmap_ior_into (get_bitmap (&cand->uses), &m_tmp_bitmap);
1269 		}
1270 	    }
1271 	}
1272     }
1273   bitmap_clear (&reaching);
1274 }
1275 
1276 /* If CAND_INDEX is in an equivalence class, return the representative
1277    of the class, otherwise return CAND_INDEX.  */
1278 
1279 inline unsigned int
canon_candidate(unsigned int cand_index)1280 early_remat::canon_candidate (unsigned int cand_index)
1281 {
1282   if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1283     return ec->representative;
1284   return cand_index;
1285 }
1286 
1287 /* Make candidate set *PTR refer to candidates using the representative
1288    of each equivalence class.  */
1289 
1290 void
canon_bitmap(bitmap * ptr)1291 early_remat::canon_bitmap (bitmap *ptr)
1292 {
1293   bitmap old_set = *ptr;
1294   if (empty_p (old_set))
1295     return;
1296 
1297   bitmap new_set = NULL;
1298   unsigned int old_index;
1299   bitmap_iterator bi;
1300   EXECUTE_IF_SET_IN_BITMAP (old_set, 0, old_index, bi)
1301     {
1302       unsigned int new_index = canon_candidate (old_index);
1303       if (old_index != new_index)
1304 	{
1305 	  if (!new_set)
1306 	    {
1307 	      new_set = alloc_bitmap ();
1308 	      bitmap_copy (new_set, old_set);
1309 	    }
1310 	  bitmap_clear_bit (new_set, old_index);
1311 	  bitmap_set_bit (new_set, new_index);
1312 	}
1313     }
1314   if (new_set)
1315     {
1316       BITMAP_FREE (*ptr);
1317       *ptr = new_set;
1318     }
1319 }
1320 
1321 /* If the candidates in REACHING all have the same value, return the
1322    earliest instance of that value (i.e. the first one to be added
1323    to m_value_table), otherwise return MULTIPLE_CANDIDATES.  */
1324 
1325 unsigned int
resolve_reaching_def(bitmap reaching)1326 early_remat::resolve_reaching_def (bitmap reaching)
1327 {
1328   unsigned int cand_index = bitmap_first_set_bit (reaching);
1329   if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1330     {
1331       if (!bitmap_intersect_compl_p (reaching, ec->members))
1332 	return ec->earliest;
1333     }
1334   else if (bitmap_single_bit_set_p (reaching))
1335     return cand_index;
1336 
1337   return MULTIPLE_CANDIDATES;
1338 }
1339 
1340 /* Check whether all candidate registers used by candidate CAND_INDEX have
1341    unique definitions.  Return true if so, replacing the candidate's uses
1342    set with the appropriate form for value numbering.  */
1343 
1344 bool
check_candidate_uses(unsigned int cand_index)1345 early_remat::check_candidate_uses (unsigned int cand_index)
1346 {
1347   remat_candidate *cand = &m_candidates[cand_index];
1348 
1349   /* Process the uses for each register in turn.  */
1350   bitmap_head uses;
1351   bitmap_initialize (&uses, &m_obstack);
1352   bitmap_copy (&uses, cand->uses);
1353   bitmap uses_ec = alloc_bitmap ();
1354   while (!bitmap_empty_p (&uses))
1355     {
1356       /* Get the register for the lowest-indexed candidate remaining,
1357 	 and the reaching definitions of that register.  */
1358       unsigned int first = bitmap_first_set_bit (&uses);
1359       unsigned int regno = m_candidates[first].regno;
1360       bitmap_and (&m_tmp_bitmap, &uses, m_regno_to_candidates[regno]);
1361 
1362       /* See whether all reaching definitions have the same value and if
1363 	 so get the index of the first candidate we saw with that value.  */
1364       unsigned int def = resolve_reaching_def (&m_tmp_bitmap);
1365       if (def == MULTIPLE_CANDIDATES)
1366 	{
1367 	  if (dump_file)
1368 	    fprintf (dump_file, ";; Removing candidate %d because there is"
1369 		     " more than one reaching definition of reg %d\n",
1370 		     cand_index, regno);
1371 	  cand->can_copy_p = false;
1372 	  break;
1373 	}
1374       bitmap_set_bit (uses_ec, def);
1375       bitmap_and_compl_into (&uses, &m_tmp_bitmap);
1376     }
1377   BITMAP_FREE (cand->uses);
1378   cand->uses = uses_ec;
1379   return cand->can_copy_p;
1380 }
1381 
1382 /* Calculate the set of hard registers that would be clobbered by
1383    rematerializing candidate CAND_INDEX.  At this point the candidate's
1384    set of uses is final.  */
1385 
1386 void
compute_clobbers(unsigned int cand_index)1387 early_remat::compute_clobbers (unsigned int cand_index)
1388 {
1389   remat_candidate *cand = &m_candidates[cand_index];
1390   if (cand->uses)
1391     {
1392       unsigned int use_index;
1393       bitmap_iterator bi;
1394       EXECUTE_IF_SET_IN_BITMAP (cand->uses, 0, use_index, bi)
1395 	if (bitmap clobbers = m_candidates[use_index].clobbers)
1396 	  bitmap_ior_into (get_bitmap (&cand->clobbers), clobbers);
1397     }
1398 
1399   df_ref ref;
1400   FOR_EACH_INSN_DEF (ref, cand->insn)
1401     {
1402       unsigned int def_regno = DF_REF_REGNO (ref);
1403       if (def_regno != cand->regno)
1404 	bitmap_set_bit (get_bitmap (&cand->clobbers), def_regno);
1405     }
1406 }
1407 
1408 /* Mark candidate CAND_INDEX as validated and add it to the value table.  */
1409 
1410 void
assign_value_number(unsigned int cand_index)1411 early_remat::assign_value_number (unsigned int cand_index)
1412 {
1413   remat_candidate *cand = &m_candidates[cand_index];
1414   gcc_checking_assert (cand->can_copy_p && !cand->validated_p);
1415 
1416   compute_clobbers (cand_index);
1417   cand->validated_p = true;
1418 
1419   inchash::hash h;
1420   h.add_int (cand->regno);
1421   inchash::add_rtx (cand->remat_rtx, h);
1422   cand->hash = h.end ();
1423 
1424   remat_candidate **slot
1425     = m_value_table.find_slot_with_hash (cand, cand->hash, INSERT);
1426   if (!*slot)
1427     {
1428       *slot = cand;
1429       if (dump_file)
1430 	fprintf (dump_file, ";; Candidate %d is not equivalent to"
1431 		 " others seen so far\n", cand_index);
1432     }
1433   else
1434     record_equiv_candidates (*slot - m_candidates.address (), cand_index);
1435 }
1436 
1437 /* Make a final decision about which candidates are valid and assign
1438    value numbers to those that are.  */
1439 
1440 void
decide_candidate_validity(void)1441 early_remat::decide_candidate_validity (void)
1442 {
1443   auto_vec<unsigned int, 16> stack;
1444   unsigned int cand1_index;
1445   remat_candidate *cand1;
1446   FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1447     {
1448       if (!cand1->can_copy_p || cand1->validated_p)
1449 	continue;
1450 
1451       if (empty_p (cand1->uses))
1452 	{
1453 	  assign_value_number (cand1_index);
1454 	  continue;
1455 	}
1456 
1457       stack.safe_push (cand1_index);
1458       while (!stack.is_empty ())
1459 	{
1460 	  unsigned int cand2_index = stack.last ();
1461 	  unsigned int watermark = stack.length ();
1462 	  remat_candidate *cand2 = &m_candidates[cand2_index];
1463 	  if (!cand2->can_copy_p || cand2->validated_p)
1464 	    {
1465 	      stack.pop ();
1466 	      continue;
1467 	    }
1468 	  cand2->visited_p = true;
1469 	  unsigned int cand3_index;
1470 	  bitmap_iterator bi;
1471 	  EXECUTE_IF_SET_IN_BITMAP (cand2->uses, 0, cand3_index, bi)
1472 	    {
1473 	      remat_candidate *cand3 = &m_candidates[cand3_index];
1474 	      if (!cand3->can_copy_p)
1475 		{
1476 		  if (dump_file)
1477 		    fprintf (dump_file, ";; Removing candidate %d because"
1478 			     " it uses removed candidate %d\n", cand2_index,
1479 			     cand3_index);
1480 		  cand2->can_copy_p = false;
1481 		  break;
1482 		}
1483 	      if (!cand3->validated_p)
1484 		{
1485 		  if (empty_p (cand3->uses))
1486 		    assign_value_number (cand3_index);
1487 		  else if (cand3->visited_p)
1488 		    {
1489 		      if (dump_file)
1490 			fprintf (dump_file, ";; Removing candidate %d"
1491 				 " because its definition is cyclic\n",
1492 				 cand2_index);
1493 		      cand2->can_copy_p = false;
1494 		      break;
1495 		    }
1496 		  else
1497 		    stack.safe_push (cand3_index);
1498 		}
1499 	    }
1500 	  if (!cand2->can_copy_p)
1501 	    {
1502 	      cand2->visited_p = false;
1503 	      stack.truncate (watermark - 1);
1504 	    }
1505 	  else if (watermark == stack.length ())
1506 	    {
1507 	      cand2->visited_p = false;
1508 	      if (check_candidate_uses (cand2_index))
1509 		assign_value_number (cand2_index);
1510 	      stack.pop ();
1511 	    }
1512 	}
1513     }
1514 
1515   /* Ensure that the candidates always use the same candidate index
1516      to refer to an equivalence class.  */
1517   FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1518     if (cand1->can_copy_p && !empty_p (cand1->uses))
1519       {
1520 	canon_bitmap (&cand1->uses);
1521 	gcc_checking_assert (bitmap_first_set_bit (cand1->uses) > cand1_index);
1522       }
1523 }
1524 
1525 /* Remove any candidates in CANDIDATES that would clobber a register in
1526    UNAVAIL_REGS.  */
1527 
1528 void
restrict_remat_for_unavail_regs(bitmap candidates,const_bitmap unavail_regs)1529 early_remat::restrict_remat_for_unavail_regs (bitmap candidates,
1530 					      const_bitmap unavail_regs)
1531 {
1532   bitmap_clear (&m_tmp_bitmap);
1533   unsigned int cand_index;
1534   bitmap_iterator bi;
1535   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
1536     {
1537       remat_candidate *cand = &m_candidates[cand_index];
1538       if (cand->clobbers
1539 	  && bitmap_intersect_p (cand->clobbers, unavail_regs))
1540 	bitmap_set_bit (&m_tmp_bitmap, cand_index);
1541     }
1542   bitmap_and_compl_into (candidates, &m_tmp_bitmap);
1543 }
1544 
1545 /* Remove any candidates in CANDIDATES that would clobber a register
1546    that is potentially live across CALL.  */
1547 
1548 void
restrict_remat_for_call(bitmap candidates,rtx_insn * call)1549 early_remat::restrict_remat_for_call (bitmap candidates, rtx_insn *call)
1550 {
1551   /* We don't know whether partially-clobbered registers are live
1552      across the call or not, so assume that they are.  */
1553   bitmap_view<HARD_REG_SET> call_preserved_regs
1554     (~insn_callee_abi (call).full_reg_clobbers ());
1555   restrict_remat_for_unavail_regs (candidates, call_preserved_regs);
1556 }
1557 
1558 /* Assuming that every path reaching a point P contains a copy of a
1559    use U of REGNO, return true if another copy of U at P would have
1560    access to the same value of REGNO.  */
1561 
1562 bool
stable_use_p(unsigned int regno)1563 early_remat::stable_use_p (unsigned int regno)
1564 {
1565   /* Conservatively assume not for hard registers.  */
1566   if (HARD_REGISTER_NUM_P (regno))
1567     return false;
1568 
1569   /* See if REGNO has a single definition and is never used uninitialized.
1570      In this case the definition of REGNO dominates the common dominator
1571      of the uses U, which in turn dominates P.  */
1572   if (DF_REG_DEF_COUNT (regno) == 1
1573       && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
1574     return true;
1575 
1576   return false;
1577 }
1578 
1579 /* Emit a copy from register DEST to register SRC before candidate
1580    CAND_INDEX's instruction.  */
1581 
1582 void
emit_copy_before(unsigned int cand_index,rtx dest,rtx src)1583 early_remat::emit_copy_before (unsigned int cand_index, rtx dest, rtx src)
1584 {
1585   remat_candidate *cand = &m_candidates[cand_index];
1586   if (dump_file)
1587     {
1588       fprintf (dump_file, ";; Stabilizing insn ");
1589       dump_insn_id (cand->insn);
1590       fprintf (dump_file, " by copying source reg %d:%s to temporary reg %d\n",
1591 	       REGNO (src), GET_MODE_NAME (GET_MODE (src)), REGNO (dest));
1592     }
1593   emit_insn_before (gen_move_insn (dest, src), cand->insn);
1594 }
1595 
1596 /* Check whether any inputs to candidate CAND_INDEX's instruction could
1597    change at rematerialization points and replace them with new pseudo
1598    registers if so.  */
1599 
1600 void
stabilize_pattern(unsigned int cand_index)1601 early_remat::stabilize_pattern (unsigned int cand_index)
1602 {
1603   remat_candidate *cand = &m_candidates[cand_index];
1604   if (cand->stabilized_p)
1605     return;
1606 
1607   remat_equiv_class *ec = cand->equiv_class;
1608   gcc_checking_assert (!ec || cand_index == ec->representative);
1609 
1610   /* Record the replacements we've made so far, so that we don't
1611      create two separate registers for match_dups.  Lookup is O(n),
1612      but the n is very small.  */
1613   typedef std::pair<rtx, rtx> reg_pair;
1614   auto_vec<reg_pair, 16> reg_map;
1615 
1616   rtx_insn *insn = cand->insn;
1617   df_ref ref;
1618   FOR_EACH_INSN_USE (ref, insn)
1619     {
1620       unsigned int old_regno = DF_REF_REGNO (ref);
1621       rtx *loc = DF_REF_REAL_LOC (ref);
1622 
1623       if (HARD_REGISTER_NUM_P (old_regno) && fixed_regs[old_regno])
1624 	{
1625 	  /* We checked when adding the candidate that the value is stable.  */
1626 	  gcc_checking_assert (!rtx_unstable_p (*loc));
1627 	  continue;
1628 	}
1629 
1630       if (bitmap_bit_p (&m_candidate_regnos, old_regno))
1631 	/* We already know which candidate provides the definition
1632 	   and will handle it during copying.  */
1633 	continue;
1634 
1635       if (stable_use_p (old_regno))
1636 	/* We can continue to use the existing register.  */
1637 	continue;
1638 
1639       /* We need to replace the register.  See whether we've already
1640 	 created a suitable copy.  */
1641       rtx old_reg = *loc;
1642       rtx new_reg = NULL_RTX;
1643       machine_mode mode = GET_MODE (old_reg);
1644       reg_pair *p;
1645       unsigned int pi;
1646       FOR_EACH_VEC_ELT (reg_map, pi, p)
1647 	if (REGNO (p->first) == old_regno
1648 	    && GET_MODE (p->first) == mode)
1649 	  {
1650 	    new_reg = p->second;
1651 	    break;
1652 	  }
1653 
1654       if (!new_reg)
1655 	{
1656 	  /* Create a new register and initialize it just before
1657 	     the instruction.  */
1658 	  new_reg = gen_reg_rtx (mode);
1659 	  reg_map.safe_push (reg_pair (old_reg, new_reg));
1660 	  if (ec)
1661 	    {
1662 	      unsigned int member_index;
1663 	      bitmap_iterator bi;
1664 	      EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
1665 		emit_copy_before (member_index, new_reg, old_reg);
1666 	    }
1667 	  else
1668 	    emit_copy_before (cand_index, new_reg, old_reg);
1669 	}
1670       validate_change (insn, loc, new_reg, true);
1671     }
1672   if (num_changes_pending ())
1673     {
1674       if (!apply_change_group ())
1675 	/* We checked when adding the candidates that the pattern allows
1676 	   hard registers to be replaced.  Nothing else should make the
1677 	   changes invalid.  */
1678 	gcc_unreachable ();
1679 
1680       if (ec)
1681 	{
1682 	  /* Copy the new pattern to other members of the equivalence
1683 	     class.  */
1684 	  unsigned int member_index;
1685 	  bitmap_iterator bi;
1686 	  EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
1687 	    if (cand_index != member_index)
1688 	      {
1689 		rtx_insn *other_insn = m_candidates[member_index].insn;
1690 		if (!validate_change (other_insn, &PATTERN (other_insn),
1691 				      copy_insn (PATTERN (insn)), 0))
1692 		  /* If the original instruction was valid then the copy
1693 		     should be too.  */
1694 		  gcc_unreachable ();
1695 	      }
1696 	}
1697     }
1698 
1699   cand->stabilized_p = true;
1700 }
1701 
1702 /* Change CAND's instruction so that it sets CAND->copy_regno instead
1703    of CAND->regno.  */
1704 
1705 void
replace_dest_with_copy(unsigned int cand_index)1706 early_remat::replace_dest_with_copy (unsigned int cand_index)
1707 {
1708   remat_candidate *cand = &m_candidates[cand_index];
1709   df_ref def;
1710   FOR_EACH_INSN_DEF (def, cand->insn)
1711     if (DF_REF_REGNO (def) == cand->regno)
1712       validate_change (cand->insn, DF_REF_REAL_LOC (def),
1713 		       regno_reg_rtx[cand->copy_regno], 1);
1714 }
1715 
1716 /* Make sure that the candidates used by candidate CAND_INDEX are available.
1717    There are two ways of doing this for an input candidate I:
1718 
1719    (1) Using the existing register number and ensuring that I is available.
1720 
1721    (2) Using a new register number (recorded in copy_regno) and adding I
1722        to VIA_COPY.  This guarantees that making I available does not
1723        conflict with other uses of the original register.
1724 
1725    REQUIRED is the set of candidates that are required but not available
1726    before the copy of CAND_INDEX.  AVAILABLE is the set of candidates
1727    that are already available before the copy of CAND_INDEX.  REACHING
1728    is the set of candidates that reach the copy of CAND_INDEX.  VIA_COPY
1729    is the set of candidates that will use new register numbers recorded
1730    in copy_regno instead of the original ones.  */
1731 
1732 void
stabilize_candidate_uses(unsigned int cand_index,bitmap required,bitmap available,bitmap reaching,bitmap via_copy)1733 early_remat::stabilize_candidate_uses (unsigned int cand_index,
1734 				       bitmap required, bitmap available,
1735 				       bitmap reaching, bitmap via_copy)
1736 {
1737   remat_candidate *cand = &m_candidates[cand_index];
1738   df_ref use;
1739   FOR_EACH_INSN_USE (use, cand->insn)
1740     {
1741       unsigned int regno = DF_REF_REGNO (use);
1742       if (!bitmap_bit_p (&m_candidate_regnos, regno))
1743 	continue;
1744 
1745       /* Work out which candidate provides the definition.  */
1746       bitmap defs = m_regno_to_candidates[regno];
1747       bitmap_and (&m_tmp_bitmap, cand->uses, defs);
1748       gcc_checking_assert (bitmap_single_bit_set_p (&m_tmp_bitmap));
1749       unsigned int def_index = bitmap_first_set_bit (&m_tmp_bitmap);
1750 
1751       /* First see if DEF_INDEX is the only reaching definition of REGNO
1752 	 at this point too and if it is or will become available.  We can
1753 	 continue to use REGNO if so.  */
1754       bitmap_and (&m_tmp_bitmap, reaching, defs);
1755       if (bitmap_single_bit_set_p (&m_tmp_bitmap)
1756 	  && bitmap_first_set_bit (&m_tmp_bitmap) == def_index
1757 	  && ((available && bitmap_bit_p (available, def_index))
1758 	      || bitmap_bit_p (required, def_index)))
1759 	{
1760 	  if (dump_file)
1761 	    fprintf (dump_file, ";; Keeping reg %d for use of candidate %d"
1762 		     " in candidate %d\n", regno, def_index, cand_index);
1763 	  continue;
1764 	}
1765 
1766       /* Otherwise fall back to using a copy.  There are other cases
1767 	 in which we *could* continue to use REGNO, but there's not
1768 	 really much point.  Using a separate register ought to make
1769 	 things easier for the register allocator.  */
1770       remat_candidate *def_cand = &m_candidates[def_index];
1771       rtx *loc = DF_REF_REAL_LOC (use);
1772       rtx new_reg;
1773       if (bitmap_set_bit (via_copy, def_index))
1774 	{
1775 	  new_reg = gen_reg_rtx (GET_MODE (*loc));
1776 	  def_cand->copy_regno = REGNO (new_reg);
1777 	  if (dump_file)
1778 	    fprintf (dump_file, ";; Creating reg %d for use of candidate %d"
1779 		     " in candidate %d\n", REGNO (new_reg), def_index,
1780 		     cand_index);
1781 	}
1782       else
1783 	new_reg = regno_reg_rtx[def_cand->copy_regno];
1784       validate_change (cand->insn, loc, new_reg, 1);
1785     }
1786 }
1787 
1788 /* Rematerialize the candidates in REQUIRED after instruction INSN,
1789    given that the candidates in AVAILABLE are already available
1790    and that REACHING is the set of candidates live after INSN.
1791    REQUIRED and AVAILABLE are disjoint on entry.
1792 
1793    Clear REQUIRED on exit.  */
1794 
1795 void
emit_remat_insns(bitmap required,bitmap available,bitmap reaching,rtx_insn * insn)1796 early_remat::emit_remat_insns (bitmap required, bitmap available,
1797 			       bitmap reaching, rtx_insn *insn)
1798 {
1799   /* Quick exit if there's nothing to do.  */
1800   if (empty_p (required))
1801     return;
1802 
1803   /* Only reaching definitions should be available or required.  */
1804   gcc_checking_assert (!bitmap_intersect_compl_p (required, reaching));
1805   if (available)
1806     gcc_checking_assert (!bitmap_intersect_compl_p (available, reaching));
1807 
1808   bitmap_head via_copy;
1809   bitmap_initialize (&via_copy, &m_obstack);
1810   while (!bitmap_empty_p (required) || !bitmap_empty_p (&via_copy))
1811     {
1812       /* Pick the lowest-indexed candidate left.  */
1813       unsigned int required_index = (bitmap_empty_p (required)
1814 				     ? ~0U : bitmap_first_set_bit (required));
1815       unsigned int via_copy_index = (bitmap_empty_p (&via_copy)
1816 				     ? ~0U : bitmap_first_set_bit (&via_copy));
1817       unsigned int cand_index = MIN (required_index, via_copy_index);
1818       remat_candidate *cand = &m_candidates[cand_index];
1819 
1820       bool via_copy_p = (cand_index == via_copy_index);
1821       if (via_copy_p)
1822 	bitmap_clear_bit (&via_copy, cand_index);
1823       else
1824 	{
1825 	  /* Remove all candidates for the same register from REQUIRED.  */
1826 	  bitmap_and (&m_tmp_bitmap, reaching,
1827 		      m_regno_to_candidates[cand->regno]);
1828 	  bitmap_and_compl_into (required, &m_tmp_bitmap);
1829 	  gcc_checking_assert (!bitmap_bit_p (required, cand_index));
1830 
1831 	  /* Only rematerialize if we have a single reaching definition
1832 	     of the register.  */
1833 	  if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
1834 	    {
1835 	      if (dump_file)
1836 		{
1837 		  fprintf (dump_file, ";; Can't rematerialize reg %d after ",
1838 			   cand->regno);
1839 		  dump_insn_id (insn);
1840 		  fprintf (dump_file, ": more than one reaching definition\n");
1841 		}
1842 	      continue;
1843 	    }
1844 
1845 	  /* Skip candidates that can't be rematerialized.  */
1846 	  if (!cand->can_copy_p)
1847 	    continue;
1848 
1849 	  /* Check the function precondition.  */
1850 	  gcc_checking_assert (!available
1851 			       || !bitmap_bit_p (available, cand_index));
1852 	}
1853 
1854       /* Invalid candidates should have been weeded out by now.  */
1855       gcc_assert (cand->can_copy_p);
1856 
1857       rtx new_pattern;
1858       if (cand->constant_p)
1859 	{
1860 	  /* Emit a simple move.  */
1861 	  unsigned int regno = via_copy_p ? cand->copy_regno : cand->regno;
1862 	  new_pattern = gen_move_insn (regno_reg_rtx[regno], cand->remat_rtx);
1863 	}
1864       else
1865 	{
1866 	  /* If this is the first time we've copied the instruction, make
1867 	     sure that any inputs will have the same value after INSN.  */
1868 	  stabilize_pattern (cand_index);
1869 
1870 	  /* Temporarily adjust the original instruction so that it has
1871 	     the right form for the copy.  */
1872 	  if (via_copy_p)
1873 	    replace_dest_with_copy (cand_index);
1874 	  if (cand->uses)
1875 	    stabilize_candidate_uses (cand_index, required, available,
1876 				      reaching, &via_copy);
1877 
1878 	  /* Get the new instruction pattern.  */
1879 	  new_pattern = copy_insn (cand->remat_rtx);
1880 
1881 	  /* Undo the temporary changes.  */
1882 	  cancel_changes (0);
1883 	}
1884 
1885       /* Emit the new instruction.  */
1886       rtx_insn *new_insn = emit_insn_after (new_pattern, insn);
1887 
1888       if (dump_file)
1889 	{
1890 	  fprintf (dump_file, ";; Rematerializing candidate %d after ",
1891 		   cand_index);
1892 	  dump_insn_id (insn);
1893 	  if (via_copy_p)
1894 	    fprintf (dump_file, " with new destination reg %d",
1895 		     cand->copy_regno);
1896 	  fprintf (dump_file, ":\n\n");
1897 	  print_rtl_single (dump_file, new_insn);
1898 	  fprintf (dump_file, "\n");
1899 	}
1900     }
1901 }
1902 
1903 /* Recompute INFO's available_out set, given that it's distinct from
1904    available_in and available_locally.  */
1905 
1906 bool
set_available_out(remat_block_info * info)1907 early_remat::set_available_out (remat_block_info *info)
1908 {
1909   if (empty_p (info->available_locally))
1910     return bitmap_and_compl (get_bitmap (&info->available_out),
1911 			     info->available_in, info->rd_kill);
1912 
1913   if (empty_p (info->rd_kill))
1914     return bitmap_ior (get_bitmap (&info->available_out),
1915 		       info->available_locally, info->available_in);
1916 
1917   return bitmap_ior_and_compl (get_bitmap (&info->available_out),
1918 			       info->available_locally, info->available_in,
1919 			       info->rd_kill);
1920 }
1921 
1922 /* If BB has more than one call, decide which candidates should be
1923    rematerialized after the non-final calls and emit the associated
1924    instructions.  Record other information about the block in preparation
1925    for the global phase.  */
1926 
1927 void
process_block(basic_block bb)1928 early_remat::process_block (basic_block bb)
1929 {
1930   remat_block_info *info = &m_block_info[bb->index];
1931   rtx_insn *last_call = NULL;
1932   rtx_insn *insn;
1933 
1934   /* Ensure that we always use the same candidate index to refer to an
1935      equivalence class.  */
1936   if (info->rd_out == info->rd_in)
1937     {
1938       canon_bitmap (&info->rd_in);
1939       info->rd_out = info->rd_in;
1940     }
1941   else
1942     {
1943       canon_bitmap (&info->rd_in);
1944       canon_bitmap (&info->rd_out);
1945     }
1946   canon_bitmap (&info->rd_kill);
1947   canon_bitmap (&info->rd_gen);
1948 
1949   /* The set of candidates that should be rematerialized on entry to the
1950      block or after the previous call (whichever is more recent).  */
1951   init_temp_bitmap (&m_required);
1952 
1953   /* The set of candidates that reach the current instruction (i.e. are
1954      live just before the instruction).  */
1955   bitmap_head reaching;
1956   bitmap_initialize (&reaching, &m_obstack);
1957   if (info->rd_in)
1958     bitmap_copy (&reaching, info->rd_in);
1959 
1960   /* The set of candidates that are live and available without
1961      rematerialization just before the current instruction.  This only
1962      accounts for earlier candidates in the block, or those that become
1963      available by being added to M_REQUIRED.  */
1964   init_temp_bitmap (&m_available);
1965 
1966   /* Get the range of candidates in the block.  */
1967   unsigned int next_candidate = info->first_candidate;
1968   unsigned int num_candidates = info->num_candidates;
1969   remat_candidate *next_def = (num_candidates > 0
1970 			       ? &m_candidates[next_candidate]
1971 			       : NULL);
1972 
1973   FOR_BB_INSNS (bb, insn)
1974     {
1975       if (!NONDEBUG_INSN_P (insn))
1976 	continue;
1977 
1978       /* First process uses, since this is a forward walk.  */
1979       df_ref ref;
1980       FOR_EACH_INSN_USE (ref, insn)
1981 	{
1982 	  unsigned int regno = DF_REF_REGNO (ref);
1983 	  if (bitmap_bit_p (&m_candidate_regnos, regno))
1984 	    {
1985 	      bitmap defs = m_regno_to_candidates[regno];
1986 	      bitmap_and (&m_tmp_bitmap, defs, &reaching);
1987 	      gcc_checking_assert (!bitmap_empty_p (&m_tmp_bitmap));
1988 	      if (!bitmap_intersect_p (defs, m_available))
1989 		{
1990 		  /* There has been no definition of the register since
1991 		     the last call or the start of the block (whichever
1992 		     is most recent).  Mark the reaching definitions
1993 		     as required at that point and thus available here.  */
1994 		  bitmap_ior_into (m_required, &m_tmp_bitmap);
1995 		  bitmap_ior_into (m_available, &m_tmp_bitmap);
1996 		}
1997 	    }
1998 	}
1999 
2000       if (CALL_P (insn))
2001 	{
2002 	  if (!last_call)
2003 	    {
2004 	      /* The first call in the block.  Record which candidates are
2005 		 required at the start of the block.  */
2006 	      copy_temp_bitmap (&info->required_in, &m_required);
2007 	      init_temp_bitmap (&m_required);
2008 	    }
2009 	  else
2010 	    {
2011 	      /* The fully-local case: candidates that need to be
2012 		 rematerialized after a previous call in the block.  */
2013 	      restrict_remat_for_call (m_required, last_call);
2014 	      emit_remat_insns (m_required, NULL, info->rd_after_call,
2015 				last_call);
2016 	    }
2017 	  last_call = insn;
2018 	  bitmap_clear (m_available);
2019 	  gcc_checking_assert (empty_p (m_required));
2020 	}
2021 
2022       /* Now process definitions.  */
2023       while (next_def && insn == next_def->insn)
2024 	{
2025 	  unsigned int gen = canon_candidate (next_candidate);
2026 
2027 	  /* Other candidates with the same regno are not available
2028 	     any more.  */
2029 	  bitmap kill = m_regno_to_candidates[next_def->regno];
2030 	  bitmap_and_compl_into (m_available, kill);
2031 	  bitmap_and_compl_into (&reaching, kill);
2032 
2033 	  /* Record that this candidate is available without
2034 	     rematerialization.  */
2035 	  bitmap_set_bit (m_available, gen);
2036 	  bitmap_set_bit (&reaching, gen);
2037 
2038 	  /* Find the next candidate in the block.  */
2039 	  num_candidates -= 1;
2040 	  next_candidate -= 1;
2041 	  if (num_candidates > 0)
2042 	    next_def -= 1;
2043 	  else
2044 	    next_def = NULL;
2045 	}
2046 
2047       if (insn == last_call)
2048 	bitmap_copy (get_bitmap (&info->rd_after_call), &reaching);
2049     }
2050   bitmap_clear (&reaching);
2051   gcc_checking_assert (num_candidates == 0);
2052 
2053   /* Remove values from the available set if they aren't live (and so
2054      aren't interesting to successor blocks).  */
2055   if (info->rd_out)
2056     bitmap_and_into (m_available, info->rd_out);
2057 
2058   /* Record the accumulated information.  */
2059   info->last_call = last_call;
2060   info->abnormal_call_p = (last_call
2061 			   && last_call == BB_END (bb)
2062 			   && has_abnormal_or_eh_outgoing_edge_p (bb));
2063   copy_temp_bitmap (&info->available_locally, &m_available);
2064   if (last_call)
2065     copy_temp_bitmap (&info->required_after_call, &m_required);
2066   else
2067     copy_temp_bitmap (&info->required_in, &m_required);
2068 
2069   /* Assume at first that all live-in values are available without
2070      rematerialization (i.e. start with the most optimistic assumption).  */
2071   if (info->available_in)
2072     {
2073       if (info->rd_in)
2074 	bitmap_copy (info->available_in, info->rd_in);
2075       else
2076 	BITMAP_FREE (info->available_in);
2077     }
2078 
2079   if (last_call || empty_p (info->available_in))
2080     /* The values available on exit from the block are exactly those that
2081        are available locally.  This set doesn't change.  */
2082     info->available_out = info->available_locally;
2083   else if (empty_p (info->available_locally) && empty_p (info->rd_kill))
2084     /* The values available on exit are the same as those available on entry.
2085        Updating one updates the other.  */
2086     info->available_out = info->available_in;
2087   else
2088     set_available_out (info);
2089 }
2090 
2091 /* Process each block as for process_block, visiting dominators before
2092    the blocks they dominate.  */
2093 
2094 void
local_phase(void)2095 early_remat::local_phase (void)
2096 {
2097   if (dump_file)
2098     fprintf (dump_file, "\n;; Local phase:\n");
2099 
2100   int *postorder = df_get_postorder (DF_BACKWARD);
2101   unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
2102   for (unsigned int i = postorder_len; i-- > 0; )
2103     if (postorder[i] >= NUM_FIXED_BLOCKS)
2104       process_block (BASIC_BLOCK_FOR_FN (m_fn, postorder[i]));
2105 }
2106 
2107 /* Return true if available values survive across edge E.  */
2108 
2109 static inline bool
available_across_edge_p(edge e)2110 available_across_edge_p (edge e)
2111 {
2112   return (e->flags & EDGE_EH) == 0;
2113 }
2114 
2115 /* Propagate information from the available_out set of E->src to the
2116    available_in set of E->dest, when computing global availability.
2117    Return true if something changed.  */
2118 
2119 bool
avail_confluence_n(edge e)2120 early_remat::avail_confluence_n (edge e)
2121 {
2122   remat_block_info *src = &er->m_block_info[e->src->index];
2123   remat_block_info *dest = &er->m_block_info[e->dest->index];
2124 
2125   if (!available_across_edge_p (e))
2126     return false;
2127 
2128   if (empty_p (dest->available_in))
2129     return false;
2130 
2131   if (!src->available_out)
2132     {
2133       bitmap_clear (dest->available_in);
2134       return true;
2135     }
2136 
2137   return bitmap_and_into (dest->available_in, src->available_out);
2138 }
2139 
2140 /* Propagate information from the available_in set of block BB_INDEX
2141    to available_out.  Return true if something changed.  */
2142 
2143 bool
avail_transfer(int bb_index)2144 early_remat::avail_transfer (int bb_index)
2145 {
2146   remat_block_info *info = &er->m_block_info[bb_index];
2147 
2148   if (info->available_out == info->available_locally)
2149     return false;
2150 
2151   if (info->available_out == info->available_in)
2152     /* Assume that we are only called if the input changed.  */
2153     return true;
2154 
2155   return er->set_available_out (info);
2156 }
2157 
2158 /* Compute global availability for the function, starting with the local
2159    information computed by local_phase.  */
2160 
2161 void
compute_availability(void)2162 early_remat::compute_availability (void)
2163 {
2164   /* We use df_simple_dataflow instead of the lcm routines for three reasons:
2165 
2166      (1) it avoids recomputing the traversal order;
2167      (2) many of the sets are likely to be sparse, so we don't necessarily
2168 	 want to use sbitmaps; and
2169      (3) it means we can avoid creating an explicit kill set for the call.  */
2170   er = this;
2171   bitmap_clear (&m_tmp_bitmap);
2172   bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
2173   df_simple_dataflow (DF_FORWARD, NULL, NULL,
2174 		      avail_confluence_n, avail_transfer,
2175 		      &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
2176 		      df_get_n_blocks (DF_FORWARD));
2177   er = 0;
2178 
2179   /* Restrict the required_in sets to values that aren't available.  */
2180   basic_block bb;
2181   FOR_EACH_BB_FN (bb, m_fn)
2182     {
2183       remat_block_info *info = &m_block_info[bb->index];
2184       if (info->required_in && info->available_in)
2185 	bitmap_and_compl_into (info->required_in, info->available_in);
2186     }
2187 }
2188 
2189 /* Make sure that INFO's available_out and available_in sets are unique.  */
2190 
2191 inline void
unshare_available_sets(remat_block_info * info)2192 early_remat::unshare_available_sets (remat_block_info *info)
2193 {
2194   if (info->available_in && info->available_in == info->available_out)
2195     {
2196       info->available_in = alloc_bitmap ();
2197       bitmap_copy (info->available_in, info->available_out);
2198     }
2199 }
2200 
2201 /* Return true if it is possible to move rematerializations from the
2202    destination of E to the source of E.  */
2203 
2204 inline bool
can_move_across_edge_p(edge e)2205 early_remat::can_move_across_edge_p (edge e)
2206 {
2207   return (available_across_edge_p (e)
2208 	  && !m_block_info[e->src->index].abnormal_call_p);
2209 }
2210 
2211 /* Return true if it is cheaper to rematerialize values at the head of
2212    block QUERY_BB_INDEX instead of rematerializing in its predecessors.  */
2213 
2214 bool
local_remat_cheaper_p(unsigned int query_bb_index)2215 early_remat::local_remat_cheaper_p (unsigned int query_bb_index)
2216 {
2217   if (m_block_info[query_bb_index].remat_frequency_valid_p)
2218     return m_block_info[query_bb_index].local_remat_cheaper_p;
2219 
2220   /* Iteratively compute the cost of rematerializing values in the
2221      predecessor blocks, then compare that with the cost of
2222      rematerializing at the head of the block.
2223 
2224      A cycle indicates that there is no call on that execution path,
2225      so it isn't necessary to rematerialize on that path.  */
2226   auto_vec<basic_block, 16> stack;
2227   stack.quick_push (BASIC_BLOCK_FOR_FN (m_fn, query_bb_index));
2228   while (!stack.is_empty ())
2229     {
2230       basic_block bb = stack.last ();
2231       remat_block_info *info = &m_block_info[bb->index];
2232       if (info->remat_frequency_valid_p)
2233 	{
2234 	  stack.pop ();
2235 	  continue;
2236 	}
2237 
2238       info->visited_p = true;
2239       int frequency = 0;
2240       bool can_move_p = true;
2241       edge e;
2242       edge_iterator ei;
2243       FOR_EACH_EDGE (e, ei, bb->preds)
2244 	if (!can_move_across_edge_p (e))
2245 	  {
2246 	    can_move_p = false;
2247 	    break;
2248 	  }
2249 	else if (m_block_info[e->src->index].last_call)
2250 	  /* We'll rematerialize after the call.  */
2251 	  frequency += e->src->count.to_frequency (m_fn);
2252 	else if (m_block_info[e->src->index].remat_frequency_valid_p)
2253 	  /* Add the cost of rematerializing at the head of E->src
2254 	     or in its predecessors (whichever is cheaper).  */
2255 	  frequency += m_block_info[e->src->index].remat_frequency;
2256 	else if (!m_block_info[e->src->index].visited_p)
2257 	  /* Queue E->src and then revisit this block again.  */
2258 	  stack.safe_push (e->src);
2259 
2260       /* Come back to this block later if we need to process some of
2261 	 its predecessors.  */
2262       if (stack.last () != bb)
2263 	continue;
2264 
2265       /* If rematerializing in and before the block have equal cost, prefer
2266 	 rematerializing in the block.  This should shorten the live range.  */
2267       int bb_frequency = bb->count.to_frequency (m_fn);
2268       if (!can_move_p || frequency >= bb_frequency)
2269 	{
2270 	  info->local_remat_cheaper_p = true;
2271 	  info->remat_frequency = bb_frequency;
2272 	}
2273       else
2274 	info->remat_frequency = frequency;
2275       info->remat_frequency_valid_p = true;
2276       info->visited_p = false;
2277       if (dump_file)
2278 	{
2279 	  if (!can_move_p)
2280 	    fprintf (dump_file, ";; Need to rematerialize at the head of"
2281 		     " block %d; cannot move to predecessors.\n", bb->index);
2282 	  else
2283 	    {
2284 	      fprintf (dump_file, ";; Block %d has frequency %d,"
2285 		       " rematerializing in predecessors has frequency %d",
2286 		       bb->index, bb_frequency, frequency);
2287 	      if (info->local_remat_cheaper_p)
2288 		fprintf (dump_file, "; prefer to rematerialize"
2289 			 " in the block\n");
2290 	      else
2291 		fprintf (dump_file, "; prefer to rematerialize"
2292 			 " in predecessors\n");
2293 	    }
2294 	}
2295       stack.pop ();
2296     }
2297   return m_block_info[query_bb_index].local_remat_cheaper_p;
2298 }
2299 
2300 /* Return true if we cannot rematerialize candidate CAND_INDEX at the head of
2301    block BB_INDEX.  */
2302 
2303 bool
need_to_move_candidate_p(unsigned int bb_index,unsigned int cand_index)2304 early_remat::need_to_move_candidate_p (unsigned int bb_index,
2305 				       unsigned int cand_index)
2306 {
2307   remat_block_info *info = &m_block_info[bb_index];
2308   remat_candidate *cand = &m_candidates[cand_index];
2309   basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);
2310 
2311   /* If there is more than one reaching definition of REGNO,
2312      we'll need to rematerialize in predecessors instead.  */
2313   bitmap_and (&m_tmp_bitmap, info->rd_in, m_regno_to_candidates[cand->regno]);
2314   if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
2315     {
2316       if (dump_file)
2317 	fprintf (dump_file, ";; Cannot rematerialize %d at the"
2318 		 " head of block %d because there is more than one"
2319 		 " reaching definition of reg %d\n", cand_index,
2320 		 bb_index, cand->regno);
2321       return true;
2322     }
2323 
2324   /* Likewise if rematerializing CAND here would clobber a live register.  */
2325   if (cand->clobbers
2326       && bitmap_intersect_p (cand->clobbers, DF_LR_IN (bb)))
2327     {
2328       if (dump_file)
2329 	fprintf (dump_file, ";; Cannot rematerialize %d at the"
2330 		 " head of block %d because it would clobber live"
2331 		 " registers\n", cand_index, bb_index);
2332       return true;
2333     }
2334 
2335   return false;
2336 }
2337 
2338 /* Set REQUIRED to the minimum set of candidates that must be rematerialized
2339    in predecessors of block BB_INDEX instead of at the start of the block.  */
2340 
2341 void
compute_minimum_move_set(unsigned int bb_index,bitmap required)2342 early_remat::compute_minimum_move_set (unsigned int bb_index,
2343 				       bitmap required)
2344 {
2345   remat_block_info *info = &m_block_info[bb_index];
2346   bitmap_head remaining;
2347 
2348   bitmap_clear (required);
2349   bitmap_initialize (&remaining, &m_obstack);
2350   bitmap_copy (&remaining, info->required_in);
2351   while (!bitmap_empty_p (&remaining))
2352     {
2353       unsigned int cand_index = bitmap_first_set_bit (&remaining);
2354       remat_candidate *cand = &m_candidates[cand_index];
2355       bitmap_clear_bit (&remaining, cand_index);
2356 
2357       /* Leave invalid candidates where they are.  */
2358       if (!cand->can_copy_p)
2359 	continue;
2360 
2361       /* Decide whether to move this candidate.  */
2362       if (!bitmap_bit_p (required, cand_index))
2363 	{
2364 	  if (!need_to_move_candidate_p (bb_index, cand_index))
2365 	    continue;
2366 	  bitmap_set_bit (required, cand_index);
2367 	}
2368 
2369       /* Also move values used by the candidate, so that we don't
2370 	 rematerialize them twice.  */
2371       if (cand->uses)
2372 	{
2373 	  bitmap_ior_and_into (required, cand->uses, info->required_in);
2374 	  bitmap_ior_and_into (&remaining, cand->uses, info->required_in);
2375 	}
2376     }
2377 }
2378 
2379 /* Make the predecessors of BB_INDEX rematerialize the candidates in
2380    REQUIRED.  Add any blocks whose required_in set changes to
2381    PENDING_BLOCKS.  */
2382 
2383 void
move_to_predecessors(unsigned int bb_index,bitmap required,bitmap pending_blocks)2384 early_remat::move_to_predecessors (unsigned int bb_index, bitmap required,
2385 				   bitmap pending_blocks)
2386 {
2387   if (empty_p (required))
2388     return;
2389   remat_block_info *dest_info = &m_block_info[bb_index];
2390   basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);
2391   edge e;
2392   edge_iterator ei;
2393   FOR_EACH_EDGE (e, ei, bb->preds)
2394     {
2395       remat_block_info *src_info = &m_block_info[e->src->index];
2396 
2397       /* Restrict the set we add to the reaching definitions.  */
2398       bitmap_and (&m_tmp_bitmap, required, src_info->rd_out);
2399       if (bitmap_empty_p (&m_tmp_bitmap))
2400 	continue;
2401 
2402       if (!can_move_across_edge_p (e))
2403 	{
2404 	  /* We can't move the rematerialization and we can't do it at
2405 	     the start of the block either.  In this case we just give up
2406 	     and rely on spilling to make the values available across E.  */
2407 	  if (dump_file)
2408 	    {
2409 	      fprintf (dump_file, ";; Cannot rematerialize the following"
2410 		       " candidates in block %d:", e->src->index);
2411 	      dump_candidate_bitmap (required);
2412 	      fprintf (dump_file, "\n");
2413 	    }
2414 	  continue;
2415 	}
2416 
2417       /* Remove candidates that are already available.  */
2418       if (src_info->available_out)
2419 	{
2420 	  bitmap_and_compl_into (&m_tmp_bitmap, src_info->available_out);
2421 	  if (bitmap_empty_p (&m_tmp_bitmap))
2422 	    continue;
2423 	}
2424 
2425       /* Add the remaining candidates to the appropriate required set.  */
2426       if (dump_file)
2427 	{
2428 	  fprintf (dump_file, ";; Moving this set from block %d"
2429 		   " to block %d:", bb_index, e->src->index);
2430 	  dump_candidate_bitmap (&m_tmp_bitmap);
2431 	  fprintf (dump_file, "\n");
2432 	}
2433       /* If the source block contains a call, we want to rematerialize
2434 	 after the call, otherwise we want to rematerialize at the start
2435 	 of the block.  */
2436       bitmap src_required = get_bitmap (src_info->last_call
2437 					? &src_info->required_after_call
2438 					: &src_info->required_in);
2439       if (bitmap_ior_into (src_required, &m_tmp_bitmap))
2440 	{
2441 	  if (!src_info->last_call)
2442 	    bitmap_set_bit (pending_blocks, e->src->index);
2443 	  unshare_available_sets (src_info);
2444 	  bitmap_ior_into (get_bitmap (&src_info->available_out),
2445 			   &m_tmp_bitmap);
2446 	}
2447     }
2448 
2449   /* The candidates are now available on entry to the block.  */
2450   bitmap_and_compl_into (dest_info->required_in, required);
2451   unshare_available_sets (dest_info);
2452   bitmap_ior_into (get_bitmap (&dest_info->available_in), required);
2453 }
2454 
2455 /* Go through the candidates that are currently marked as being
2456    rematerialized at the beginning of a block.  Decide in each case
2457    whether that's valid and profitable; if it isn't, move the
2458    rematerialization to predecessor blocks instead.  */
2459 
2460 void
choose_rematerialization_points(void)2461 early_remat::choose_rematerialization_points (void)
2462 {
2463   bitmap_head required;
2464   bitmap_head pending_blocks;
2465 
2466   int *postorder = df_get_postorder (DF_BACKWARD);
2467   unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
2468   bitmap_initialize (&required, &m_obstack);
2469   bitmap_initialize (&pending_blocks, &m_obstack);
2470   do
2471     /* Process the blocks in postorder, to reduce the number of iterations
2472        of the outer loop.  */
2473     for (unsigned int i = 0; i < postorder_len; ++i)
2474       {
2475 	unsigned int bb_index = postorder[i];
2476 	remat_block_info *info = &m_block_info[bb_index];
2477 	bitmap_clear_bit (&pending_blocks, bb_index);
2478 
2479 	if (empty_p (info->required_in))
2480 	  continue;
2481 
2482 	if (info->available_in)
2483 	  gcc_checking_assert (!bitmap_intersect_p (info->required_in,
2484 						    info->available_in));
2485 
2486 	if (local_remat_cheaper_p (bb_index))
2487 	  {
2488 	    /* We'd prefer to rematerialize at the head of the block.
2489 	       Only move candidates if we need to.  */
2490 	    compute_minimum_move_set (bb_index, &required);
2491 	    move_to_predecessors (bb_index, &required, &pending_blocks);
2492 	  }
2493 	else
2494 	  move_to_predecessors (bb_index, info->required_in,
2495 				&pending_blocks);
2496       }
2497   while (!bitmap_empty_p (&pending_blocks));
2498   bitmap_clear (&required);
2499 }
2500 
2501 /* Emit all rematerialization instructions queued for BB.  */
2502 
2503 void
emit_remat_insns_for_block(basic_block bb)2504 early_remat::emit_remat_insns_for_block (basic_block bb)
2505 {
2506   remat_block_info *info = &m_block_info[bb->index];
2507 
2508   if (info->last_call && !empty_p (info->required_after_call))
2509     {
2510       restrict_remat_for_call (info->required_after_call, info->last_call);
2511       emit_remat_insns (info->required_after_call, NULL,
2512 			info->rd_after_call, info->last_call);
2513     }
2514 
2515   if (!empty_p (info->required_in))
2516     {
2517       rtx_insn *insn = BB_HEAD (bb);
2518       while (insn != BB_END (bb)
2519 	     && !INSN_P (NEXT_INSN (insn)))
2520 	insn = NEXT_INSN (insn);
2521       restrict_remat_for_unavail_regs (info->required_in, DF_LR_IN (bb));
2522       emit_remat_insns (info->required_in, info->available_in,
2523 			info->rd_in, insn);
2524     }
2525 }
2526 
2527 /* Decide which candidates in each block's REQUIRED_IN set need to be
2528    rematerialized and decide where the rematerialization instructions
2529    should go.  Emit queued rematerialization instructions at the start
2530    of blocks and after the last calls in blocks.  */
2531 
2532 void
global_phase(void)2533 early_remat::global_phase (void)
2534 {
2535   compute_availability ();
2536   if (dump_file)
2537     {
2538       fprintf (dump_file, "\n;; Blocks after computing global"
2539 	       " availability:\n");
2540       dump_all_blocks ();
2541     }
2542 
2543   choose_rematerialization_points ();
2544   if (dump_file)
2545     {
2546       fprintf (dump_file, "\n;; Blocks after choosing rematerialization"
2547 	       " points:\n");
2548       dump_all_blocks ();
2549     }
2550 
2551   basic_block bb;
2552   FOR_EACH_BB_FN (bb, m_fn)
2553     emit_remat_insns_for_block (bb);
2554 }
2555 
2556 /* Main function for the pass.  */
2557 
2558 void
run(void)2559 early_remat::run (void)
2560 {
2561   df_analyze ();
2562 
2563   if (!collect_candidates ())
2564     return;
2565 
2566   init_block_info ();
2567   sort_candidates ();
2568   finalize_candidate_indices ();
2569   if (dump_file)
2570     dump_all_candidates ();
2571 
2572   compute_rd ();
2573   decide_candidate_validity ();
2574   local_phase ();
2575   global_phase ();
2576 }
2577 
early_remat(function * fn,sbitmap selected_modes)2578 early_remat::early_remat (function *fn, sbitmap selected_modes)
2579   : m_fn (fn),
2580     m_selected_modes (selected_modes),
2581     m_available (0),
2582     m_required (0),
2583     m_value_table (63)
2584 {
2585   bitmap_obstack_initialize (&m_obstack);
2586   bitmap_initialize (&m_candidate_regnos, &m_obstack);
2587   bitmap_initialize (&m_tmp_bitmap, &m_obstack);
2588 }
2589 
~early_remat()2590 early_remat::~early_remat ()
2591 {
2592   bitmap_obstack_release (&m_obstack);
2593 }
2594 
2595 namespace {
2596 
2597 const pass_data pass_data_early_remat =
2598 {
2599   RTL_PASS, /* type */
2600   "early_remat", /* name */
2601   OPTGROUP_NONE, /* optinfo_flags */
2602   TV_EARLY_REMAT, /* tv_id */
2603   0, /* properties_required */
2604   0, /* properties_provided */
2605   0, /* properties_destroyed */
2606   0, /* todo_flags_start */
2607   TODO_df_finish, /* todo_flags_finish */
2608 };
2609 
2610 class pass_early_remat : public rtl_opt_pass
2611 {
2612 public:
pass_early_remat(gcc::context * ctxt)2613   pass_early_remat (gcc::context *ctxt)
2614     : rtl_opt_pass (pass_data_early_remat, ctxt)
2615   {}
2616 
2617   /* opt_pass methods: */
gate(function *)2618   virtual bool gate (function *)
2619   {
2620     return optimize > 1 && NUM_POLY_INT_COEFFS > 1;
2621   }
2622 
execute(function * f)2623   virtual unsigned int execute (function *f)
2624   {
2625     auto_sbitmap selected_modes (NUM_MACHINE_MODES);
2626     bitmap_clear (selected_modes);
2627     targetm.select_early_remat_modes (selected_modes);
2628     if (!bitmap_empty_p (selected_modes))
2629       early_remat (f, selected_modes).run ();
2630     return 0;
2631   }
2632 }; // class pass_early_remat
2633 
2634 } // anon namespace
2635 
2636 rtl_opt_pass *
make_pass_early_remat(gcc::context * ctxt)2637 make_pass_early_remat (gcc::context *ctxt)
2638 {
2639   return new pass_early_remat (ctxt);
2640 }
2641