/* Integrated Register Allocator (IRA) entry point.
   Copyright (C) 2006-2022 Free Software Foundation, Inc.
   Contributed by Vladimir Makarov <vmakarov@redhat.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The integrated register allocator (IRA) is a
   regional register allocator performing graph coloring on a top-down
   traversal of nested regions.  Graph coloring in a region is based
   on the Chaitin-Briggs algorithm.  It is called integrated because
   register coalescing, register live range splitting, and choosing a
   better hard register are done on-the-fly during coloring.  Register
   coalescing and choosing a cheaper hard register are done by hard
   register preferencing during hard register assignment.  The live
   range splitting is a byproduct of the regional register allocation.

   Major IRA notions are:

     o *Region* is a part of the CFG where graph coloring based on
       the Chaitin-Briggs algorithm is done.  IRA can work on any set
       of nested CFG regions forming a tree.  Currently the regions
       are the entire function for the root region and natural loops
       for the other regions.  Therefore the data structure
       representing a region is called loop_tree_node.

     o *Allocno class* is a register class used for allocation of a
       given allocno.  It means that only hard registers of the given
       register class can be assigned to the given allocno.  In
       reality, an even smaller subset of (*profitable*) hard
       registers can be assigned.  In rare cases, the subset can be
       even smaller because our modification of the Chaitin-Briggs
       algorithm requires that the sets of hard registers assignable
       to allocnos form a forest, i.e. the sets can be ordered in a
       way where any previous set either does not intersect a given
       set or is a superset of it.

     o *Pressure class* is a register class belonging to a set of
       register classes containing all of the hard-registers available
       for register allocation.  The set of all pressure classes for a
       target is defined in the corresponding machine-description file
       according to some criteria.  Register pressure is calculated
       only for pressure classes and it affects some IRA decisions,
       such as forming allocation regions.

     o *Allocno* represents the live range of a pseudo-register in a
       region.  Besides the obvious attributes like the corresponding
       pseudo-register number, allocno class, conflicting allocnos and
       conflicting hard-registers, there are a few allocno attributes
       which are important for understanding the allocation algorithm:

       - *Live ranges*.  This is a list of ranges of *program points*
         where the allocno lives.  Program points represent places
         where a pseudo can be born or become dead (there are
         approximately two times more program points than insns) and
         they are represented by integers starting with 0.  The live
         ranges are used to find conflicts between allocnos (see the
         illustrative sketch after this list).  They also play a very
         important role for the transformation of the IRA internal
         representation of several regions into a one-region
         representation.  The latter is used during the reload pass
         because each allocno represents all of the corresponding
         pseudo-registers.

       - *Hard-register costs*.  This is a vector of size equal to the
         number of available hard-registers of the allocno class.  The
         cost of a callee-clobbered hard-register for an allocno is
         increased by the cost of save/restore code around the calls
         through the given allocno's life.  If the allocno is a move
         instruction operand and another operand is a hard-register of
         the allocno class, the cost of the hard-register is decreased
         by the move cost.

         When an allocno is assigned, the hard-register with minimal
         full cost is used.  Initially, a hard-register's full cost is
         the corresponding value from the hard-register's cost vector.
         If the allocno is connected by a *copy* (see below) to
         another allocno which has just received a hard-register, the
         cost of the hard-register is decreased (see the copy
         preferencing sketch below).  Before choosing a hard-register
         for an allocno, the allocno's current costs of the
         hard-registers are modified by the conflict hard-register
         costs of all of the conflicting allocnos which are not
         assigned yet.

       - *Conflict hard-register costs*.  This is a vector of the same
         size as the hard-register costs vector.  To permit an
         unassigned allocno to get a better hard-register, IRA uses
         this vector to calculate the final full cost of the
         available hard-registers.  Conflict hard-register costs of an
         unassigned allocno are also changed with a change of the
         hard-register cost of the allocno when a copy involving the
         allocno is processed as described above.  This is done to
         show other unassigned allocnos that a given allocno prefers
         some hard-registers in order to remove the move instruction
         corresponding to the copy.

     o *Cap*.  If a pseudo-register does not live in a region but
       lives in a nested region, IRA creates a special allocno called
       a cap in the outer region.  A region cap is also created for a
       subregion cap.

     o *Copy*.  Allocnos can be connected by copies.  Copies are used
       to modify hard-register costs for allocnos during coloring.
       Such modifications reflect a preference to use the same
       hard-register for the allocnos connected by copies.  Usually
       copies are created for move insns (in this case it results in
       register coalescing).  But IRA also creates copies for operands
       of an insn which should be assigned to the same hard-register
       due to constraints in the machine description (it usually
       results in removing a move generated in reload to satisfy
       the constraints) and copies referring to the allocno which is
       the output operand of an instruction and the allocno which is
       an input operand dying in the instruction (creation of such
       copies results in less register shuffling).  IRA *does not*
       create copies between allocnos of the same register from
       different regions because we use another technique for
       propagating hard-register preferences across region borders.

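   For illustration, here is a minimal sketch of a conflict test built
   on live ranges.  The types below are hypothetical simplifications,
   not IRA's actual data structures, although the real test works the
   same way on ordered range lists:

     // A live range is a chain of [start, finish] program-point
     // intervals, kept ordered by decreasing start point.
     struct range { int start, finish; struct range *next; };

     // Two allocnos conflict iff some pair of their ranges overlaps.
     // Thanks to the ordering, one linear scan suffices.
     static bool
     ranges_conflict_p (struct range *r1, struct range *r2)
     {
       while (r1 != NULL && r2 != NULL)
         {
           if (r1->start > r2->finish)
             r1 = r1->next;        // r1's interval lies entirely after r2's
           else if (r2->start > r1->finish)
             r2 = r2->next;        // r2's interval lies entirely after r1's
           else
             return true;          // the intervals overlap
         }
       return false;
     }
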
   Allocnos (including caps) for the upper region in the region tree
   *accumulate* information important for coloring from allocnos with
   the same pseudo-register from nested regions.  This includes
   hard-register and memory costs, conflicts with hard-registers,
   allocno conflicts, allocno copies and more.  *Thus, attributes for
   allocnos in a region have the same values as if the region had no
   subregions*.  It means that attributes for allocnos in the
   outermost region corresponding to the function have the same values
   as though the allocation used only one region which is the entire
   function.  It also means that we can look at IRA's work as if it
   first did the allocation for the whole function, then improved it
   for loops, then for their subloops, and so on.

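   Similarly, a minimal sketch of the copy preferencing described in
   the *Hard-register costs* notion above.  The flat cost vectors and
   the cost formula are hypothetical simplifications; real IRA keeps
   these vectors per allocno and scales the bonus by the copy's
   execution frequency:

     // Allocno DST is connected by a copy to allocno SRC, and SRC has
     // just been given the hard register with index HARD_REG_INDEX in
     // the cost vectors.  Make that register cheaper for DST and
     // advertise the preference through the conflict cost vector.
     static void
     prefer_hard_reg_from_copy (int *hard_reg_costs, int *conflict_costs,
                                int hard_reg_index, int copy_freq,
                                int move_cost)
     {
       int bonus = copy_freq * move_cost;
       hard_reg_costs[hard_reg_index] -= bonus;
       conflict_costs[hard_reg_index] -= bonus;
     }
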
   The major IRA passes are:

     o Building the IRA internal representation, which consists of the
       following subpasses:

       * First, IRA builds regions and creates allocnos (file
         ira-build.cc) and initializes most of their attributes.

       * Then IRA finds an allocno class for each allocno and
         calculates its initial (non-accumulated) cost of memory and
         each hard-register of its allocno class (file ira-costs.cc).

       * IRA creates live ranges of each allocno, calculates register
         pressure for each pressure class in each region, sets up
         conflict hard registers for each allocno and info about calls
         the allocno lives through (file ira-lives.cc).

       * IRA removes low register pressure loops from the regions
         mostly to speed IRA up (file ira-build.cc).

       * IRA propagates accumulated allocno info from lower region
         allocnos to corresponding upper region allocnos (file
         ira-build.cc).

       * IRA creates all caps (file ira-build.cc).

       * Having live-ranges of allocnos and their classes, IRA creates
         conflicting allocnos for each allocno.  Conflicting allocnos
         are stored as a bit vector or an array of pointers to the
         conflicting allocnos, whichever is more profitable (file
         ira-conflicts.cc); a sketch of this choice follows this list.
         At this point IRA creates allocno copies.

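   For illustration, a minimal sketch of the representation choice
   mentioned in the last subpass above.  The byte accounting is a
   hypothetical simplification of what "more profitable" means:

     // Store a conflict set densely (bit vector) when that takes no
     // more memory than a sparse array of pointers to the conflicting
     // allocnos.
     static bool
     use_conflict_bit_vector_p (int num_conflicts, int universe_size,
                                size_t ptr_size)
     {
       size_t vec_bytes = ((size_t) universe_size + 7) / 8;
       size_t arr_bytes = (size_t) num_conflicts * ptr_size;
       return vec_bytes <= arr_bytes;
     }
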
     o Coloring.  Now IRA has all the necessary info to start the
       graph coloring process.  It is done in each region in a
       top-down traversal of the region tree (file ira-color.cc).
       There are the following subpasses:

       * Finding profitable hard registers of the corresponding
         allocno class for each allocno.  For example, only
         callee-saved hard registers are frequently profitable for
         allocnos living through calls.  If the profitable hard
         register sets of the allocnos do not form a tree based on the
         subset relation, we use some approximation to form the tree.
         This approximation is used to figure out trivial colorability
         of allocnos.  The approximation is needed only in pretty rare
         cases.

       * Putting allocnos onto the coloring stack.  IRA uses Briggs
         optimistic coloring which is a major improvement over
         Chaitin's coloring.  Therefore IRA does not spill allocnos at
         this point.  There is some freedom in the order of putting
         allocnos on the stack which can affect the final result of
         the allocation.  IRA uses some heuristics to improve the
         order.  The major one is to form *threads* from colorable
         allocnos and push them on the stack by threads.  A thread is
         a set of non-conflicting colorable allocnos connected by
         copies.  The thread contains allocnos from the colorable
         bucket or colorable allocnos already pushed onto the coloring
         stack.  Pushing thread allocnos one after another onto the
         stack increases the chances of removing copies when the
         allocnos get the same hard reg.

	 We also use a modification of the Chaitin-Briggs algorithm
         which works for intersected register classes of allocnos.  To
         figure out trivial colorability of allocnos, the
         above-mentioned tree of hard register sets is used.  To get
         an idea how the algorithm works, consider an i386 example: an
         allocno to which any general hard register can be assigned.
         If the allocno conflicts with eight allocnos to which only
         the EAX register can be assigned, the given allocno is still
         trivially colorable because all the conflicting allocnos
         might be assigned only to EAX while all other general hard
         registers are still free (see the colorability sketch after
         this list).

	 To get an idea of the used trivial colorability criterion, it
	 is also useful to read the article "Graph-Coloring Register
	 Allocation for Irregular Architectures" by Michael D. Smith
	 and Glenn Holloway.  The major difference between the
	 article's approach and the approach used in IRA is that
	 Smith's approach takes register classes only from the machine
	 description while IRA calculates register classes from the
	 intermediate code too (e.g. an explicit usage of hard
	 registers in RTL code for parameter passing can result in the
	 creation of additional register classes which contain or
	 exclude the hard registers).  That makes the IRA approach
	 useful for improving coloring even for architectures with
	 regular register files, and in fact some benchmarking shows
	 the improvement for regular class architectures is even
	 bigger than for irregular ones.  Another difference is that
	 Smith's approach chooses the intersection of the classes of
	 all insn operands in which a given pseudo occurs.  IRA can
	 use bigger classes if that is still more profitable than
	 memory usage.

       * Popping the allocnos from the stack and assigning them hard
         registers.  If IRA cannot assign a hard register to an
         allocno and the allocno is coalesced, IRA undoes the
         coalescing and puts the uncoalesced allocnos onto the stack in
         the hope that some such allocnos will get a hard register
         separately.  If IRA fails to assign a hard register to an
         allocno, or memory is more profitable for it, IRA spills the
         allocno.  IRA assigns the allocno the hard-register with
         minimal full allocation cost, which reflects the cost of
         usage of the hard-register for the allocno and the cost of
         usage of the hard-register for allocnos conflicting with the
         given allocno.

       * Chaitin-Briggs coloring assigns as many pseudos as possible
         to hard registers.  After coloring we try to improve the
         allocation from the cost point of view.  We improve the
         allocation by spilling some allocnos and assigning the freed
         hard registers to other allocnos if it decreases the overall
         allocation cost.

       * After allocno assignment in the region, IRA modifies the hard
         register and memory costs for the corresponding allocnos in
         the subregions to reflect the cost of possible loads, stores,
         or moves on the border of the region and its subregions.
         When the default regional allocation algorithm is used
         (-fira-algorithm=mixed), IRA just propagates the assignment
         for allocnos if the register pressure in the region for the
         corresponding pressure class is less than the number of
         available hard registers for the given pressure class.
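
       For illustration, a sketch of the trivial colorability test
       from the stack-pushing subpass above.  It is a hypothetical
       simplification which assumes that every allocno needs a single
       hard register and that the profitable sets of the conflicting
       allocnos are subsets of the given allocno's profitable set P;
       the forest property guarantees that the maximal such subsets
       are pairwise disjoint, so their blocked counts just add up:

         // S_1 ... S_n are the maximal profitable sets among the
         // conflicting allocnos; subset_demand[i] counts conflicting
         // allocnos whose set lies inside S_i.  Each S_i can block at
         // most min (demand, |S_i|) registers of P.
         static bool
         trivially_colorable_p (int p_size, int n,
                                const int *subset_size,
                                const int *subset_demand)
         {
           int blocked = 0;
           for (int i = 0; i < n; i++)
             blocked += (subset_demand[i] < subset_size[i]
                         ? subset_demand[i] : subset_size[i]);
           return blocked < p_size;
         }

       In the i386 example above, the eight EAX-only conflicting
       allocnos give min (8, 1) = 1 blocked register, which is less
       than the number of free general hard registers, so the allocno
       is trivially colorable.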

     o Spill/restore code moving.  When IRA performs an allocation
       by traversing regions in top-down order, it does not know what
       happens below in the region tree.  Therefore, sometimes IRA
       misses opportunities to perform a better allocation.  A simple
       optimization tries to improve allocation in a region having
       subregions and contained in another region.  If the
       corresponding allocnos in the subregion are spilled, it spills
       the region allocno if it is profitable.  The optimization
       implements a simple iterative algorithm performing profitable
       transformations while they are still possible.  It is fast in
       practice, so there is no real need for a better time complexity
       algorithm.

     o Code change.  After coloring, two allocnos representing the
       same pseudo-register outside and inside a region respectively
       may be assigned to different locations (hard-registers or
       memory).  In this case IRA creates and uses a new
       pseudo-register inside the region and adds code to move allocno
       values on the region's borders.  This is done during top-down
       traversal of the regions (file ira-emit.cc).  In some
       complicated cases IRA can create a new allocno to move allocno
       values (e.g. when a swap of values stored in two hard-registers
       is needed).  At this stage, the new allocno is marked as
       spilled.  IRA still creates the pseudo-register and the moves
       on the region borders even when both allocnos were assigned to
       the same hard-register.  If the reload pass spills a
       pseudo-register for some reason, the effect will be smaller
       because another allocno will still be in the hard-register.  In
       most cases, this is better than spilling both allocnos.  If
       reload does not change the allocation for the two
       pseudo-registers, the trivial move will be removed by
       post-reload optimizations.  IRA does not generate moves for
       allocnos assigned to the same hard register when the default
       regional allocation algorithm is used and the register pressure
       in the region for the corresponding pressure class is less than
       the number of available hard registers for the given pressure
       class.  IRA also does some optimizations to remove redundant
       stores and to reduce code duplication on the region borders.

     o Flattening internal representation.  After changing code, IRA
       transforms its internal representation for several regions into
       a one-region representation (file ira-build.cc).  This process
       is called IR flattening.  Such a process is more complicated
       than IR rebuilding would be, but is much faster.

     o After IR flattening, IRA tries to assign hard registers to all
       spilled allocnos.  This is implemented by a simple and fast
       priority coloring algorithm (see function
       ira_reassign_conflict_allocnos in file ira-color.cc).  Here new
       allocnos created during the code change pass can be assigned to
       hard registers.

     o At the end IRA calls the reload pass.  The reload pass
       communicates with IRA through several functions in file
       ira-color.cc to improve its decisions in

       * sharing stack slots for the spilled pseudos based on IRA info
         about pseudo-register conflicts.

       * reassigning hard-registers to all spilled pseudos at the end
         of each reload iteration.

       * choosing a better hard-register to spill based on IRA info
         about pseudo-register live ranges and the register pressure
         in places where the pseudo-register lives.

   IRA uses a lot of data representing the target processors.  These
   data are initialized in file ira.cc.

   If the function has no loops (or the loops are ignored when
   -fira-algorithm=CB is used), we have classic Chaitin-Briggs
   coloring (only instead of a separate pass of coalescing, we use
   hard register preferencing).  In such a case, IRA works much
   faster because many things are not done (like IR flattening, the
   spill/restore optimization, and the code change).

   Literature worth reading for a better understanding of the code:

   o Preston Briggs, Keith D. Cooper, Linda Torczon.  Improvements to
     Graph Coloring Register Allocation.

   o David Callahan, Brian Koblenz.  Register allocation via
     hierarchical graph coloring.

   o Keith Cooper, Anshuman Dasgupta, Jason Eckhardt.  Revisiting
     Graph Coloring Register Allocation: A Study of the Chaitin-Briggs
     and Callahan-Koblenz Algorithms.

   o Guei-Yuan Lueh, Thomas Gross, and Ali-Reza Adl-Tabatabai.  Global
     Register Allocation Based on Graph Fusion.

   o Michael D. Smith and Glenn Holloway.  Graph-Coloring Register
     Allocation for Irregular Architectures.

   o Vladimir Makarov.  The Integrated Register Allocator for GCC.

   o Vladimir Makarov.  The top-down register allocator for irregular
     register file architectures.

*/


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "insn-config.h"
#include "regs.h"
#include "ira.h"
#include "ira-int.h"
#include "diagnostic-core.h"
#include "cfgrtl.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "expr.h"
#include "tree-pass.h"
#include "output.h"
#include "reload.h"
#include "cfgloop.h"
#include "lra.h"
#include "dce.h"
#include "dbgcnt.h"
#include "rtl-iter.h"
#include "shrink-wrap.h"
#include "print-rtl.h"

struct target_ira default_target_ira;
class target_ira_int default_target_ira_int;
#if SWITCHABLE_TARGET
struct target_ira *this_target_ira = &default_target_ira;
class target_ira_int *this_target_ira_int = &default_target_ira_int;
#endif

/* A modified value of flag `-fira-verbose' used internally.  */
int internal_flag_ira_verbose;

/* Dump file of the allocator if it is not NULL.  */
FILE *ira_dump_file;

/* The number of elements in the following array.  */
int ira_spilled_reg_stack_slots_num;

/* The following array contains info about spilled pseudo-registers
   stack slots used in the current function so far.  */
class ira_spilled_reg_stack_slot *ira_spilled_reg_stack_slots;

/* Correspondingly: overall cost of the allocation, overall cost
   before reload, cost of the allocnos assigned to hard-registers,
   cost of the allocnos assigned to memory, cost of loads, stores and
   register move insns generated for pseudo-register live range
   splitting (see ira-emit.cc).  */
int64_t ira_overall_cost, overall_cost_before;
int64_t ira_reg_cost, ira_mem_cost;
int64_t ira_load_cost, ira_store_cost, ira_shuffle_cost;
int ira_move_loops_num, ira_additional_jumps_num;

/* All registers that can be eliminated.  */

HARD_REG_SET eliminable_regset;

/* Value of max_reg_num () before IRA work starts.  This value helps
   us to recognize a situation when new pseudos were created during
   IRA work.  */
static int max_regno_before_ira;

/* Temporary hard reg set used for a different calculation.  */
static HARD_REG_SET temp_hard_regset;

#define last_mode_for_init_move_cost \
  (this_target_ira_int->x_last_mode_for_init_move_cost)

/* The function sets up the map IRA_REG_MODE_HARD_REGSET.  */
static void
setup_reg_mode_hard_regset (void)
{
  int i, m, hard_regno;

  for (m = 0; m < NUM_MACHINE_MODES; m++)
    for (hard_regno = 0; hard_regno < FIRST_PSEUDO_REGISTER; hard_regno++)
      {
        CLEAR_HARD_REG_SET (ira_reg_mode_hard_regset[hard_regno][m]);
        for (i = hard_regno_nregs (hard_regno, (machine_mode) m) - 1;
             i >= 0; i--)
          if (hard_regno + i < FIRST_PSEUDO_REGISTER)
            SET_HARD_REG_BIT (ira_reg_mode_hard_regset[hard_regno][m],
                              hard_regno + i);
      }
}
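
/* For illustration only: the map just computed answers "which hard
   registers does a value of mode M starting in hard register R
   occupy?".  A hypothetical check for a register/mode pair needing
   two hard registers, i.e. hard_regno_nregs (r, m) == 2:

     gcc_assert (TEST_HARD_REG_BIT (ira_reg_mode_hard_regset[r][m], r)
                 && TEST_HARD_REG_BIT (ira_reg_mode_hard_regset[r][m],
                                       r + 1));
*/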


#define no_unit_alloc_regs \
  (this_target_ira_int->x_no_unit_alloc_regs)

/* Set up the arrays ira_class_hard_regs, ira_class_hard_regs_num,
   ira_non_ordered_class_hard_regs and ira_class_hard_reg_index.  */
static void
setup_class_hard_regs (void)
{
  int cl, i, hard_regno, n;
  HARD_REG_SET processed_hard_reg_set;

  ira_assert (SHRT_MAX >= FIRST_PSEUDO_REGISTER);
  for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
    {
      temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs;
      CLEAR_HARD_REG_SET (processed_hard_reg_set);
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
        {
          ira_non_ordered_class_hard_regs[cl][i] = -1;
          ira_class_hard_reg_index[cl][i] = -1;
        }
      for (n = 0, i = 0; i < FIRST_PSEUDO_REGISTER; i++)
        {
#ifdef REG_ALLOC_ORDER
          hard_regno = reg_alloc_order[i];
#else
          hard_regno = i;
#endif
          if (TEST_HARD_REG_BIT (processed_hard_reg_set, hard_regno))
            continue;
          SET_HARD_REG_BIT (processed_hard_reg_set, hard_regno);
          if (! TEST_HARD_REG_BIT (temp_hard_regset, hard_regno))
            ira_class_hard_reg_index[cl][hard_regno] = -1;
          else
            {
              ira_class_hard_reg_index[cl][hard_regno] = n;
              ira_class_hard_regs[cl][n++] = hard_regno;
            }
        }
      ira_class_hard_regs_num[cl] = n;
      for (n = 0, i = 0; i < FIRST_PSEUDO_REGISTER; i++)
        if (TEST_HARD_REG_BIT (temp_hard_regset, i))
          ira_non_ordered_class_hard_regs[cl][n++] = i;
      ira_assert (ira_class_hard_regs_num[cl] == n);
    }
}
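
/* Usage sketch (illustrative only): for every allocatable hard
   register of a class, the ordered array and the index map filled
   above are mutually inverse:

     int idx = ira_class_hard_reg_index[cl][hard_regno];
     if (idx >= 0)
       gcc_assert (ira_class_hard_regs[cl][idx] == hard_regno);
*/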

/* Set up global variables defining info about hard registers for the
   allocation.  These depend on USE_HARD_FRAME_P whose TRUE value means
   that we can use the hard frame pointer for the allocation.  */
static void
setup_alloc_regs (bool use_hard_frame_p)
{
#ifdef ADJUST_REG_ALLOC_ORDER
  ADJUST_REG_ALLOC_ORDER;
#endif
  no_unit_alloc_regs = fixed_nonglobal_reg_set;
  if (! use_hard_frame_p)
    add_to_hard_reg_set (&no_unit_alloc_regs, Pmode,
                         HARD_FRAME_POINTER_REGNUM);
  setup_class_hard_regs ();
}


#define alloc_reg_class_subclasses \
  (this_target_ira_int->x_alloc_reg_class_subclasses)

/* Initialize the table of subclasses of each reg class.  */
static void
setup_reg_subclasses (void)
{
  int i, j;
  HARD_REG_SET temp_hard_regset2;

  for (i = 0; i < N_REG_CLASSES; i++)
    for (j = 0; j < N_REG_CLASSES; j++)
      alloc_reg_class_subclasses[i][j] = LIM_REG_CLASSES;

  for (i = 0; i < N_REG_CLASSES; i++)
    {
      if (i == (int) NO_REGS)
        continue;

      temp_hard_regset = reg_class_contents[i] & ~no_unit_alloc_regs;
      if (hard_reg_set_empty_p (temp_hard_regset))
        continue;
      for (j = 0; j < N_REG_CLASSES; j++)
        if (i != j)
          {
            enum reg_class *p;

            temp_hard_regset2 = reg_class_contents[j] & ~no_unit_alloc_regs;
            if (! hard_reg_set_subset_p (temp_hard_regset,
                                         temp_hard_regset2))
              continue;
            p = &alloc_reg_class_subclasses[j][0];
            while (*p != LIM_REG_CLASSES) p++;
            *p = (enum reg_class) i;
          }
    }
}
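
/* Usage sketch (illustrative only): the per-class subclass lists
   built above are terminated by LIM_REG_CLASSES, so a caller walks
   them like setup_class_translate_array below does:

     enum reg_class *p;
     for (p = &alloc_reg_class_subclasses[cl][0];
          *p != LIM_REG_CLASSES; p++)
       handle_subclass (*p);  // hypothetical callback
*/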



/* Set up IRA_MEMORY_MOVE_COST and IRA_MAX_MEMORY_MOVE_COST.  */
static void
setup_class_subset_and_memory_move_costs (void)
{
  int cl, cl2, mode, cost;
  HARD_REG_SET temp_hard_regset2;

  for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
    ira_memory_move_cost[mode][NO_REGS][0]
      = ira_memory_move_cost[mode][NO_REGS][1] = SHRT_MAX;
  for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
    {
      if (cl != (int) NO_REGS)
        for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
          {
            ira_max_memory_move_cost[mode][cl][0]
              = ira_memory_move_cost[mode][cl][0]
              = memory_move_cost ((machine_mode) mode,
                                  (reg_class_t) cl, false);
            ira_max_memory_move_cost[mode][cl][1]
              = ira_memory_move_cost[mode][cl][1]
              = memory_move_cost ((machine_mode) mode,
                                  (reg_class_t) cl, true);
            /* Costs for NO_REGS are used in cost calculation on the
               1st pass when the preferred register classes are not
               known yet.  In this case we take the best scenario.  */
            if (ira_memory_move_cost[mode][NO_REGS][0]
                > ira_memory_move_cost[mode][cl][0])
              ira_max_memory_move_cost[mode][NO_REGS][0]
                = ira_memory_move_cost[mode][NO_REGS][0]
                = ira_memory_move_cost[mode][cl][0];
            if (ira_memory_move_cost[mode][NO_REGS][1]
                > ira_memory_move_cost[mode][cl][1])
              ira_max_memory_move_cost[mode][NO_REGS][1]
                = ira_memory_move_cost[mode][NO_REGS][1]
                = ira_memory_move_cost[mode][cl][1];
          }
    }
  for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
    for (cl2 = (int) N_REG_CLASSES - 1; cl2 >= 0; cl2--)
      {
        temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs;
        temp_hard_regset2 = reg_class_contents[cl2] & ~no_unit_alloc_regs;
        ira_class_subset_p[cl][cl2]
          = hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2);
        if (! hard_reg_set_empty_p (temp_hard_regset2)
            && hard_reg_set_subset_p (reg_class_contents[cl2],
                                      reg_class_contents[cl]))
          for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
            {
              cost = ira_memory_move_cost[mode][cl2][0];
              if (cost > ira_max_memory_move_cost[mode][cl][0])
                ira_max_memory_move_cost[mode][cl][0] = cost;
              cost = ira_memory_move_cost[mode][cl2][1];
              if (cost > ira_max_memory_move_cost[mode][cl][1])
                ira_max_memory_move_cost[mode][cl][1] = cost;
            }
      }
  for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
    for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
      {
        ira_memory_move_cost[mode][cl][0]
          = ira_max_memory_move_cost[mode][cl][0];
        ira_memory_move_cost[mode][cl][1]
          = ira_max_memory_move_cost[mode][cl][1];
      }
  setup_reg_subclasses ();
}
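
/* Illustrative post-condition of the setup above (not asserted
   anywhere): after maximizing over contained classes, spilling a
   class is never accounted cheaper than spilling one of the classes
   it contains.  For any class CL, any non-NO_REGS class CL2 whose
   registers all lie in CL, any mode M and direction D (D being the
   boolean IN argument of memory_move_cost):

     gcc_assert (ira_memory_move_cost[m][cl][d]
                 >= memory_move_cost ((machine_mode) m,
                                      (reg_class_t) cl2, d != 0));
*/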


/* Define the following macro if allocation through malloc is
   preferable.  */
#define IRA_NO_OBSTACK

#ifndef IRA_NO_OBSTACK
/* Obstack used for storing all dynamic data (except bitmaps) of the
   IRA.  */
static struct obstack ira_obstack;
#endif

/* Obstack used for storing all bitmaps of the IRA.  */
static struct bitmap_obstack ira_bitmap_obstack;

/* Allocate memory of size LEN for IRA data.  */
void *
ira_allocate (size_t len)
{
  void *res;

#ifndef IRA_NO_OBSTACK
  res = obstack_alloc (&ira_obstack, len);
#else
  res = xmalloc (len);
#endif
  return res;
}

/* Free memory ADDR allocated for IRA data.  */
void
ira_free (void *addr ATTRIBUTE_UNUSED)
{
#ifndef IRA_NO_OBSTACK
  /* do nothing */
#else
  free (addr);
#endif
}


/* Allocate and return a bitmap for IRA.  */
bitmap
ira_allocate_bitmap (void)
{
  return BITMAP_ALLOC (&ira_bitmap_obstack);
}

/* Free bitmap B allocated for IRA.  */
void
ira_free_bitmap (bitmap b ATTRIBUTE_UNUSED)
{
  /* do nothing */
}



/* Output information about allocation of all allocnos (except for
   caps) into file F.  */
void
ira_print_disposition (FILE *f)
{
  int i, n, max_regno;
  ira_allocno_t a;
  basic_block bb;

  fprintf (f, "Disposition:");
  max_regno = max_reg_num ();
  for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
    for (a = ira_regno_allocno_map[i];
         a != NULL;
         a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
      {
        if (n % 4 == 0)
          fprintf (f, "\n");
        n++;
        fprintf (f, " %4d:r%-4d", ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
        if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL)
          fprintf (f, "b%-3d", bb->index);
        else
          fprintf (f, "l%-3d", ALLOCNO_LOOP_TREE_NODE (a)->loop_num);
        if (ALLOCNO_HARD_REGNO (a) >= 0)
          fprintf (f, " %3d", ALLOCNO_HARD_REGNO (a));
        else
          fprintf (f, " mem");
      }
  fprintf (f, "\n");
}

/* Output information about allocation of all allocnos into
   stderr.  */
void
ira_debug_disposition (void)
{
  ira_print_disposition (stderr);
}



/* Set up ira_stack_reg_pressure_class which is the biggest pressure
   register class containing stack registers or NO_REGS if there are
   no stack registers.  To find this class, we iterate through all
   register pressure classes and choose the register pressure class
   covering the most stack registers (the first such class in case of
   a tie).  */
static void
setup_stack_reg_pressure_class (void)
{
  ira_stack_reg_pressure_class = NO_REGS;
#ifdef STACK_REGS
  {
    int i, best, size;
    enum reg_class cl;
    HARD_REG_SET temp_hard_regset2;

    CLEAR_HARD_REG_SET (temp_hard_regset);
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      SET_HARD_REG_BIT (temp_hard_regset, i);
    best = 0;
    for (i = 0; i < ira_pressure_classes_num; i++)
      {
        cl = ira_pressure_classes[i];
        temp_hard_regset2 = temp_hard_regset & reg_class_contents[cl];
        size = hard_reg_set_size (temp_hard_regset2);
        if (best < size)
          {
            best = size;
            ira_stack_reg_pressure_class = cl;
          }
      }
  }
#endif
}

/* Find pressure classes which are register classes for which we
   calculate register pressure in IRA, register pressure sensitive
   insn scheduling, and register pressure sensitive loop invariant
   motion.

   To make register pressure calculation easy, we always use
   non-intersected register pressure classes.  A move of hard
   registers within one register pressure class is not more expensive
   than a load and store of the hard registers.  Most likely an
   allocno class will be a subset of a register pressure class, and
   in many cases it will be a register pressure class itself.  That
   makes usage of register pressure classes a good approximation to
   find a high register pressure.  */
static void
setup_pressure_classes (void)
{
  int cost, i, n, curr;
  int cl, cl2;
  enum reg_class pressure_classes[N_REG_CLASSES];
  int m;
  HARD_REG_SET temp_hard_regset2;
  bool insert_p;

  if (targetm.compute_pressure_classes)
    n = targetm.compute_pressure_classes (pressure_classes);
  else
    {
      n = 0;
      for (cl = 0; cl < N_REG_CLASSES; cl++)
        {
          if (ira_class_hard_regs_num[cl] == 0)
            continue;
          if (ira_class_hard_regs_num[cl] != 1
              /* A register class without subclasses may contain a few
                 hard registers and movement between them is costly
                 (e.g. SPARC FPCC registers).  We still should consider it
                 as a candidate for a pressure class.  */
              && alloc_reg_class_subclasses[cl][0] < cl)
            {
              /* Check that the moves between any hard registers of the
                 current class are not more expensive for a legal mode
                 than load/store of the hard registers of the current
                 class.  Such a class is a potential candidate to be a
                 register pressure class.  */
              for (m = 0; m < NUM_MACHINE_MODES; m++)
                {
                  temp_hard_regset
                    = (reg_class_contents[cl]
                       & ~(no_unit_alloc_regs
                           | ira_prohibited_class_mode_regs[cl][m]));
                  if (hard_reg_set_empty_p (temp_hard_regset))
                    continue;
                  ira_init_register_move_cost_if_necessary ((machine_mode) m);
                  cost = ira_register_move_cost[m][cl][cl];
                  if (cost <= ira_max_memory_move_cost[m][cl][1]
                      || cost <= ira_max_memory_move_cost[m][cl][0])
                    break;
                }
              if (m >= NUM_MACHINE_MODES)
                continue;
            }
          curr = 0;
          insert_p = true;
          temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs;
          /* Remove so far added pressure classes which are subsets of
             the current candidate class.  Prefer GENERAL_REGS as a
             pressure register class to another class containing the
             same allocatable hard registers.  We do this because
             machine dependent cost hooks might give wrong costs for
             the latter class but always give the right cost for the
             former class (GENERAL_REGS).  */
          for (i = 0; i < n; i++)
            {
              cl2 = pressure_classes[i];
              temp_hard_regset2 = (reg_class_contents[cl2]
                                   & ~no_unit_alloc_regs);
              if (hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2)
                  && (temp_hard_regset != temp_hard_regset2
                      || cl2 == (int) GENERAL_REGS))
                {
                  pressure_classes[curr++] = (enum reg_class) cl2;
                  insert_p = false;
                  continue;
                }
              if (hard_reg_set_subset_p (temp_hard_regset2, temp_hard_regset)
                  && (temp_hard_regset2 != temp_hard_regset
                      || cl == (int) GENERAL_REGS))
                continue;
              if (temp_hard_regset2 == temp_hard_regset)
                insert_p = false;
              pressure_classes[curr++] = (enum reg_class) cl2;
            }
          /* If the current candidate is a subset of a so far added
             pressure class, don't add it to the list of the pressure
             classes.  */
          if (insert_p)
            pressure_classes[curr++] = (enum reg_class) cl;
          n = curr;
        }
    }
#ifdef ENABLE_IRA_CHECKING
  {
    HARD_REG_SET ignore_hard_regs;

    /* Check pressure classes correctness: here we check that hard
       registers from all register pressure classes contain all hard
       registers available for the allocation.  */
    CLEAR_HARD_REG_SET (temp_hard_regset);
    CLEAR_HARD_REG_SET (temp_hard_regset2);
    ignore_hard_regs = no_unit_alloc_regs;
    for (cl = 0; cl < LIM_REG_CLASSES; cl++)
      {
        /* For some targets (like MIPS with MD_REGS), there are some
           classes with hard registers available for allocation but
           not able to hold a value of any mode.  */
        for (m = 0; m < NUM_MACHINE_MODES; m++)
          if (contains_reg_of_mode[cl][m])
            break;
        if (m >= NUM_MACHINE_MODES)
          {
            ignore_hard_regs |= reg_class_contents[cl];
            continue;
          }
        for (i = 0; i < n; i++)
          if ((int) pressure_classes[i] == cl)
            break;
        temp_hard_regset2 |= reg_class_contents[cl];
        if (i < n)
          temp_hard_regset |= reg_class_contents[cl];
      }
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      /* Some targets (like SPARC with ICC reg) have allocatable regs
         for which no reg class is defined.  */
      if (REGNO_REG_CLASS (i) == NO_REGS)
        SET_HARD_REG_BIT (ignore_hard_regs, i);
    temp_hard_regset &= ~ignore_hard_regs;
    temp_hard_regset2 &= ~ignore_hard_regs;
    ira_assert (hard_reg_set_subset_p (temp_hard_regset2, temp_hard_regset));
  }
#endif
  ira_pressure_classes_num = 0;
  for (i = 0; i < n; i++)
    {
      cl = (int) pressure_classes[i];
      ira_reg_pressure_class_p[cl] = true;
      ira_pressure_classes[ira_pressure_classes_num++] = (enum reg_class) cl;
    }
  setup_stack_reg_pressure_class ();
}
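
/* For illustration only: with the pressure classes in place, register
   pressure at a program point can be tracked per pressure class.  A
   hypothetical sketch, not IRA's actual bookkeeping (ira-lives.cc
   does the real accounting):

     // Pressure excess for pressure class PCLASS when NUM_LIVE
     // allocnos of that class are live at some program point.
     static int
     pressure_excess (enum reg_class pclass, int num_live)
     {
       int supply = ira_class_hard_regs_num[pclass];
       return num_live > supply ? num_live - supply : 0;
     }
*/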

/* Set up IRA_UNIFORM_CLASS_P.  A uniform class is a register class
   whose register move cost between any registers of the class is the
   same as for all its subclasses.  We use the data to speed up the
   2nd pass of calculations of allocno costs.  */
static void
setup_uniform_class_p (void)
{
  int i, cl, cl2, m;

  for (cl = 0; cl < N_REG_CLASSES; cl++)
    {
      ira_uniform_class_p[cl] = false;
      if (ira_class_hard_regs_num[cl] == 0)
        continue;
      /* We cannot use alloc_reg_class_subclasses here because the
         move cost hooks do not take into account that some registers
         are unavailable for the subtarget.  E.g. for i686,
         INT_SSE_REGS is an element of alloc_reg_class_subclasses for
         GENERAL_REGS because SSE regs are unavailable.  */
      for (i = 0; (cl2 = reg_class_subclasses[cl][i]) != LIM_REG_CLASSES; i++)
        {
          if (ira_class_hard_regs_num[cl2] == 0)
            continue;
          for (m = 0; m < NUM_MACHINE_MODES; m++)
            if (contains_reg_of_mode[cl][m] && contains_reg_of_mode[cl2][m])
              {
                ira_init_register_move_cost_if_necessary ((machine_mode) m);
                if (ira_register_move_cost[m][cl][cl]
                    != ira_register_move_cost[m][cl2][cl2])
                  break;
              }
          if (m < NUM_MACHINE_MODES)
            break;
        }
      if (cl2 == LIM_REG_CLASSES)
        ira_uniform_class_p[cl] = true;
    }
}

/* Set up IRA_ALLOCNO_CLASSES, IRA_ALLOCNO_CLASSES_NUM,
   IRA_IMPORTANT_CLASSES, and IRA_IMPORTANT_CLASSES_NUM.

   A target may have many subtargets and not all target hard
   registers can be used for allocation, e.g. the x86 port in 32-bit
   mode cannot use the hard registers introduced in x86-64 (like
   r8-r15).  Some classes might have the same allocatable hard
   registers, e.g. INDEX_REGS and GENERAL_REGS in the x86 port in
   32-bit mode.  To decrease the effort of various calculations, we
   introduce allocno classes which contain unique non-empty sets of
   allocatable hard-registers.

   Pseudo class cost calculation in ira-costs.cc is very expensive.
   Therefore we are trying to decrease the number of classes involved
   in such calculation.  Register classes used in the cost calculation
   are called important classes.  They are allocno classes and other
   non-empty classes whose allocatable hard register sets are inside
   of an allocno class hard register set.  At first sight, it looks
   like they are just the allocno classes.  That is not true.  In the
   example of the x86 port in 32-bit mode, allocno classes will
   contain GENERAL_REGS but not LEGACY_REGS (because the allocatable
   hard registers are the same for both classes).  The important
   classes will contain GENERAL_REGS and LEGACY_REGS.  This is done
   because a machine description insn constraint may refer to
   LEGACY_REGS and the code in ira-costs.cc is mostly based on
   investigation of the insn constraints.  */
static void
setup_allocno_and_important_classes (void)
{
  int i, j, n, cl;
  bool set_p;
  HARD_REG_SET temp_hard_regset2;
  static enum reg_class classes[LIM_REG_CLASSES + 1];

  n = 0;
  /* Collect classes which contain unique sets of allocatable hard
     registers.  Prefer GENERAL_REGS to other classes containing the
     same set of hard registers.  */
  for (i = 0; i < LIM_REG_CLASSES; i++)
    {
      temp_hard_regset = reg_class_contents[i] & ~no_unit_alloc_regs;
      for (j = 0; j < n; j++)
        {
          cl = classes[j];
          temp_hard_regset2 = reg_class_contents[cl] & ~no_unit_alloc_regs;
          if (temp_hard_regset == temp_hard_regset2)
            break;
        }
      if (j >= n || targetm.additional_allocno_class_p (i))
        classes[n++] = (enum reg_class) i;
      else if (i == GENERAL_REGS)
        /* Prefer general regs.  For the i386 example, it means that
           we prefer GENERAL_REGS over INDEX_REGS or LEGACY_REGS
           (all of them consist of the same available hard
           registers).  */
        classes[j] = (enum reg_class) i;
    }
  classes[n] = LIM_REG_CLASSES;

  /* Set up classes which can be used for allocnos as classes
     containing non-empty unique sets of allocatable hard
     registers.  */
  ira_allocno_classes_num = 0;
  for (i = 0; (cl = classes[i]) != LIM_REG_CLASSES; i++)
    if (ira_class_hard_regs_num[cl] > 0)
      ira_allocno_classes[ira_allocno_classes_num++] = (enum reg_class) cl;
  ira_important_classes_num = 0;
  /* Add non-allocno classes containing a non-empty set of
     allocatable hard regs.  */
  for (cl = 0; cl < N_REG_CLASSES; cl++)
    if (ira_class_hard_regs_num[cl] > 0)
      {
        temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs;
        set_p = false;
        for (j = 0; j < ira_allocno_classes_num; j++)
          {
            temp_hard_regset2 = (reg_class_contents[ira_allocno_classes[j]]
                                 & ~no_unit_alloc_regs);
            if ((enum reg_class) cl == ira_allocno_classes[j])
              break;
            else if (hard_reg_set_subset_p (temp_hard_regset,
                                            temp_hard_regset2))
              set_p = true;
          }
        if (set_p && j >= ira_allocno_classes_num)
          ira_important_classes[ira_important_classes_num++]
            = (enum reg_class) cl;
      }
  /* Now add allocno classes to the important classes.  */
  for (j = 0; j < ira_allocno_classes_num; j++)
    ira_important_classes[ira_important_classes_num++]
      = ira_allocno_classes[j];
  for (cl = 0; cl < N_REG_CLASSES; cl++)
    {
      ira_reg_allocno_class_p[cl] = false;
      ira_reg_pressure_class_p[cl] = false;
    }
  for (j = 0; j < ira_allocno_classes_num; j++)
    ira_reg_allocno_class_p[ira_allocno_classes[j]] = true;
  setup_pressure_classes ();
  setup_uniform_class_p ();
}

/* Set up the translation in CLASS_TRANSLATE of all classes into a
   class given by array CLASSES of length CLASSES_NUM.  The function
   is used to translate any reg class to an allocno class or to a
   pressure class.  This translation is necessary for some
   calculations when we can use only allocno or pressure classes, and
   such a translation is an approximate representation of all
   classes.

   The translation in the case when the allocatable hard register set
   of a given class is a subset of the allocatable hard register set
   of a class in CLASSES is pretty simple.  We use the smallest class
   from CLASSES containing the given class.  If the allocatable hard
   register set of a given class is not a subset of any corresponding
   set of a class from CLASSES, we use the cheapest (from the
   load/store point of view) class from CLASSES whose set intersects
   with the given class set.  */
static void
setup_class_translate_array (enum reg_class *class_translate,
                             int classes_num, enum reg_class *classes)
{
  int cl, mode;
  enum reg_class aclass, best_class, *cl_ptr;
  int i, cost, min_cost, best_cost;

  for (cl = 0; cl < N_REG_CLASSES; cl++)
    class_translate[cl] = NO_REGS;

  for (i = 0; i < classes_num; i++)
    {
      aclass = classes[i];
      for (cl_ptr = &alloc_reg_class_subclasses[aclass][0];
           (cl = *cl_ptr) != LIM_REG_CLASSES;
           cl_ptr++)
        if (class_translate[cl] == NO_REGS)
          class_translate[cl] = aclass;
      class_translate[aclass] = aclass;
    }
  /* For classes which are not fully covered by one of the given
     classes (in other words, covered by more than one given class),
     use the cheapest class.  */
  for (cl = 0; cl < N_REG_CLASSES; cl++)
    {
      if (cl == NO_REGS || class_translate[cl] != NO_REGS)
        continue;
      best_class = NO_REGS;
      best_cost = INT_MAX;
      for (i = 0; i < classes_num; i++)
        {
          aclass = classes[i];
          temp_hard_regset = (reg_class_contents[aclass]
                              & reg_class_contents[cl]
                              & ~no_unit_alloc_regs);
          if (! hard_reg_set_empty_p (temp_hard_regset))
            {
              min_cost = INT_MAX;
              for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
                {
                  cost = (ira_memory_move_cost[mode][aclass][0]
                          + ira_memory_move_cost[mode][aclass][1]);
                  if (min_cost > cost)
                    min_cost = cost;
                }
              if (best_class == NO_REGS || best_cost > min_cost)
                {
                  best_class = aclass;
                  best_cost = min_cost;
                }
            }
        }
      class_translate[cl] = best_class;
    }
}
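
/* Usage sketch (illustrative only; the chosen classes are
   hypothetical for a target that defines them): given classes such
   as { GENERAL_REGS, FLOAT_REGS }, the function above maps every
   other class onto one of them:

     enum reg_class translate[N_REG_CLASSES];
     enum reg_class classes[] = { GENERAL_REGS, FLOAT_REGS };
     setup_class_translate_array (translate, 2, classes);
     // A class fully contained in GENERAL_REGS now translates to
     // GENERAL_REGS; a class intersecting several given classes
     // translates to the one with the cheapest memory moves.
*/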

/* Set up the arrays IRA_ALLOCNO_CLASS_TRANSLATE and
   IRA_PRESSURE_CLASS_TRANSLATE.  */
static void
setup_class_translate (void)
{
  setup_class_translate_array (ira_allocno_class_translate,
                               ira_allocno_classes_num, ira_allocno_classes);
  setup_class_translate_array (ira_pressure_class_translate,
                               ira_pressure_classes_num, ira_pressure_classes);
}

/* Order numbers of allocno classes in the original target allocno
   class array, -1 for non-allocno classes.  */
static int allocno_class_order[N_REG_CLASSES];

/* The function used to sort the important classes.  */
static int
comp_reg_classes_func (const void *v1p, const void *v2p)
{
  enum reg_class cl1 = *(const enum reg_class *) v1p;
  enum reg_class cl2 = *(const enum reg_class *) v2p;
  enum reg_class tcl1, tcl2;
  int diff;

  tcl1 = ira_allocno_class_translate[cl1];
  tcl2 = ira_allocno_class_translate[cl2];
  if (tcl1 != NO_REGS && tcl2 != NO_REGS
      && (diff = allocno_class_order[tcl1] - allocno_class_order[tcl2]) != 0)
    return diff;
  return (int) cl1 - (int) cl2;
}

/* For the correct work of function setup_reg_class_relations we need
   to reorder the important classes according to the order of their
   allocno classes.  This places important classes containing the
   same allocatable hard register set adjacent to each other, with
   the allocno class having that allocatable hard register set right
   after the other important classes with the same set.

   In the example from the comments of function
   setup_allocno_and_important_classes, it places LEGACY_REGS and
   GENERAL_REGS close to each other and GENERAL_REGS is after
   LEGACY_REGS.  */
static void
reorder_important_classes (void)
{
  int i;

  for (i = 0; i < N_REG_CLASSES; i++)
    allocno_class_order[i] = -1;
  for (i = 0; i < ira_allocno_classes_num; i++)
    allocno_class_order[ira_allocno_classes[i]] = i;
  qsort (ira_important_classes, ira_important_classes_num,
         sizeof (enum reg_class), comp_reg_classes_func);
  for (i = 0; i < ira_important_classes_num; i++)
    ira_important_class_nums[ira_important_classes[i]] = i;
}

/* Set up IRA_REG_CLASS_SUBUNION, IRA_REG_CLASS_SUPERUNION,
   IRA_REG_CLASS_SUPER_CLASSES, IRA_REG_CLASSES_INTERSECT, and
   IRA_REG_CLASSES_INTERSECT_P.  For the meaning of the relations,
   please see the corresponding comments in ira-int.h.  */
static void
setup_reg_class_relations (void)
{
  int i, cl1, cl2, cl3;
  HARD_REG_SET intersection_set, union_set, temp_set2;
  bool important_class_p[N_REG_CLASSES];

  memset (important_class_p, 0, sizeof (important_class_p));
  for (i = 0; i < ira_important_classes_num; i++)
    important_class_p[ira_important_classes[i]] = true;
  for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++)
    {
      ira_reg_class_super_classes[cl1][0] = LIM_REG_CLASSES;
      for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++)
        {
          ira_reg_classes_intersect_p[cl1][cl2] = false;
          ira_reg_class_intersect[cl1][cl2] = NO_REGS;
          ira_reg_class_subset[cl1][cl2] = NO_REGS;
          temp_hard_regset = reg_class_contents[cl1] & ~no_unit_alloc_regs;
          temp_set2 = reg_class_contents[cl2] & ~no_unit_alloc_regs;
          if (hard_reg_set_empty_p (temp_hard_regset)
              && hard_reg_set_empty_p (temp_set2))
            {
              /* Both classes have no allocatable hard registers --
                 take all class hard registers into account and use
                 reg_class_subunion and reg_class_superunion.  */
              for (i = 0;; i++)
                {
                  cl3 = reg_class_subclasses[cl1][i];
                  if (cl3 == LIM_REG_CLASSES)
                    break;
                  if (reg_class_subset_p (ira_reg_class_intersect[cl1][cl2],
                                          (enum reg_class) cl3))
                    ira_reg_class_intersect[cl1][cl2] = (enum reg_class) cl3;
                }
              ira_reg_class_subunion[cl1][cl2] = reg_class_subunion[cl1][cl2];
              ira_reg_class_superunion[cl1][cl2] = reg_class_superunion[cl1][cl2];
              continue;
            }
          ira_reg_classes_intersect_p[cl1][cl2]
            = hard_reg_set_intersect_p (temp_hard_regset, temp_set2);
          if (important_class_p[cl1] && important_class_p[cl2]
              && hard_reg_set_subset_p (temp_hard_regset, temp_set2))
            {
              /* CL1 and CL2 are important classes and the allocatable
                 hard register set of CL1 is inside of the allocatable
                 hard registers of CL2 -- make CL1 a superset of CL2.  */
              enum reg_class *p;

              p = &ira_reg_class_super_classes[cl1][0];
              while (*p != LIM_REG_CLASSES)
                p++;
              *p++ = (enum reg_class) cl2;
              *p = LIM_REG_CLASSES;
            }
          ira_reg_class_subunion[cl1][cl2] = NO_REGS;
          ira_reg_class_superunion[cl1][cl2] = NO_REGS;
          intersection_set = (reg_class_contents[cl1]
                              & reg_class_contents[cl2]
                              & ~no_unit_alloc_regs);
          union_set = ((reg_class_contents[cl1] | reg_class_contents[cl2])
                       & ~no_unit_alloc_regs);
          for (cl3 = 0; cl3 < N_REG_CLASSES; cl3++)
            {
              temp_hard_regset = reg_class_contents[cl3] & ~no_unit_alloc_regs;
              if (hard_reg_set_subset_p (temp_hard_regset, intersection_set))
                {
                  /* The allocatable hard register set of CL3 is inside
                     of the intersection of the allocatable hard
                     register sets of CL1 and CL2.  */
                  if (important_class_p[cl3])
                    {
                      temp_set2
                        = (reg_class_contents
                           [ira_reg_class_intersect[cl1][cl2]]);
                      temp_set2 &= ~no_unit_alloc_regs;
                      if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2)
                          /* If the allocatable hard register sets are
                             the same, prefer GENERAL_REGS or the
                             smallest class for debugging
                             purposes.  */
                          || (temp_hard_regset == temp_set2
                              && (cl3 == GENERAL_REGS
                                  || ((ira_reg_class_intersect[cl1][cl2]
                                       != GENERAL_REGS)
                                      && hard_reg_set_subset_p
                                         (reg_class_contents[cl3],
                                          reg_class_contents
                                          [(int)
                                           ira_reg_class_intersect[cl1][cl2]])))))
                        ira_reg_class_intersect[cl1][cl2] = (enum reg_class) cl3;
                    }
                  temp_set2
                    = (reg_class_contents[ira_reg_class_subset[cl1][cl2]]
                       & ~no_unit_alloc_regs);
                  if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2)
                      /* Ignore unavailable hard registers and prefer
                         the smallest class for debugging purposes.  */
                      || (temp_hard_regset == temp_set2
                          && hard_reg_set_subset_p
                             (reg_class_contents[cl3],
                              reg_class_contents
                              [(int) ira_reg_class_subset[cl1][cl2]])))
                    ira_reg_class_subset[cl1][cl2] = (enum reg_class) cl3;
                }
              if (important_class_p[cl3]
                  && hard_reg_set_subset_p (temp_hard_regset, union_set))
                {
                  /* The allocatable hard register set of CL3 is inside
                     of the union of the allocatable hard register sets
                     of CL1 and CL2.  */
                  temp_set2
                    = (reg_class_contents[ira_reg_class_subunion[cl1][cl2]]
                       & ~no_unit_alloc_regs);
                  if (ira_reg_class_subunion[cl1][cl2] == NO_REGS
                      || (hard_reg_set_subset_p (temp_set2, temp_hard_regset)
                          && (temp_set2 != temp_hard_regset
                              || cl3 == GENERAL_REGS
                              /* If the allocatable hard register sets
                                 are the same, prefer GENERAL_REGS or
                                 the smallest class for debugging
                                 purposes.  */
                              || (ira_reg_class_subunion[cl1][cl2] != GENERAL_REGS
                                  && hard_reg_set_subset_p
                                     (reg_class_contents[cl3],
                                      reg_class_contents
                                      [(int) ira_reg_class_subunion[cl1][cl2]])))))
                    ira_reg_class_subunion[cl1][cl2] = (enum reg_class) cl3;
                }
              if (hard_reg_set_subset_p (union_set, temp_hard_regset))
                {
                  /* The allocatable hard register set of CL3 contains
                     the union of the allocatable hard register sets of
                     CL1 and CL2.  */
                  temp_set2
                    = (reg_class_contents[ira_reg_class_superunion[cl1][cl2]]
                       & ~no_unit_alloc_regs);
                  if (ira_reg_class_superunion[cl1][cl2] == NO_REGS
                      || (hard_reg_set_subset_p (temp_hard_regset, temp_set2)
                          && (temp_set2 != temp_hard_regset
                              || cl3 == GENERAL_REGS
                              /* If the allocatable hard register sets
                                 are the same, prefer GENERAL_REGS or
                                 the smallest class for debugging
                                 purposes.  */
                              || (ira_reg_class_superunion[cl1][cl2] != GENERAL_REGS
                                  && hard_reg_set_subset_p
                                     (reg_class_contents[cl3],
                                      reg_class_contents
                                      [(int) ira_reg_class_superunion[cl1][cl2]])))))
                    ira_reg_class_superunion[cl1][cl2] = (enum reg_class) cl3;
                }
            }
        }
    }
}
1349 
1350 /* Output all uniform and important classes into file F.  */
1351 static void
1352 print_uniform_and_important_classes (FILE *f)
1353 {
1354   int i, cl;
1355 
1356   fprintf (f, "Uniform classes:\n");
1357   for (cl = 0; cl < N_REG_CLASSES; cl++)
1358     if (ira_uniform_class_p[cl])
1359       fprintf (f, " %s", reg_class_names[cl]);
1360   fprintf (f, "\nImportant classes:\n");
1361   for (i = 0; i < ira_important_classes_num; i++)
1362     fprintf (f, " %s", reg_class_names[ira_important_classes[i]]);
1363   fprintf (f, "\n");
1364 }
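
/* Sample of the dump produced by the function above.  The contents are
   target dependent; the classes shown here are from a hypothetical
   target and are for illustration only:

     Uniform classes:
      GENERAL_REGS FLOAT_REGS
     Important classes:
      GENERAL_REGS FLOAT_REGS ALL_REGS  */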
1365 
1366 /* Output all possible allocno or pressure classes and their
1367    translation map into file F.  */
1368 static void
1369 print_translated_classes (FILE *f, bool pressure_p)
1370 {
1371   int classes_num = (pressure_p
1372 		     ? ira_pressure_classes_num : ira_allocno_classes_num);
1373   enum reg_class *classes = (pressure_p
1374 			     ? ira_pressure_classes : ira_allocno_classes);
1375   enum reg_class *class_translate = (pressure_p
1376 				     ? ira_pressure_class_translate
1377 				     : ira_allocno_class_translate);
1378   int i;
1379 
1380   fprintf (f, "%s classes:\n", pressure_p ? "Pressure" : "Allocno");
1381   for (i = 0; i < classes_num; i++)
1382     fprintf (f, " %s", reg_class_names[classes[i]]);
1383   fprintf (f, "\nClass translation:\n");
1384   for (i = 0; i < N_REG_CLASSES; i++)
1385     fprintf (f, " %s -> %s\n", reg_class_names[i],
1386 	     reg_class_names[class_translate[i]]);
1387 }
1388 
1389 /* Output all possible allocno and pressure classes and their
1390    translation maps into stderr.  */
1391 void
1392 ira_debug_allocno_classes (void)
1393 {
1394   print_uniform_and_important_classes (stderr);
1395   print_translated_classes (stderr, false);
1396   print_translated_classes (stderr, true);
1397 }
1398 
1399 /* Set up different arrays concerning class subsets, allocno and
1400    important classes.  */
1401 static void
1402 find_reg_classes (void)
1403 {
1404   setup_allocno_and_important_classes ();
1405   setup_class_translate ();
1406   reorder_important_classes ();
1407   setup_reg_class_relations ();
1408 }
1409 
1410 
1411 
1412 /* Set up the IRA_HARD_REGNO_ALLOCNO_CLASS map.  */
1413 static void
1414 setup_hard_regno_aclass (void)
1415 {
1416   int i;
1417 
1418   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1419     {
1420 #if 1
1421       ira_hard_regno_allocno_class[i]
1422 	= (TEST_HARD_REG_BIT (no_unit_alloc_regs, i)
1423 	   ? NO_REGS
1424 	   : ira_allocno_class_translate[REGNO_REG_CLASS (i)]);
1425 #else
1426       int j;
1427       enum reg_class cl;
1428       ira_hard_regno_allocno_class[i] = NO_REGS;
1429       for (j = 0; j < ira_allocno_classes_num; j++)
1430  	{
1431 	  cl = ira_allocno_classes[j];
1432  	  if (ira_class_hard_reg_index[cl][i] >= 0)
1433  	    {
1434 	      ira_hard_regno_allocno_class[i] = cl;
1435  	      break;
1436  	    }
1437  	}
1438 #endif
1439     }
1440 }
1441 
1442 
1443 
1444 /* Form IRA_REG_CLASS_MAX_NREGS and IRA_REG_CLASS_MIN_NREGS maps.  */
1445 static void
1446 setup_reg_class_nregs (void)
1447 {
1448   int i, cl, cl2, m;
1449 
1450   for (m = 0; m < MAX_MACHINE_MODE; m++)
1451     {
1452       for (cl = 0; cl < N_REG_CLASSES; cl++)
1453 	ira_reg_class_max_nregs[cl][m]
1454 	  = ira_reg_class_min_nregs[cl][m]
1455 	  = targetm.class_max_nregs ((reg_class_t) cl, (machine_mode) m);
1456       for (cl = 0; cl < N_REG_CLASSES; cl++)
1457 	for (i = 0;
1458 	     (cl2 = alloc_reg_class_subclasses[cl][i]) != LIM_REG_CLASSES;
1459 	     i++)
1460 	  if (ira_reg_class_min_nregs[cl2][m]
1461 	      < ira_reg_class_min_nregs[cl][m])
1462 	    ira_reg_class_min_nregs[cl][m] = ira_reg_class_min_nregs[cl2][m];
1463     }
1464 }
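
/* Worked example of the min-nregs refinement above (hypothetical
   target; the class names and numbers are assumptions, not GCC
   facts): suppose FLOAT_REGS is a subclass of ALL_REGS and

     targetm.class_max_nregs (ALL_REGS, DFmode)   == 2
     targetm.class_max_nregs (FLOAT_REGS, DFmode) == 1

   Then after the subclass loop runs we would have

     ira_reg_class_max_nregs[ALL_REGS][DFmode] == 2
     ira_reg_class_min_nregs[ALL_REGS][DFmode] == 1

   i.e. the minimum is taken over the class and all of its
   allocatable subclasses.  */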
1465 
1466 
1467 
1468 /* Set up IRA_PROHIBITED_CLASS_MODE_REGS, IRA_EXCLUDE_CLASS_MODE_REGS, and
1469    IRA_CLASS_SINGLETON.  This function is called once IRA_CLASS_HARD_REGS has
1470    been initialized.  */
1471 static void
1472 setup_prohibited_and_exclude_class_mode_regs (void)
1473 {
1474   int j, k, hard_regno, cl, last_hard_regno, count;
1475 
1476   for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
1477     {
1478       temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs;
1479       for (j = 0; j < NUM_MACHINE_MODES; j++)
1480 	{
1481 	  count = 0;
1482 	  last_hard_regno = -1;
1483 	  CLEAR_HARD_REG_SET (ira_prohibited_class_mode_regs[cl][j]);
1484 	  CLEAR_HARD_REG_SET (ira_exclude_class_mode_regs[cl][j]);
1485 	  for (k = ira_class_hard_regs_num[cl] - 1; k >= 0; k--)
1486 	    {
1487 	      hard_regno = ira_class_hard_regs[cl][k];
1488 	      if (!targetm.hard_regno_mode_ok (hard_regno, (machine_mode) j))
1489 		SET_HARD_REG_BIT (ira_prohibited_class_mode_regs[cl][j],
1490 				  hard_regno);
1491 	      else if (in_hard_reg_set_p (temp_hard_regset,
1492 					  (machine_mode) j, hard_regno))
1493 		{
1494 		  last_hard_regno = hard_regno;
1495 		  count++;
1496 		}
1497 	      else
1498 		{
1499 		  SET_HARD_REG_BIT (ira_exclude_class_mode_regs[cl][j], hard_regno);
1500 		}
1501 	    }
1502 	  ira_class_singleton[cl][j] = (count == 1 ? last_hard_regno : -1);
1503 	}
1504     }
1505 }
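
/* Hedged illustration of the three sets computed above (hypothetical
   32-bit target where DImode must live in an even/odd register pair):
   for CL = GENERAL_REGS and mode DImode, an odd-numbered register
   fails hard_regno_mode_ok and lands in
   ira_prohibited_class_mode_regs; an even register whose odd partner
   is fixed (not allocatable) passes hard_regno_mode_ok but fails
   in_hard_reg_set_p and lands in ira_exclude_class_mode_regs; and
   ira_class_singleton[CL][DImode] is set only if exactly one valid
   starting register remains.  */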
1506 
1507 /* Clarify IRA_PROHIBITED_CLASS_MODE_REGS by excluding hard registers
1508    spanning from one register pressure class to another one.  It is
1509    called after defining the pressure classes.  */
1510 static void
1511 clarify_prohibited_class_mode_regs (void)
1512 {
1513   int j, k, hard_regno, cl, pclass, nregs;
1514 
1515   for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
1516     for (j = 0; j < NUM_MACHINE_MODES; j++)
1517       {
1518 	CLEAR_HARD_REG_SET (ira_useful_class_mode_regs[cl][j]);
1519 	for (k = ira_class_hard_regs_num[cl] - 1; k >= 0; k--)
1520 	  {
1521 	    hard_regno = ira_class_hard_regs[cl][k];
1522 	    if (TEST_HARD_REG_BIT (ira_prohibited_class_mode_regs[cl][j], hard_regno))
1523 	      continue;
1524 	    nregs = hard_regno_nregs (hard_regno, (machine_mode) j);
1525 	    if (hard_regno + nregs > FIRST_PSEUDO_REGISTER)
1526 	      {
1527 		SET_HARD_REG_BIT (ira_prohibited_class_mode_regs[cl][j],
1528 				  hard_regno);
1529 		 continue;
1530 	      }
1531 	    pclass = ira_pressure_class_translate[REGNO_REG_CLASS (hard_regno)];
1532 	    for (nregs--; nregs >= 0; nregs--)
1533 	      if (((enum reg_class) pclass
1534 		   != ira_pressure_class_translate[REGNO_REG_CLASS
1535 						   (hard_regno + nregs)]))
1536 		{
1537 		  SET_HARD_REG_BIT (ira_prohibited_class_mode_regs[cl][j],
1538 				    hard_regno);
1539 		  break;
1540 		}
1541 	    if (!TEST_HARD_REG_BIT (ira_prohibited_class_mode_regs[cl][j],
1542 				    hard_regno))
1543 	      add_to_hard_reg_set (&ira_useful_class_mode_regs[cl][j],
1544 				   (machine_mode) j, hard_regno);
1545 	  }
1546       }
1547 }
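
/* Example of what the function above filters out (sketch, with a
   made-up register layout): if hard register 15 is the last
   GENERAL_REGS register and register 16 belongs to a FLOAT_REGS
   pressure class, a DImode value starting in r15 would occupy r15 and
   r16 and thus span two pressure classes, so r15 is marked prohibited
   for DImode in GENERAL_REGS even though hard_regno_mode_ok allowed
   it.  */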
1548 
1549 /* Allocate and initialize IRA_REGISTER_MOVE_COST, IRA_MAY_MOVE_IN_COST
1550    and IRA_MAY_MOVE_OUT_COST for MODE.  */
1551 void
1552 ira_init_register_move_cost (machine_mode mode)
1553 {
1554   static unsigned short last_move_cost[N_REG_CLASSES][N_REG_CLASSES];
1555   bool all_match = true;
1556   unsigned int i, cl1, cl2;
1557   HARD_REG_SET ok_regs;
1558 
1559   ira_assert (ira_register_move_cost[mode] == NULL
1560 	      && ira_may_move_in_cost[mode] == NULL
1561 	      && ira_may_move_out_cost[mode] == NULL);
1562   CLEAR_HARD_REG_SET (ok_regs);
1563   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1564     if (targetm.hard_regno_mode_ok (i, mode))
1565       SET_HARD_REG_BIT (ok_regs, i);
1566 
1567   /* Note that we might be asked about the move costs of modes that
1568      cannot be stored in any hard register, for example if an inline
1569      asm tries to create a register operand with an impossible mode.
1570      We therefore can't assert have_regs_of_mode[mode] here.  */
1571   for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++)
1572     for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++)
1573       {
1574 	int cost;
1575 	if (!hard_reg_set_intersect_p (ok_regs, reg_class_contents[cl1])
1576 	    || !hard_reg_set_intersect_p (ok_regs, reg_class_contents[cl2]))
1577 	  {
1578 	    if ((ira_reg_class_max_nregs[cl1][mode]
1579 		 > ira_class_hard_regs_num[cl1])
1580 		|| (ira_reg_class_max_nregs[cl2][mode]
1581 		    > ira_class_hard_regs_num[cl2]))
1582 	      cost = 65535;
1583 	    else
1584 	      cost = (ira_memory_move_cost[mode][cl1][0]
1585 		      + ira_memory_move_cost[mode][cl2][1]) * 2;
1586 	  }
1587 	else
1588 	  {
1589 	    cost = register_move_cost (mode, (enum reg_class) cl1,
1590 				       (enum reg_class) cl2);
1591 	    ira_assert (cost < 65535);
1592 	  }
1593 	all_match &= (last_move_cost[cl1][cl2] == cost);
1594 	last_move_cost[cl1][cl2] = cost;
1595       }
1596   if (all_match && last_mode_for_init_move_cost != -1)
1597     {
1598       ira_register_move_cost[mode]
1599 	= ira_register_move_cost[last_mode_for_init_move_cost];
1600       ira_may_move_in_cost[mode]
1601 	= ira_may_move_in_cost[last_mode_for_init_move_cost];
1602       ira_may_move_out_cost[mode]
1603 	= ira_may_move_out_cost[last_mode_for_init_move_cost];
1604       return;
1605     }
1606   last_mode_for_init_move_cost = mode;
1607   ira_register_move_cost[mode] = XNEWVEC (move_table, N_REG_CLASSES);
1608   ira_may_move_in_cost[mode] = XNEWVEC (move_table, N_REG_CLASSES);
1609   ira_may_move_out_cost[mode] = XNEWVEC (move_table, N_REG_CLASSES);
1610   for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++)
1611     for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++)
1612       {
1613 	int cost;
1614 	enum reg_class *p1, *p2;
1615 
1616 	if (last_move_cost[cl1][cl2] == 65535)
1617 	  {
1618 	    ira_register_move_cost[mode][cl1][cl2] = 65535;
1619 	    ira_may_move_in_cost[mode][cl1][cl2] = 65535;
1620 	    ira_may_move_out_cost[mode][cl1][cl2] = 65535;
1621 	  }
1622 	else
1623 	  {
1624 	    cost = last_move_cost[cl1][cl2];
1625 
1626 	    for (p2 = &reg_class_subclasses[cl2][0];
1627 		 *p2 != LIM_REG_CLASSES; p2++)
1628 	      if (ira_class_hard_regs_num[*p2] > 0
1629 		  && (ira_reg_class_max_nregs[*p2][mode]
1630 		      <= ira_class_hard_regs_num[*p2]))
1631 		cost = MAX (cost, ira_register_move_cost[mode][cl1][*p2]);
1632 
1633 	    for (p1 = &reg_class_subclasses[cl1][0];
1634 		 *p1 != LIM_REG_CLASSES; p1++)
1635 	      if (ira_class_hard_regs_num[*p1] > 0
1636 		  && (ira_reg_class_max_nregs[*p1][mode]
1637 		      <= ira_class_hard_regs_num[*p1]))
1638 		cost = MAX (cost, ira_register_move_cost[mode][*p1][cl2]);
1639 
1640 	    ira_assert (cost <= 65535);
1641 	    ira_register_move_cost[mode][cl1][cl2] = cost;
1642 
1643 	    if (ira_class_subset_p[cl1][cl2])
1644 	      ira_may_move_in_cost[mode][cl1][cl2] = 0;
1645 	    else
1646 	      ira_may_move_in_cost[mode][cl1][cl2] = cost;
1647 
1648 	    if (ira_class_subset_p[cl2][cl1])
1649 	      ira_may_move_out_cost[mode][cl1][cl2] = 0;
1650 	    else
1651 	      ira_may_move_out_cost[mode][cl1][cl2] = cost;
1652 	  }
1653       }
1654 }
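
/* Minimal usage sketch for the function above: callers normally go
   through the lazy wrapper rather than calling it directly, e.g.

     ira_init_register_move_cost_if_necessary (mode);
     cost = ira_register_move_cost[mode][cl1][cl2];

   Note the sharing behaviour: if a mode produces exactly the same
   cost table as the previously initialized mode, the three tables are
   reused rather than reallocated, which is why freeing them (below)
   must check for pointer equality first.  */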
1655 
1656 
1657 
1658 /* This is called once per compiler run.  It sets up
1659    different arrays whose values don't depend on the function
1660    being compiled.  */
1661 void
1662 ira_init_once (void)
1663 {
1664   ira_init_costs_once ();
1665   lra_init_once ();
1666 
1667   ira_use_lra_p = targetm.lra_p ();
1668 }
1669 
1670 /* Free ira_max_register_move_cost, ira_may_move_in_cost and
1671    ira_may_move_out_cost for each mode.  */
1672 void
1673 target_ira_int::free_register_move_costs (void)
1674 {
1675   int mode, i;
1676 
1677   /* Reset move_cost and friends, making sure we only free shared
1678      table entries once.  */
1679   for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
1680     if (x_ira_register_move_cost[mode])
1681       {
1682 	for (i = 0;
1683 	     i < mode && (x_ira_register_move_cost[i]
1684 			  != x_ira_register_move_cost[mode]);
1685 	     i++)
1686 	  ;
1687 	if (i == mode)
1688 	  {
1689 	    free (x_ira_register_move_cost[mode]);
1690 	    free (x_ira_may_move_in_cost[mode]);
1691 	    free (x_ira_may_move_out_cost[mode]);
1692 	  }
1693       }
1694   memset (x_ira_register_move_cost, 0, sizeof x_ira_register_move_cost);
1695   memset (x_ira_may_move_in_cost, 0, sizeof x_ira_may_move_in_cost);
1696   memset (x_ira_may_move_out_cost, 0, sizeof x_ira_may_move_out_cost);
1697   last_mode_for_init_move_cost = -1;
1698 }
1699 
1700 target_ira_int::~target_ira_int ()
1701 {
1702   free_ira_costs ();
1703   free_register_move_costs ();
1704 }
1705 
1706 /* This is called every time the register-related information
1707    changes.  */
1708 void
1709 ira_init (void)
1710 {
1711   this_target_ira_int->free_register_move_costs ();
1712   setup_reg_mode_hard_regset ();
1713   setup_alloc_regs (flag_omit_frame_pointer != 0);
1714   setup_class_subset_and_memory_move_costs ();
1715   setup_reg_class_nregs ();
1716   setup_prohibited_and_exclude_class_mode_regs ();
1717   find_reg_classes ();
1718   clarify_prohibited_class_mode_regs ();
1719   setup_hard_regno_aclass ();
1720   ira_init_costs ();
1721 }
1722 
1723 
1724 #define ira_prohibited_mode_move_regs_initialized_p \
1725   (this_target_ira_int->x_ira_prohibited_mode_move_regs_initialized_p)
1726 
1727 /* Set up IRA_PROHIBITED_MODE_MOVE_REGS.  */
1728 static void
1729 setup_prohibited_mode_move_regs (void)
1730 {
1731   int i, j;
1732   rtx test_reg1, test_reg2, move_pat;
1733   rtx_insn *move_insn;
1734 
1735   if (ira_prohibited_mode_move_regs_initialized_p)
1736     return;
1737   ira_prohibited_mode_move_regs_initialized_p = true;
1738   test_reg1 = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
1739   test_reg2 = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
1740   move_pat = gen_rtx_SET (test_reg1, test_reg2);
1741   move_insn = gen_rtx_INSN (VOIDmode, 0, 0, 0, move_pat, 0, -1, 0);
1742   for (i = 0; i < NUM_MACHINE_MODES; i++)
1743     {
1744       SET_HARD_REG_SET (ira_prohibited_mode_move_regs[i]);
1745       for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
1746 	{
1747 	  if (!targetm.hard_regno_mode_ok (j, (machine_mode) i))
1748 	    continue;
1749 	  set_mode_and_regno (test_reg1, (machine_mode) i, j);
1750 	  set_mode_and_regno (test_reg2, (machine_mode) i, j);
1751 	  INSN_CODE (move_insn) = -1;
1752 	  recog_memoized (move_insn);
1753 	  if (INSN_CODE (move_insn) < 0)
1754 	    continue;
1755 	  extract_insn (move_insn);
1756 	  /* We don't know whether the move will be in code that is optimized
1757 	     for size or speed, so consider all enabled alternatives.  */
1758 	  if (! constrain_operands (1, get_enabled_alternatives (move_insn)))
1759 	    continue;
1760 	  CLEAR_HARD_REG_BIT (ira_prohibited_mode_move_regs[i], j);
1761 	}
1762     }
1763 }
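
/* Usage sketch (hypothetical consumer, not code from this file): a
   pass that wants to know whether a direct reg-reg copy of mode M is
   supported for hard register J can test the set computed above:

     if (TEST_HARD_REG_BIT (ira_prohibited_mode_move_regs[(int) M], J))
       ;  /* no recognizable move pattern for this mode/register  */

   The table is filled lazily; the function returns immediately once
   ira_prohibited_mode_move_regs_initialized_p has been set.  */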
1764 
1765 
1766 
1767 /* Extract INSN and return the set of alternatives that we should consider.
1768    This excludes any alternatives whose constraints are obviously impossible
1769    to meet (e.g. because the constraint requires a constant and the operand
1770    is nonconstant).  It also excludes alternatives that are bound to need
1771    a spill or reload, as long as we have other alternatives that match
1772    exactly.  */
1773 alternative_mask
1774 ira_setup_alts (rtx_insn *insn)
1775 {
1776   int nop, nalt;
1777   bool curr_swapped;
1778   const char *p;
1779   int commutative = -1;
1780 
1781   extract_insn (insn);
1782   preprocess_constraints (insn);
1783   alternative_mask preferred = get_preferred_alternatives (insn);
1784   alternative_mask alts = 0;
1785   alternative_mask exact_alts = 0;
1786   /* Check that the hard reg set is enough for holding all
1787      alternatives.  It is hard to imagine the situation when the
1788      assertion is wrong.  */
1789   ira_assert (recog_data.n_alternatives
1790 	      <= (int) MAX (sizeof (HARD_REG_ELT_TYPE) * CHAR_BIT,
1791 			    FIRST_PSEUDO_REGISTER));
1792   for (nop = 0; nop < recog_data.n_operands; nop++)
1793     if (recog_data.constraints[nop][0] == '%')
1794       {
1795 	commutative = nop;
1796 	break;
1797       }
1798   for (curr_swapped = false;; curr_swapped = true)
1799     {
1800       for (nalt = 0; nalt < recog_data.n_alternatives; nalt++)
1801 	{
1802 	  if (!TEST_BIT (preferred, nalt) || TEST_BIT (exact_alts, nalt))
1803 	    continue;
1804 
1805 	  const operand_alternative *op_alt
1806 	    = &recog_op_alt[nalt * recog_data.n_operands];
1807 	  int this_reject = 0;
1808 	  for (nop = 0; nop < recog_data.n_operands; nop++)
1809 	    {
1810 	      int c, len;
1811 
1812 	      this_reject += op_alt[nop].reject;
1813 
1814 	      rtx op = recog_data.operand[nop];
1815 	      p = op_alt[nop].constraint;
1816 	      if (*p == 0 || *p == ',')
1817 		continue;
1818 
1819 	      bool win_p = false;
1820 	      do
1821 		switch (c = *p, len = CONSTRAINT_LEN (c, p), c)
1822 		  {
1823 		  case '#':
1824 		  case ',':
1825 		    c = '\0';
1826 		    /* FALLTHRU */
1827 		  case '\0':
1828 		    len = 0;
1829 		    break;
1830 
1831 		  case '%':
1832 		    /* The commutative modifier is handled above.  */
1833 		    break;
1834 
1835 		  case '0':  case '1':  case '2':  case '3':  case '4':
1836 		  case '5':  case '6':  case '7':  case '8':  case '9':
1837 		    {
1838 		      char *end;
1839 		      unsigned long dup = strtoul (p, &end, 10);
1840 		      rtx other = recog_data.operand[dup];
1841 		      len = end - p;
1842 		      if (MEM_P (other)
1843 			  ? rtx_equal_p (other, op)
1844 			  : REG_P (op) || SUBREG_P (op))
1845 			goto op_success;
1846 		      win_p = true;
1847 		    }
1848 		    break;
1849 
1850 		  case 'g':
1851 		    goto op_success;
1852 		    break;
1853 
1854 		  default:
1855 		    {
1856 		      enum constraint_num cn = lookup_constraint (p);
1857 		      rtx mem = NULL;
1858 		      switch (get_constraint_type (cn))
1859 			{
1860 			case CT_REGISTER:
1861 			  if (reg_class_for_constraint (cn) != NO_REGS)
1862 			    {
1863 			      if (REG_P (op) || SUBREG_P (op))
1864 				goto op_success;
1865 			      win_p = true;
1866 			    }
1867 			  break;
1868 
1869 			case CT_CONST_INT:
1870 			  if (CONST_INT_P (op)
1871 			      && (insn_const_int_ok_for_constraint
1872 				  (INTVAL (op), cn)))
1873 			    goto op_success;
1874 			  break;
1875 
1876 			case CT_ADDRESS:
1877 			  goto op_success;
1878 
1879 			case CT_MEMORY:
1880 			case CT_RELAXED_MEMORY:
1881 			  mem = op;
1882 			  /* Fall through.  */
1883 			case CT_SPECIAL_MEMORY:
1884 			  if (!mem)
1885 			    mem = extract_mem_from_operand (op);
1886 			  if (MEM_P (mem))
1887 			    goto op_success;
1888 			  win_p = true;
1889 			  break;
1890 
1891 			case CT_FIXED_FORM:
1892 			  if (constraint_satisfied_p (op, cn))
1893 			    goto op_success;
1894 			  break;
1895 			}
1896 		      break;
1897 		    }
1898 		  }
1899 	      while (p += len, c);
1900 	      if (!win_p)
1901 		break;
1902 	      /* We can make the alternative match by spilling a register
1903 		 to memory or loading something into a register.  Count a
1904 		 cost of one reload (the equivalent of the '?' constraint).  */
1905 	      this_reject += 6;
1906 	    op_success:
1907 	      ;
1908 	    }
1909 
1910 	  if (nop >= recog_data.n_operands)
1911 	    {
1912 	      alts |= ALTERNATIVE_BIT (nalt);
1913 	      if (this_reject == 0)
1914 		exact_alts |= ALTERNATIVE_BIT (nalt);
1915 	    }
1916 	}
1917       if (commutative < 0)
1918 	break;
1919 	      /* Swap back and forth so that recog_data ends up unchanged.  */
1920       std::swap (recog_data.operand[commutative],
1921 		 recog_data.operand[commutative + 1]);
1922       if (curr_swapped)
1923 	break;
1924     }
1925   return exact_alts ? exact_alts : alts;
1926 }
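
/* Illustrative example for ira_setup_alts (hypothetical pattern, not
   from a real target): consider an insn with two alternatives whose
   operand 0 constraint is "r,m" and operand 1 constraint is "r,r".
   For operands (reg, reg), alternative 0 matches with no reloads, so
   the function returns a mask containing just ALTERNATIVE_BIT (0).
   For (mem, reg), alternative 1 matches exactly while alternative 0
   would need a reload, so only bit 1 ends up in EXACT_ALTS and is
   returned.  */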
1927 
1928 /* Return the number of the output non-early-clobber operand which
1929    must be the same in every alternative as the operand with number
1930    OP_NUM (or a negative value if there is no such operand).  ALTS is
1931    the mask of alternatives that we should consider.
1932    SINGLE_INPUT_OP_HAS_CSTR_P is set by this function; it indicates
1933    whether only a single input operand has the matching constraint on
1934    the output operand at the position given by the return value.  If
1935    the pattern allows any one of several input operands to hold the
1936    matching constraint, it is set to false; one typical case is a
1937    destructive FMA instruction on the rs6000 target.  Note that for a
1938    non-NO_REGS preferred register class with no free register copy,
1939    if the parameter PARAM_IRA_CONSIDER_DUP_IN_ALL_ALTS is set to one,
1940    this function checks all available alternatives for matching
1941    constraints, even if it has found (or will find) an alternative
1942    with a non-NO_REGS regclass, so that more cases with matching
1943    constraints are respected.  If PARAM_IRA_CONSIDER_DUP_IN_ALL_ALTS
1944    is set to zero, SINGLE_INPUT_OP_HAS_CSTR_P is always true and the
1945    search for a matching constraint stops once some alternative with
1946    a non-NO_REGS regclass is hit.  */
1947 int
1948 ira_get_dup_out_num (int op_num, alternative_mask alts,
1949 		     bool &single_input_op_has_cstr_p)
1950 {
1951   int curr_alt, c, original;
1952   bool ignore_p, use_commut_op_p;
1953   const char *str;
1954 
1955   if (op_num < 0 || recog_data.n_alternatives == 0)
1956     return -1;
1957   /* We should find duplications only for input operands.  */
1958   if (recog_data.operand_type[op_num] != OP_IN)
1959     return -1;
1960   str = recog_data.constraints[op_num];
1961   use_commut_op_p = false;
1962   single_input_op_has_cstr_p = true;
1963 
1964   rtx op = recog_data.operand[op_num];
1965   int op_regno = reg_or_subregno (op);
1966   enum reg_class op_pref_cl = reg_preferred_class (op_regno);
1967   machine_mode op_mode = GET_MODE (op);
1968 
1969   ira_init_register_move_cost_if_necessary (op_mode);
1970   /* If the preferred regclass isn't NO_REGS, keep looking for the
1971      matching constraint in all available alternatives with the
1972      preferred regclass, even if we have found (or will find) an
1973      alternative whose constraint stands for a REG (non-NO_REGS)
1974      regclass.  Note that it is fine not to respect the matching
1975      constraint when the register copy is free, so exclude that case.  */
1976   bool respect_dup_despite_reg_cstr
1977     = param_ira_consider_dup_in_all_alts
1978       && op_pref_cl != NO_REGS
1979       && ira_register_move_cost[op_mode][op_pref_cl][op_pref_cl] > 0;
1980 
1981   /* Record the alternatives whose constraints use the same regclass as
1982      the preferred regclass.  Later, if we find one matching constraint
1983      for this operand with the preferred regclass, we will visit these
1984      recorded alternatives to check whether there is one alternative in
1985      which no input operand has a matching constraint identical to our
1986      candidate.  If there is, some alternative is perfectly fine without
1987      satisfying this matching constraint, so give the candidate up.  If
1988      there is not, every such alternative has some other input operand
1989      holding this matching constraint, so it is fine to respect the
1990      constraint and create the constraint copy, since the copy becomes
1991      harmless once something else takes preference and interferes.  */
1992   alternative_mask pref_cl_alts;
1993 
1994   for (;;)
1995     {
1996       pref_cl_alts = 0;
1997 
1998       for (curr_alt = 0, ignore_p = !TEST_BIT (alts, curr_alt),
1999 	   original = -1;;)
2000 	{
2001 	  c = *str;
2002 	  if (c == '\0')
2003 	    break;
2004 	  if (c == '#')
2005 	    ignore_p = true;
2006 	  else if (c == ',')
2007 	    {
2008 	      curr_alt++;
2009 	      ignore_p = !TEST_BIT (alts, curr_alt);
2010 	    }
2011 	  else if (! ignore_p)
2012 	    switch (c)
2013 	      {
2014 	      case 'g':
2015 		goto fail;
2016 	      default:
2017 		{
2018 		  enum constraint_num cn = lookup_constraint (str);
2019 		  enum reg_class cl = reg_class_for_constraint (cn);
2020 		  if (cl != NO_REGS && !targetm.class_likely_spilled_p (cl))
2021 		    {
2022 		      if (respect_dup_despite_reg_cstr)
2023 			{
2024 			  /* If it's free to move from one preferred class to
2025 			     the one without matching constraint, it doesn't
2026 			     have to respect this constraint with costs.  */
2027 			  if (cl != op_pref_cl
2028 			      && (ira_reg_class_intersect[cl][op_pref_cl]
2029 				  != NO_REGS)
2030 			      && (ira_may_move_in_cost[op_mode][op_pref_cl][cl]
2031 				  == 0))
2032 			    goto fail;
2033 			  else if (cl == op_pref_cl)
2034 			    pref_cl_alts |= ALTERNATIVE_BIT (curr_alt);
2035 			}
2036 		      else
2037 			goto fail;
2038 		    }
2039 		  if (constraint_satisfied_p (op, cn))
2040 		    goto fail;
2041 		  break;
2042 		}
2043 
2044 	      case '0': case '1': case '2': case '3': case '4':
2045 	      case '5': case '6': case '7': case '8': case '9':
2046 		{
2047 		  char *end;
2048 		  int n = (int) strtoul (str, &end, 10);
2049 		  str = end;
2050 		  if (original != -1 && original != n)
2051 		    goto fail;
2052 		  gcc_assert (n < recog_data.n_operands);
2053 		  if (respect_dup_despite_reg_cstr)
2054 		    {
2055 		      const operand_alternative *op_alt
2056 			= &recog_op_alt[curr_alt * recog_data.n_operands];
2057 		      /* Only respect the match with the preferred regclass;
2058 			 without respect_dup_despite_reg_cstr we could pick
2059 			 a match whose regclass isn't preferred first, but
2060 			 that would fail since there should be other
2061 			 alternatives with the preferred regclass.  */
2062 		      if (op_alt[n].cl == op_pref_cl)
2063 			original = n;
2064 		    }
2065 		  else
2066 		    original = n;
2067 		  continue;
2068 		}
2069 	      }
2070 	  str += CONSTRAINT_LEN (c, str);
2071 	}
2072       if (original == -1)
2073 	goto fail;
2074       if (recog_data.operand_type[original] == OP_OUT)
2075 	{
2076 	  if (pref_cl_alts == 0)
2077 	    return original;
2078 	  /* Visit the recorded alternatives to check whether there
2079 	     is one alternative in which no input operand has a
2080 	     matching constraint identical to our candidate.  Give
2081 	     up this candidate if so.  */
2082 	  int nop, nalt;
2083 	  for (nalt = 0; nalt < recog_data.n_alternatives; nalt++)
2084 	    {
2085 	      if (!TEST_BIT (pref_cl_alts, nalt))
2086 		continue;
2087 	      const operand_alternative *op_alt
2088 		= &recog_op_alt[nalt * recog_data.n_operands];
2089 	      bool dup_in_other = false;
2090 	      for (nop = 0; nop < recog_data.n_operands; nop++)
2091 		{
2092 		  if (recog_data.operand_type[nop] != OP_IN)
2093 		    continue;
2094 		  if (nop == op_num)
2095 		    continue;
2096 		  if (op_alt[nop].matches == original)
2097 		    {
2098 		      dup_in_other = true;
2099 		      break;
2100 		    }
2101 		}
2102 	      if (!dup_in_other)
2103 		return -1;
2104 	    }
2105 	  single_input_op_has_cstr_p = false;
2106 	  return original;
2107 	}
2108     fail:
2109       if (use_commut_op_p)
2110 	break;
2111       use_commut_op_p = true;
2112       if (recog_data.constraints[op_num][0] == '%')
2113 	str = recog_data.constraints[op_num + 1];
2114       else if (op_num > 0 && recog_data.constraints[op_num - 1][0] == '%')
2115 	str = recog_data.constraints[op_num - 1];
2116       else
2117 	break;
2118     }
2119   return -1;
2120 }
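
/* Worked example (sketch, assuming PARAM_IRA_CONSIDER_DUP_IN_ALL_ALTS
   is zero): for a two-address insn whose constraints are

     operand 0: "=r"    operand 1: "0"    operand 2: "r"

   calling ira_get_dup_out_num (1, alts, single_p) returns 0, because
   input operand 1 carries the matching constraint "0" naming output
   operand 0, and single_p stays true since no other input operand can
   satisfy that constraint.  For operand 2 the function returns -1.  */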
2121 
2122 
2123 
2124 /* Search forward to see if the source register of a copy insn dies
2125    before either it or the destination register is modified, but don't
2126    scan past the end of the basic block.  If so, we can replace the
2127    source with the destination and let the source die in the copy
2128    insn.
2129 
2130    This will reduce the number of registers live in that range and may
2131    enable coalescing of the destination and the source, thus often
2132    saving one register in addition to a register-register copy.  */
2133 
2134 static void
2135 decrease_live_ranges_number (void)
2136 {
2137   basic_block bb;
2138   rtx_insn *insn;
2139   rtx set, src, dest, dest_death, note;
2140   rtx_insn *p, *q;
2141   int sregno, dregno;
2142 
2143   if (! flag_expensive_optimizations)
2144     return;
2145 
2146   if (ira_dump_file)
2147     fprintf (ira_dump_file, "Starting decreasing number of live ranges...\n");
2148 
2149   FOR_EACH_BB_FN (bb, cfun)
2150     FOR_BB_INSNS (bb, insn)
2151       {
2152 	set = single_set (insn);
2153 	if (! set)
2154 	  continue;
2155 	src = SET_SRC (set);
2156 	dest = SET_DEST (set);
2157 	if (! REG_P (src) || ! REG_P (dest)
2158 	    || find_reg_note (insn, REG_DEAD, src))
2159 	  continue;
2160 	sregno = REGNO (src);
2161 	dregno = REGNO (dest);
2162 
2163 	/* We don't want to mess with hard regs if register classes
2164 	   are small.  */
2165 	if (sregno == dregno
2166 	    || (targetm.small_register_classes_for_mode_p (GET_MODE (src))
2167 		&& (sregno < FIRST_PSEUDO_REGISTER
2168 		    || dregno < FIRST_PSEUDO_REGISTER))
2169 	    /* We don't see all updates to SP if they are in an
2170 	       auto-inc memory reference, so we must disallow this
2171 	       optimization on them.  */
2172 	    || sregno == STACK_POINTER_REGNUM
2173 	    || dregno == STACK_POINTER_REGNUM)
2174 	  continue;
2175 
2176 	dest_death = NULL_RTX;
2177 
2178 	for (p = NEXT_INSN (insn); p; p = NEXT_INSN (p))
2179 	  {
2180 	    if (! INSN_P (p))
2181 	      continue;
2182 	    if (BLOCK_FOR_INSN (p) != bb)
2183 	      break;
2184 
2185 	    if (reg_set_p (src, p) || reg_set_p (dest, p)
2186 		/* If SRC is an asm-declared register, it must not be
2187 		   replaced in any asm.  Unfortunately, the REG_EXPR
2188 		   tree for the asm variable may be absent in the SRC
2189 		   rtx, so we can't check the actual register
2190 		   declaration easily (the asm operand will have it,
2191 		   though).  To avoid complicating the test for a rare
2192 		   case, we just don't perform register replacement
2193 		   for a hard reg mentioned in an asm.  */
2194 		|| (sregno < FIRST_PSEUDO_REGISTER
2195 		    && asm_noperands (PATTERN (p)) >= 0
2196 		    && reg_overlap_mentioned_p (src, PATTERN (p)))
2197 		/* Don't change hard registers used by a call.  */
2198 		|| (CALL_P (p) && sregno < FIRST_PSEUDO_REGISTER
2199 		    && find_reg_fusage (p, USE, src))
2200 		/* Don't change a USE of a register.  */
2201 		|| (GET_CODE (PATTERN (p)) == USE
2202 		    && reg_overlap_mentioned_p (src, XEXP (PATTERN (p), 0))))
2203 	      break;
2204 
2205 	    /* See if all of SRC dies in P.  This test is slightly
2206 	       more conservative than it needs to be.  */
2207 	    if ((note = find_regno_note (p, REG_DEAD, sregno))
2208 		&& GET_MODE (XEXP (note, 0)) == GET_MODE (src))
2209 	      {
2210 		int failed = 0;
2211 
2212 		/* We can do the optimization.  Scan forward from INSN
2213 		   again, replacing regs as we go.  Set FAILED if a
2214 		   replacement can't be done.  In that case, we can't
2215 		   move the death note for SRC.  This should be
2216 		   rare.  */
2217 
2218 		/* Set to stop at next insn.  */
2219 		for (q = next_real_insn (insn);
2220 		     q != next_real_insn (p);
2221 		     q = next_real_insn (q))
2222 		  {
2223 		    if (reg_overlap_mentioned_p (src, PATTERN (q)))
2224 		      {
2225 			/* If SRC is a hard register, we might miss
2226 			   some overlapping registers with
2227 			   validate_replace_rtx, so we would have to
2228 			   undo it.  We can't if DEST is present in
2229 			   the insn, so fail in that combination of
2230 			   cases.  */
2231 			if (sregno < FIRST_PSEUDO_REGISTER
2232 			    && reg_mentioned_p (dest, PATTERN (q)))
2233 			  failed = 1;
2234 
2235 			/* Attempt to replace all uses.  */
2236 			else if (!validate_replace_rtx (src, dest, q))
2237 			  failed = 1;
2238 
2239 			/* If this succeeded, but some part of the
2240 			   register is still present, undo the
2241 			   replacement.  */
2242 			else if (sregno < FIRST_PSEUDO_REGISTER
2243 				 && reg_overlap_mentioned_p (src, PATTERN (q)))
2244 			  {
2245 			    validate_replace_rtx (dest, src, q);
2246 			    failed = 1;
2247 			  }
2248 		      }
2249 
2250 		    /* If DEST dies here, remove the death note and
2251 		       save it for later.  Make sure ALL of DEST dies
2252 		       here; again, this is overly conservative.  */
2253 		    if (! dest_death
2254 			&& (dest_death = find_regno_note (q, REG_DEAD, dregno)))
2255 		      {
2256 			if (GET_MODE (XEXP (dest_death, 0)) == GET_MODE (dest))
2257 			  remove_note (q, dest_death);
2258 			else
2259 			  {
2260 			    failed = 1;
2261 			    dest_death = 0;
2262 			  }
2263 		      }
2264 		  }
2265 
2266 		if (! failed)
2267 		  {
2268 		    /* Move death note of SRC from P to INSN.  */
2269 		    remove_note (p, note);
2270 		    XEXP (note, 1) = REG_NOTES (insn);
2271 		    REG_NOTES (insn) = note;
2272 		  }
2273 
2274 		/* DEST is also dead if INSN has a REG_UNUSED note for
2275 		   DEST.  */
2276 		if (! dest_death
2277 		    && (dest_death
2278 			= find_regno_note (insn, REG_UNUSED, dregno)))
2279 		  {
2280 		    PUT_REG_NOTE_KIND (dest_death, REG_DEAD);
2281 		    remove_note (insn, dest_death);
2282 		  }
2283 
2284 		/* Put death note of DEST on P if we saw it die.  */
2285 		if (dest_death)
2286 		  {
2287 		    XEXP (dest_death, 1) = REG_NOTES (p);
2288 		    REG_NOTES (p) = dest_death;
2289 		  }
2290 		break;
2291 	      }
2292 
2293 	    /* If SRC is a hard register which is set or killed in
2294 	       some other way, we can't do this optimization.  */
2295 	    else if (sregno < FIRST_PSEUDO_REGISTER && dead_or_set_p (p, src))
2296 	      break;
2297 	  }
2298       }
2299 }
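
/* Before/after sketch of the transformation above (pseudo-register
   numbers are made up):

     r200 = r100                      r200 = r100    ;; r100 dies here
     ... = use (r100)        =>       ... = use (r200)
     ;; REG_DEAD r100

   Replacing the remaining uses of the source r100 by the destination
   r200 lets the source die in the copy insn itself, which shortens
   r100's live range and often allows the two pseudos to be coalesced
   later, removing the copy entirely.  */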
2300 
2301 
2302 
2303 /* Return nonzero if REGNO is a particularly bad choice for reloading X.  */
2304 static bool
2305 ira_bad_reload_regno_1 (int regno, rtx x)
2306 {
2307   int x_regno, n, i;
2308   ira_allocno_t a;
2309   enum reg_class pref;
2310 
2311   /* We only deal with pseudo regs.  */
2312   if (! x || GET_CODE (x) != REG)
2313     return false;
2314 
2315   x_regno = REGNO (x);
2316   if (x_regno < FIRST_PSEUDO_REGISTER)
2317     return false;
2318 
2319   /* If the pseudo prefers REGNO explicitly, then do not consider
2320      REGNO a bad spill choice.  */
2321   pref = reg_preferred_class (x_regno);
2322   if (reg_class_size[pref] == 1)
2323     return !TEST_HARD_REG_BIT (reg_class_contents[pref], regno);
2324 
2325   /* If the pseudo conflicts with REGNO, then we consider REGNO a
2326      poor choice for a reload regno.  */
2327   a = ira_regno_allocno_map[x_regno];
2328   n = ALLOCNO_NUM_OBJECTS (a);
2329   for (i = 0; i < n; i++)
2330     {
2331       ira_object_t obj = ALLOCNO_OBJECT (a, i);
2332       if (TEST_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), regno))
2333 	return true;
2334     }
2335   return false;
2336 }
2337 
2338 /* Return nonzero if REGNO is a particularly bad choice for reloading
2339    IN or OUT.  */
2340 bool
2341 ira_bad_reload_regno (int regno, rtx in, rtx out)
2342 {
2343   return (ira_bad_reload_regno_1 (regno, in)
2344 	  || ira_bad_reload_regno_1 (regno, out));
2345 }
2346 
2347 /* Add register clobbers from asm statements.  */
2348 static void
2349 compute_regs_asm_clobbered (void)
2350 {
2351   basic_block bb;
2352 
2353   FOR_EACH_BB_FN (bb, cfun)
2354     {
2355       rtx_insn *insn;
2356       FOR_BB_INSNS_REVERSE (bb, insn)
2357 	{
2358 	  df_ref def;
2359 
2360 	  if (NONDEBUG_INSN_P (insn) && asm_noperands (PATTERN (insn)) >= 0)
2361 	    FOR_EACH_INSN_DEF (def, insn)
2362 	      {
2363 		unsigned int dregno = DF_REF_REGNO (def);
2364 		if (HARD_REGISTER_NUM_P (dregno))
2365 		  add_to_hard_reg_set (&crtl->asm_clobbers,
2366 				       GET_MODE (DF_REF_REAL_REG (def)),
2367 				       dregno);
2368 	      }
2369 	}
2370     }
2371 }
2372 
2373 
2374 /* Set up ELIMINABLE_REGSET, IRA_NO_ALLOC_REGS, and
2375    REGS_EVER_LIVE.  */
2376 void
2377 ira_setup_eliminable_regset (void)
2378 {
2379   int i;
2380   static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
2381   int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM, Pmode);
2382 
2383   /* Setup is_leaf as frame_pointer_required may use it.  This function
2384      is called by sched_init before ira if scheduling is enabled.  */
2385   crtl->is_leaf = leaf_function_p ();
2386 
2387   /* FIXME: If EXIT_IGNORE_STACK is set, we will not save and restore
2388      sp for alloca.  So we can't eliminate the frame pointer in that
2389      case.  At some point, we should improve this by emitting the
2390      sp-adjusting insns for this case.  */
2391   frame_pointer_needed
2392     = (! flag_omit_frame_pointer
2393        || (cfun->calls_alloca && EXIT_IGNORE_STACK)
2394        /* We need the frame pointer to catch stack overflow exceptions if
2395 	  the stack pointer is moving (as for the alloca case just above).  */
2396        || (STACK_CHECK_MOVING_SP
2397 	   && flag_stack_check
2398 	   && flag_exceptions
2399 	   && cfun->can_throw_non_call_exceptions)
2400        || crtl->accesses_prior_frames
2401        || (SUPPORTS_STACK_ALIGNMENT && crtl->stack_realign_needed)
2402        || targetm.frame_pointer_required ());
2403 
2404     /* The chance that FRAME_POINTER_NEEDED will change after
2405        inspecting the RTL is very small.  So if we decide to use the
2406        frame pointer for RA and the RTL actually prevents this, we
2407        will spill pseudos assigned to the frame pointer in LRA.  */
2408 
2409   if (frame_pointer_needed)
2410     for (i = 0; i < fp_reg_count; i++)
2411       df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);
2412 
2413   ira_no_alloc_regs = no_unit_alloc_regs;
2414   CLEAR_HARD_REG_SET (eliminable_regset);
2415 
2416   compute_regs_asm_clobbered ();
2417 
2418   /* Build the regset of all eliminable registers and show we can't
2419      use those that we already know won't be eliminated.  */
2420   for (i = 0; i < (int) ARRAY_SIZE (eliminables); i++)
2421     {
2422       bool cannot_elim
2423 	= (! targetm.can_eliminate (eliminables[i].from, eliminables[i].to)
2424 	   || (eliminables[i].to == STACK_POINTER_REGNUM && frame_pointer_needed));
2425 
2426       if (!TEST_HARD_REG_BIT (crtl->asm_clobbers, eliminables[i].from))
2427 	{
2428 	    SET_HARD_REG_BIT (eliminable_regset, eliminables[i].from);
2429 
2430 	    if (cannot_elim)
2431 	      SET_HARD_REG_BIT (ira_no_alloc_regs, eliminables[i].from);
2432 	}
2433       else if (cannot_elim)
2434 	error ("%s cannot be used in %<asm%> here",
2435 	       reg_names[eliminables[i].from]);
2436       else
2437 	df_set_regs_ever_live (eliminables[i].from, true);
2438     }
2439   if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
2440     {
2441       for (i = 0; i < fp_reg_count; i++)
2442 	if (global_regs[HARD_FRAME_POINTER_REGNUM + i])
2443 	  /* Nothing to do: the register is already treated as live
2444 	     where appropriate, and cannot be eliminated.  */
2445 	  ;
2446 	else if (!TEST_HARD_REG_BIT (crtl->asm_clobbers,
2447 				     HARD_FRAME_POINTER_REGNUM + i))
2448 	  {
2449 	    SET_HARD_REG_BIT (eliminable_regset,
2450 			      HARD_FRAME_POINTER_REGNUM + i);
2451 	    if (frame_pointer_needed)
2452 	      SET_HARD_REG_BIT (ira_no_alloc_regs,
2453 				HARD_FRAME_POINTER_REGNUM + i);
2454 	  }
2455 	else if (frame_pointer_needed)
2456 	  error ("%s cannot be used in %<asm%> here",
2457 		 reg_names[HARD_FRAME_POINTER_REGNUM + i]);
2458 	else
2459 	  df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);
2460     }
2461 }
2462 
2463 
2464 
2465 /* Vector of substitutions of register numbers,
2466    used to map pseudo regs into hardware regs.
2467    This is set up as a result of register allocation.
2468    Element N is the hard reg assigned to pseudo reg N,
2469    or is -1 if no hard reg was assigned.
2470    If N is a hard reg number, element N is N.  */
2471 short *reg_renumber;
2472 
2473 /* Set up REG_RENUMBER and CALLER_SAVE_NEEDED (used by reload) from
2474    the allocation found by IRA.  */
2475 static void
2476 setup_reg_renumber (void)
2477 {
2478   int regno, hard_regno;
2479   ira_allocno_t a;
2480   ira_allocno_iterator ai;
2481 
2482   caller_save_needed = 0;
2483   FOR_EACH_ALLOCNO (a, ai)
2484     {
2485       if (ira_use_lra_p && ALLOCNO_CAP_MEMBER (a) != NULL)
2486 	continue;
2487       /* There are no caps at this point.  */
2488       ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
2489       if (! ALLOCNO_ASSIGNED_P (a))
2490 	/* This can happen if A is not referenced but is partially
2491 	   anticipated somewhere in a region.  */
2492 	ALLOCNO_ASSIGNED_P (a) = true;
2493       ira_free_allocno_updated_costs (a);
2494       hard_regno = ALLOCNO_HARD_REGNO (a);
2495       regno = ALLOCNO_REGNO (a);
2496       reg_renumber[regno] = (hard_regno < 0 ? -1 : hard_regno);
2497       if (hard_regno >= 0)
2498 	{
2499 	  int i, nwords;
2500 	  enum reg_class pclass;
2501 	  ira_object_t obj;
2502 
2503 	  pclass = ira_pressure_class_translate[REGNO_REG_CLASS (hard_regno)];
2504 	  nwords = ALLOCNO_NUM_OBJECTS (a);
2505 	  for (i = 0; i < nwords; i++)
2506 	    {
2507 	      obj = ALLOCNO_OBJECT (a, i);
2508 	      OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)
2509 		|= ~reg_class_contents[pclass];
2510 	    }
2511 	  if (ira_need_caller_save_p (a, hard_regno))
2512 	    {
2513 	      ira_assert (!optimize || flag_caller_saves
2514 			  || (ALLOCNO_CALLS_CROSSED_NUM (a)
2515 			      == ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a))
2516 			  || regno >= ira_reg_equiv_len
2517 			  || ira_equiv_no_lvalue_p (regno));
2518 	      caller_save_needed = 1;
2519 	    }
2520 	}
2521     }
2522 }
2523 
2524 /* Set up allocno assignment flags for further allocation
2525    improvements.  */
2526 static void
2527 setup_allocno_assignment_flags (void)
2528 {
2529   int hard_regno;
2530   ira_allocno_t a;
2531   ira_allocno_iterator ai;
2532 
2533   FOR_EACH_ALLOCNO (a, ai)
2534     {
2535       if (! ALLOCNO_ASSIGNED_P (a))
2536 	/* This can happen if A is not referenced but is partially
2537 	   anticipated somewhere in a region.  */
2538 	ira_free_allocno_updated_costs (a);
2539       hard_regno = ALLOCNO_HARD_REGNO (a);
2540       /* Don't assign hard registers to allocnos which are the
2541 	 destination of a store removed at the end of a loop.  It makes
2542 	 no sense to keep the same value in different hard registers.
2543 	 It is also impossible to assign hard registers correctly to
2544 	 such allocnos because the cost info and the info about
2545 	 intersecting calls are incorrect for them.  */
2546       ALLOCNO_ASSIGNED_P (a) = (hard_regno >= 0
2547 				|| ALLOCNO_EMIT_DATA (a)->mem_optimized_dest_p
2548 				|| (ALLOCNO_MEMORY_COST (a)
2549 				    - ALLOCNO_CLASS_COST (a)) < 0);
2550       ira_assert
2551 	(hard_regno < 0
2552 	 || ira_hard_reg_in_set_p (hard_regno, ALLOCNO_MODE (a),
2553 				   reg_class_contents[ALLOCNO_CLASS (a)]));
2554     }
2555 }
2556 
2557 /* Evaluate overall allocation cost and the costs for using hard
2558    registers and memory for allocnos.  */
2559 static void
2560 calculate_allocation_cost (void)
2561 {
2562   int hard_regno, cost;
2563   ira_allocno_t a;
2564   ira_allocno_iterator ai;
2565 
2566   ira_overall_cost = ira_reg_cost = ira_mem_cost = 0;
2567   FOR_EACH_ALLOCNO (a, ai)
2568     {
2569       hard_regno = ALLOCNO_HARD_REGNO (a);
2570       ira_assert (hard_regno < 0
2571 		  || (ira_hard_reg_in_set_p
2572 		      (hard_regno, ALLOCNO_MODE (a),
2573 		       reg_class_contents[ALLOCNO_CLASS (a)])));
2574       if (hard_regno < 0)
2575 	{
2576 	  cost = ALLOCNO_MEMORY_COST (a);
2577 	  ira_mem_cost += cost;
2578 	}
2579       else if (ALLOCNO_HARD_REG_COSTS (a) != NULL)
2580 	{
2581 	  cost = (ALLOCNO_HARD_REG_COSTS (a)
2582 		  [ira_class_hard_reg_index
2583 		   [ALLOCNO_CLASS (a)][hard_regno]]);
2584 	  ira_reg_cost += cost;
2585 	}
2586       else
2587 	{
2588 	  cost = ALLOCNO_CLASS_COST (a);
2589 	  ira_reg_cost += cost;
2590 	}
2591       ira_overall_cost += cost;
2592     }
2593 
2594   if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
2595     {
2596       fprintf (ira_dump_file,
2597 	       "+++Costs: overall %" PRId64
2598 	       ", reg %" PRId64
2599 	       ", mem %" PRId64
2600 	       ", ld %" PRId64
2601 	       ", st %" PRId64
2602 	       ", move %" PRId64,
2603 	       ira_overall_cost, ira_reg_cost, ira_mem_cost,
2604 	       ira_load_cost, ira_store_cost, ira_shuffle_cost);
2605       fprintf (ira_dump_file, "\n+++       move loops %d, new jumps %d\n",
2606 	       ira_move_loops_num, ira_additional_jumps_num);
2607     }
2608 
2609 }
2610 
2611 #ifdef ENABLE_IRA_CHECKING
2612 /* Check the correctness of the allocation.  We need this because of
2613    the complicated code that transforms the multi-region internal
2614    representation into the one-region representation.  */
2615 static void
2616 check_allocation (void)
2617 {
2618   ira_allocno_t a;
2619   int hard_regno, nregs, conflict_nregs;
2620   ira_allocno_iterator ai;
2621 
2622   FOR_EACH_ALLOCNO (a, ai)
2623     {
2624       int n = ALLOCNO_NUM_OBJECTS (a);
2625       int i;
2626 
2627       if (ALLOCNO_CAP_MEMBER (a) != NULL
2628 	  || (hard_regno = ALLOCNO_HARD_REGNO (a)) < 0)
2629 	continue;
2630       nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (a));
2631       if (nregs == 1)
2632 	/* We allocated a single hard register.  */
2633 	n = 1;
2634       else if (n > 1)
2635 	/* We allocated multiple hard registers, and we will test
2636 	   conflicts in a granularity of single hard regs.  */
2637 	nregs = 1;
2638 
2639       for (i = 0; i < n; i++)
2640 	{
2641 	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
2642 	  ira_object_t conflict_obj;
2643 	  ira_object_conflict_iterator oci;
2644 	  int this_regno = hard_regno;
2645 	  if (n > 1)
2646 	    {
2647 	      if (REG_WORDS_BIG_ENDIAN)
2648 		this_regno += n - i - 1;
2649 	      else
2650 		this_regno += i;
2651 	    }
2652 	  FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
2653 	    {
2654 	      ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
2655 	      int conflict_hard_regno = ALLOCNO_HARD_REGNO (conflict_a);
2656 	      if (conflict_hard_regno < 0)
2657 		continue;
2658 	      if (ira_soft_conflict (a, conflict_a))
2659 		continue;
2660 
2661 	      conflict_nregs = hard_regno_nregs (conflict_hard_regno,
2662 						 ALLOCNO_MODE (conflict_a));
2663 
2664 	      if (ALLOCNO_NUM_OBJECTS (conflict_a) > 1
2665 		  && conflict_nregs == ALLOCNO_NUM_OBJECTS (conflict_a))
2666 		{
2667 		  if (REG_WORDS_BIG_ENDIAN)
2668 		    conflict_hard_regno += (ALLOCNO_NUM_OBJECTS (conflict_a)
2669 					    - OBJECT_SUBWORD (conflict_obj) - 1);
2670 		  else
2671 		    conflict_hard_regno += OBJECT_SUBWORD (conflict_obj);
2672 		  conflict_nregs = 1;
2673 		}
2674 
2675 	      if ((conflict_hard_regno <= this_regno
2676 		 && this_regno < conflict_hard_regno + conflict_nregs)
2677 		|| (this_regno <= conflict_hard_regno
2678 		    && conflict_hard_regno < this_regno + nregs))
2679 		{
2680 		  fprintf (stderr, "bad allocation for %d and %d\n",
2681 			   ALLOCNO_REGNO (a), ALLOCNO_REGNO (conflict_a));
2682 		  gcc_unreachable ();
2683 		}
2684 	    }
2685 	}
2686     }
2687 }
2688 #endif
2689 
2690 /* Allocate REG_EQUIV_INIT.  Set up it from IRA_REG_EQUIV which should
2691    be already calculated.  */
2692 static void
2693 setup_reg_equiv_init (void)
2694 {
2695   int i;
2696   int max_regno = max_reg_num ();
2697 
2698   for (i = 0; i < max_regno; i++)
2699     reg_equiv_init (i) = ira_reg_equiv[i].init_insns;
2700 }
2701 
2702 /* Update equiv regno from movement of FROM_REGNO to TO_REGNO.  INSNS
2703    are insns which were generated for such movement.  It is assumed
2704    that FROM_REGNO and TO_REGNO always have the same value at the
2705    point of any move containing such registers. This function is used
2706    to update equiv info for register shuffles on the region borders
2707    and for caller save/restore insns.  */
2708 void
2709 ira_update_equiv_info_by_shuffle_insn (int to_regno, int from_regno, rtx_insn *insns)
2710 {
2711   rtx_insn *insn;
2712   rtx x, note;
2713 
2714   if (! ira_reg_equiv[from_regno].defined_p
2715       && (! ira_reg_equiv[to_regno].defined_p
2716 	  || ((x = ira_reg_equiv[to_regno].memory) != NULL_RTX
2717 	      && ! MEM_READONLY_P (x))))
2718     return;
2719   insn = insns;
2720   if (NEXT_INSN (insn) != NULL_RTX)
2721     {
2722       if (! ira_reg_equiv[to_regno].defined_p)
2723 	{
2724 	  ira_assert (ira_reg_equiv[to_regno].init_insns == NULL_RTX);
2725 	  return;
2726 	}
2727       ira_reg_equiv[to_regno].defined_p = false;
2728       ira_reg_equiv[to_regno].memory
2729 	= ira_reg_equiv[to_regno].constant
2730 	= ira_reg_equiv[to_regno].invariant
2731 	= ira_reg_equiv[to_regno].init_insns = NULL;
2732       if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
2733 	fprintf (ira_dump_file,
2734 		 "      Invalidating equiv info for reg %d\n", to_regno);
2735       return;
2736     }
2737   /* It is possible that FROM_REGNO has no equivalence yet because,
2738      in the shuffles to_regno<-from_regno and from_regno<-to_regno,
2739      the second insn has not been processed yet.  */
2740   if (ira_reg_equiv[from_regno].defined_p)
2741     {
2742       ira_reg_equiv[to_regno].defined_p = true;
2743       if ((x = ira_reg_equiv[from_regno].memory) != NULL_RTX)
2744 	{
2745 	  ira_assert (ira_reg_equiv[from_regno].invariant == NULL_RTX
2746 		      && ira_reg_equiv[from_regno].constant == NULL_RTX);
2747 	  ira_assert (ira_reg_equiv[to_regno].memory == NULL_RTX
2748 		      || rtx_equal_p (ira_reg_equiv[to_regno].memory, x));
2749 	  ira_reg_equiv[to_regno].memory = x;
2750 	  if (! MEM_READONLY_P (x))
2751 	    /* We don't add the insn to the init insn list because the
2752 	       memory equivalence only says which memory is better to
2753 	       use when the pseudo is spilled.  */
2754 	    return;
2755 	}
2756       else if ((x = ira_reg_equiv[from_regno].constant) != NULL_RTX)
2757 	{
2758 	  ira_assert (ira_reg_equiv[from_regno].invariant == NULL_RTX);
2759 	  ira_assert (ira_reg_equiv[to_regno].constant == NULL_RTX
2760 		      || rtx_equal_p (ira_reg_equiv[to_regno].constant, x));
2761 	  ira_reg_equiv[to_regno].constant = x;
2762 	}
2763       else
2764 	{
2765 	  x = ira_reg_equiv[from_regno].invariant;
2766 	  ira_assert (x != NULL_RTX);
2767 	  ira_assert (ira_reg_equiv[to_regno].invariant == NULL_RTX
2768 		      || rtx_equal_p (ira_reg_equiv[to_regno].invariant, x));
2769 	  ira_reg_equiv[to_regno].invariant = x;
2770 	}
2771       if (find_reg_note (insn, REG_EQUIV, x) == NULL_RTX)
2772 	{
2773 	  note = set_unique_reg_note (insn, REG_EQUIV, copy_rtx (x));
2774 	  gcc_assert (note != NULL_RTX);
2775 	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
2776 	    {
2777 	      fprintf (ira_dump_file,
2778 		       "      Adding equiv note to insn %u for reg %d ",
2779 		       INSN_UID (insn), to_regno);
2780 	      dump_value_slim (ira_dump_file, x, 1);
2781 	      fprintf (ira_dump_file, "\n");
2782 	    }
2783 	}
2784     }
2785   ira_reg_equiv[to_regno].init_insns
2786     = gen_rtx_INSN_LIST (VOIDmode, insn,
2787 			 ira_reg_equiv[to_regno].init_insns);
2788   if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
2789     fprintf (ira_dump_file,
2790 	     "      Adding equiv init move insn %u to reg %d\n",
2791 	     INSN_UID (insn), to_regno);
2792 }
2793 
2794 /* Fix values of array REG_EQUIV_INIT after live range splitting done
2795    by IRA.  */
2796 static void
2797 fix_reg_equiv_init (void)
2798 {
2799   int max_regno = max_reg_num ();
2800   int i, new_regno, max;
2801   rtx set;
2802   rtx_insn_list *x, *next, *prev;
2803   rtx_insn *insn;
2804 
2805   if (max_regno_before_ira < max_regno)
2806     {
2807       max = vec_safe_length (reg_equivs);
2808       grow_reg_equivs ();
2809       for (i = FIRST_PSEUDO_REGISTER; i < max; i++)
2810 	for (prev = NULL, x = reg_equiv_init (i);
2811 	     x != NULL_RTX;
2812 	     x = next)
2813 	  {
2814 	    next = x->next ();
2815 	    insn = x->insn ();
2816 	    set = single_set (insn);
2817 	    ira_assert (set != NULL_RTX
2818 			&& (REG_P (SET_DEST (set)) || REG_P (SET_SRC (set))));
2819 	    if (REG_P (SET_DEST (set))
2820 		&& ((int) REGNO (SET_DEST (set)) == i
2821 		    || (int) ORIGINAL_REGNO (SET_DEST (set)) == i))
2822 	      new_regno = REGNO (SET_DEST (set));
2823 	    else if (REG_P (SET_SRC (set))
2824 		     && ((int) REGNO (SET_SRC (set)) == i
2825 			 || (int) ORIGINAL_REGNO (SET_SRC (set)) == i))
2826 	      new_regno = REGNO (SET_SRC (set));
2827 	    else
2828  	      gcc_unreachable ();
2829 	    if (new_regno == i)
2830 	      prev = x;
2831 	    else
2832 	      {
2833 		/* Remove the wrong list element.  */
2834 		if (prev == NULL_RTX)
2835 		  reg_equiv_init (i) = next;
2836 		else
2837 		  XEXP (prev, 1) = next;
2838 		XEXP (x, 1) = reg_equiv_init (new_regno);
2839 		reg_equiv_init (new_regno) = x;
2840 	      }
2841 	  }
2842     }
2843 }
2844 
2845 #ifdef ENABLE_IRA_CHECKING
2846 /* Print redundant memory-memory copies.  */
2847 static void
2848 print_redundant_copies (void)
2849 {
2850   int hard_regno;
2851   ira_allocno_t a;
2852   ira_copy_t cp, next_cp;
2853   ira_allocno_iterator ai;
2854 
2855   FOR_EACH_ALLOCNO (a, ai)
2856     {
2857       if (ALLOCNO_CAP_MEMBER (a) != NULL)
2858 	/* It is a cap.  */
2859 	continue;
2860       hard_regno = ALLOCNO_HARD_REGNO (a);
2861       if (hard_regno >= 0)
2862 	continue;
2863       for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
2864 	if (cp->first == a)
2865 	  next_cp = cp->next_first_allocno_copy;
2866 	else
2867 	  {
2868 	    next_cp = cp->next_second_allocno_copy;
2869 	    if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL
2870 		&& cp->insn != NULL_RTX
2871 		&& ALLOCNO_HARD_REGNO (cp->first) == hard_regno)
2872 	      fprintf (ira_dump_file,
2873 		       "        Redundant move from %d(freq %d):%d\n",
2874 		       INSN_UID (cp->insn), cp->freq, hard_regno);
2875 	  }
2876     }
2877 }
2878 #endif
2879 
2880 /* Setup preferred and alternative classes for new pseudo-registers
2881    created by IRA starting with START.  */
2882 static void
2883 setup_preferred_alternate_classes_for_new_pseudos (int start)
2884 {
2885   int i, old_regno;
2886   int max_regno = max_reg_num ();
2887 
2888   for (i = start; i < max_regno; i++)
2889     {
2890       old_regno = ORIGINAL_REGNO (regno_reg_rtx[i]);
2891       ira_assert (i != old_regno);
2892       setup_reg_classes (i, reg_preferred_class (old_regno),
2893 			 reg_alternate_class (old_regno),
2894 			 reg_allocno_class (old_regno));
2895       if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
2896 	fprintf (ira_dump_file,
2897 		 "    New r%d: setting preferred %s, alternative %s\n",
2898 		 i, reg_class_names[reg_preferred_class (old_regno)],
2899 		 reg_class_names[reg_alternate_class (old_regno)]);
2900     }
2901 }
2902 
2903 
2904 /* The number of entries allocated in reg_info.  */
2905 static int allocated_reg_info_size;
2906 
2907 /* Regional allocation can create new pseudo-registers.  This function
2908    expands some arrays for pseudo-registers.  */
2909 static void
2910 expand_reg_info (void)
2911 {
2912   int i;
2913   int size = max_reg_num ();
2914 
2915   resize_reg_info ();
2916   for (i = allocated_reg_info_size; i < size; i++)
2917     setup_reg_classes (i, GENERAL_REGS, ALL_REGS, GENERAL_REGS);
2918   setup_preferred_alternate_classes_for_new_pseudos (allocated_reg_info_size);
2919   allocated_reg_info_size = size;
2920 }
2921 
2922 /* Return TRUE if the register pressure in the function is too high.
2923    It is used to decide when stack slot sharing is worth doing.  */
2924 static bool
2925 too_high_register_pressure_p (void)
2926 {
2927   int i;
2928   enum reg_class pclass;
2929 
2930   for (i = 0; i < ira_pressure_classes_num; i++)
2931     {
2932       pclass = ira_pressure_classes[i];
2933       if (ira_loop_tree_root->reg_pressure[pclass] > 10000)
2934 	return true;
2935     }
2936   return false;
2937 }
2938 
2939 
2940 
2941 /* Indicate that hard register number FROM was eliminated and replaced with
2942    an offset from hard register number TO.  The status of hard registers live
2943    at the start of a basic block is updated by replacing a use of FROM with
2944    a use of TO.  */
2945 
2946 void
2947 mark_elimination (int from, int to)
2948 {
2949   basic_block bb;
2950   bitmap r;
2951 
2952   FOR_EACH_BB_FN (bb, cfun)
2953     {
2954       r = DF_LR_IN (bb);
2955       if (bitmap_bit_p (r, from))
2956 	{
2957 	  bitmap_clear_bit (r, from);
2958 	  bitmap_set_bit (r, to);
2959 	}
2960       if (! df_live)
2961         continue;
2962       r = DF_LIVE_IN (bb);
2963       if (bitmap_bit_p (r, from))
2964 	{
2965 	  bitmap_clear_bit (r, from);
2966 	  bitmap_set_bit (r, to);
2967 	}
2968     }
2969 }
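
/* Illustrative use (hypothetical call site, not from this file): when
   the frame pointer is eliminated in favor of the stack pointer, a
   caller would invoke

     mark_elimination (HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM);

   so that every block whose DF_LR (and, when computed, DF_LIVE)
   live-in set mentions the frame pointer is updated to mention the
   stack pointer instead, keeping the dataflow information consistent
   with the elimination.  */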
2970 
2971 
2972 
2973 /* The length of the following array.  */
2974 int ira_reg_equiv_len;
2975 
2976 /* Equivalence info for each register.  */
2977 struct ira_reg_equiv_s *ira_reg_equiv;
2978 
2979 /* Expand ira_reg_equiv if necessary.  */
2980 void
2981 ira_expand_reg_equiv (void)
2982 {
2983   int old = ira_reg_equiv_len;
2984 
2985   if (ira_reg_equiv_len > max_reg_num ())
2986     return;
2987   ira_reg_equiv_len = max_reg_num () * 3 / 2 + 1;
2988   ira_reg_equiv
2989     = (struct ira_reg_equiv_s *) xrealloc (ira_reg_equiv,
2990 					 ira_reg_equiv_len
2991 					 * sizeof (struct ira_reg_equiv_s));
2992   gcc_assert (old < ira_reg_equiv_len);
2993   memset (ira_reg_equiv + old, 0,
2994 	  sizeof (struct ira_reg_equiv_s) * (ira_reg_equiv_len - old));
2995 }
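
/* A minimal standalone sketch of the growth policy above (hypothetical
   names grow_vec/elem_t; plain C library, no error handling -- GCC's
   xrealloc aborts on failure): grow to 1.5x the required length plus
   one and zero only the new tail, so repeated expansions cost
   amortized O(1) per element.  */
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct { int defined_p; } elem_t;

static elem_t *
grow_vec (elem_t *vec, int *len, int needed)
{
  if (*len > needed)
    return vec;			/* Still big enough; nothing to do.  */
  int old = *len;
  *len = needed * 3 / 2 + 1;	/* Same policy as ira_expand_reg_equiv.  */
  vec = (elem_t *) realloc (vec, *len * sizeof (elem_t));
  memset (vec + old, 0, sizeof (elem_t) * (*len - old));
  return vec;
}
#endif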
2996 
2997 static void
2998 init_reg_equiv (void)
2999 {
3000   ira_reg_equiv_len = 0;
3001   ira_reg_equiv = NULL;
3002   ira_expand_reg_equiv ();
3003 }
3004 
3005 static void
3006 finish_reg_equiv (void)
3007 {
3008   free (ira_reg_equiv);
3009 }
3010 
3011 
3012 
3013 struct equivalence
3014 {
3015   /* Set when a REG_EQUIV note is found or created.  Used to
3016      keep track of what memory accesses might be created later,
3017      e.g. by reload.  */
3018   rtx replacement;
3019   rtx *src_p;
3020 
3021   /* The list of each instruction which initializes this register.
3022 
3023      NULL indicates we know nothing about this register's equivalence
3024      properties.
3025 
3026      An INSN_LIST with a NULL insn indicates this pseudo is already
3027      known to not have a valid equivalence.  */
3028   rtx_insn_list *init_insns;
3029 
3030   /* Loop depth is used to recognize equivalences which appear
3031      to be present within the same loop (or in an inner loop).  */
3032   short loop_depth;
3033   /* Nonzero if this had a preexisting REG_EQUIV note.  */
3034   unsigned char is_arg_equivalence : 1;
3035   /* Set when an attempt should be made to replace a register
3036      with the associated src_p entry.  */
3037   unsigned char replace : 1;
3038   /* Set if this register has no known equivalence.  */
3039   unsigned char no_equiv : 1;
3040   /* Set if this register is mentioned in a paradoxical subreg.  */
3041   unsigned char pdx_subregs : 1;
3042 };
3043 
3044 /* reg_equiv[N] (where N is a pseudo reg number) is the equivalence
3045    structure for that register.  */
3046 static struct equivalence *reg_equiv;
3047 
3048 /* Used for communication between the following two functions.  */
3049 struct equiv_mem_data
3050 {
3051   /* A MEM that we wish to ensure remains unchanged.  */
3052   rtx equiv_mem;
3053 
3054   /* Set true if EQUIV_MEM is modified.  */
3055   bool equiv_mem_modified;
3056 };
3057 
3058 /* If EQUIV_MEM is modified by modifying DEST, indicate that it is modified.
3059    Called via note_stores.  */
3060 static void
3061 validate_equiv_mem_from_store (rtx dest, const_rtx set ATTRIBUTE_UNUSED,
3062 			       void *data)
3063 {
3064   struct equiv_mem_data *info = (struct equiv_mem_data *) data;
3065 
3066   if ((REG_P (dest)
3067        && reg_overlap_mentioned_p (dest, info->equiv_mem))
3068       || (MEM_P (dest)
3069 	  && anti_dependence (info->equiv_mem, dest)))
3070     info->equiv_mem_modified = true;
3071 }
3072 
3073 enum valid_equiv { valid_none, valid_combine, valid_reload };
3074 
3075 /* Verify that no store between START and the death of REG invalidates
3076    MEMREF.  MEMREF is invalidated by modifying a register used in MEMREF,
3077    by storing into an overlapping memory location, or with a non-const
3078    CALL_INSN.
3079 
3080    Return VALID_RELOAD if MEMREF remains valid for both reload and
3081    combine_and_move_insns, VALID_COMBINE if only valid for
3082    combine_and_move_insns, and VALID_NONE otherwise.  */
3083 static enum valid_equiv
3084 validate_equiv_mem (rtx_insn *start, rtx reg, rtx memref)
3085 {
3086   rtx_insn *insn;
3087   rtx note;
3088   struct equiv_mem_data info = { memref, false };
3089   enum valid_equiv ret = valid_reload;
3090 
3091   /* If the memory reference has side effects or is volatile, it isn't a
3092      valid equivalence.  */
3093   if (side_effects_p (memref))
3094     return valid_none;
3095 
3096   for (insn = start; insn; insn = NEXT_INSN (insn))
3097     {
3098       if (!INSN_P (insn))
3099 	continue;
3100 
3101       if (find_reg_note (insn, REG_DEAD, reg))
3102 	return ret;
3103 
3104       if (CALL_P (insn))
3105 	{
3106 	  /* We can combine a reg def from one insn into a reg use in
3107 	     another over a call if the memory is readonly or the call is
3108 	     const/pure.  However, we can't set reg_equiv notes up for
3109 	     reload over any call.  The problem is the equivalent form
3110 	     may reference a pseudo which gets assigned a call
3111 	     clobbered hard reg.  When we later replace REG with its
3112 	     equivalent form, the value in the call-clobbered reg has
3113 	     been changed and all hell breaks loose.  */
3114 	  ret = valid_combine;
3115 	  if (!MEM_READONLY_P (memref)
3116 	      && !RTL_CONST_OR_PURE_CALL_P (insn))
3117 	    return valid_none;
3118 	}
3119 
3120       note_stores (insn, validate_equiv_mem_from_store, &info);
3121       if (info.equiv_mem_modified)
3122 	return valid_none;
3123 
3124       /* If a register mentioned in MEMREF is modified via an
3125 	 auto-increment, we lose the equivalence.  Do the same if one
3126 	 dies; although we could extend the life, it doesn't seem worth
3127 	 the trouble.  */
3128 
3129       for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
3130 	if ((REG_NOTE_KIND (note) == REG_INC
3131 	     || REG_NOTE_KIND (note) == REG_DEAD)
3132 	    && REG_P (XEXP (note, 0))
3133 	    && reg_overlap_mentioned_p (XEXP (note, 0), memref))
3134 	  return valid_none;
3135     }
3136 
3137   return valid_none;
3138 }
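
/* Worked example (hypothetical source, illustrative only):

     I1: (set (reg 100) (mem (reg 99)))   // START, load through pseudo 99
     I2: call f ()                        // not const/pure
     I3: use of reg 100                   // carries REG_DEAD for reg 100

   If the MEM is not MEM_READONLY_P, the call may change the memory and
   the scan returns valid_none.  If it is readonly (or the call is
   const/pure), the result is downgraded to valid_combine: combining
   the load into I3 remains safe, but a reload-time REG_EQUIV does not,
   because pseudo 99 might end up in a call-clobbered hard register.  */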
3139 
3140 /* Return nonzero if X may vary; zero if X is known to be invariant.  */
3141 static int
3142 equiv_init_varies_p (rtx x)
3143 {
3144   RTX_CODE code = GET_CODE (x);
3145   int i;
3146   const char *fmt;
3147 
3148   switch (code)
3149     {
3150     case MEM:
3151       return !MEM_READONLY_P (x) || equiv_init_varies_p (XEXP (x, 0));
3152 
3153     case CONST:
3154     CASE_CONST_ANY:
3155     case SYMBOL_REF:
3156     case LABEL_REF:
3157       return 0;
3158 
3159     case REG:
3160       return reg_equiv[REGNO (x)].replace == 0 && rtx_varies_p (x, 0);
3161 
3162     case ASM_OPERANDS:
3163       if (MEM_VOLATILE_P (x))
3164 	return 1;
3165 
3166       /* Fall through.  */
3167 
3168     default:
3169       break;
3170     }
3171 
3172   fmt = GET_RTX_FORMAT (code);
3173   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3174     if (fmt[i] == 'e')
3175       {
3176 	if (equiv_init_varies_p (XEXP (x, i)))
3177 	  return 1;
3178       }
3179     else if (fmt[i] == 'E')
3180       {
3181 	int j;
3182 	for (j = 0; j < XVECLEN (x, i); j++)
3183 	  if (equiv_init_varies_p (XVECEXP (x, i, j)))
3184 	    return 1;
3185       }
3186 
3187   return 0;
3188 }
3189 
3190 /* Returns nonzero if X (used to initialize register REGNO) is movable.
3191    X is only movable if the registers it uses have equivalent initializations
3192    which appear to be within the same loop (or in an inner loop) and movable,
3193    or if they are not candidates for local_alloc and don't vary.  */
3194 static int
3195 equiv_init_movable_p (rtx x, int regno)
3196 {
3197   int i, j;
3198   const char *fmt;
3199   enum rtx_code code = GET_CODE (x);
3200 
3201   switch (code)
3202     {
3203     case SET:
3204       return equiv_init_movable_p (SET_SRC (x), regno);
3205 
3206     case CLOBBER:
3207       return 0;
3208 
3209     case PRE_INC:
3210     case PRE_DEC:
3211     case POST_INC:
3212     case POST_DEC:
3213     case PRE_MODIFY:
3214     case POST_MODIFY:
3215       return 0;
3216 
3217     case REG:
3218       return ((reg_equiv[REGNO (x)].loop_depth >= reg_equiv[regno].loop_depth
3219 	       && reg_equiv[REGNO (x)].replace)
3220 	      || (REG_BASIC_BLOCK (REGNO (x)) < NUM_FIXED_BLOCKS
3221 		  && ! rtx_varies_p (x, 0)));
3222 
3223     case UNSPEC_VOLATILE:
3224       return 0;
3225 
3226     case ASM_OPERANDS:
3227       if (MEM_VOLATILE_P (x))
3228 	return 0;
3229 
3230       /* Fall through.  */
3231 
3232     default:
3233       break;
3234     }
3235 
3236   fmt = GET_RTX_FORMAT (code);
3237   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3238     switch (fmt[i])
3239       {
3240       case 'e':
3241 	if (! equiv_init_movable_p (XEXP (x, i), regno))
3242 	  return 0;
3243 	break;
3244       case 'E':
3245 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3246 	  if (! equiv_init_movable_p (XVECEXP (x, i, j), regno))
3247 	    return 0;
3248 	break;
3249       }
3250 
3251   return 1;
3252 }
3253 
3254 static bool memref_referenced_p (rtx memref, rtx x, bool read_p);
3255 
3256 /* Auxiliary function for memref_referenced_p.  Process setting X for
3257    MEMREF store.  */
3258 static bool
3259 process_set_for_memref_referenced_p (rtx memref, rtx x)
3260 {
3261   /* If we are setting a MEM, it doesn't count (its address does), but any
3262      other SET_DEST that has a MEM in it is referencing the MEM.  */
3263   if (MEM_P (x))
3264     {
3265       if (memref_referenced_p (memref, XEXP (x, 0), true))
3266 	return true;
3267     }
3268   else if (memref_referenced_p (memref, x, false))
3269     return true;
3270 
3271   return false;
3272 }
3273 
3274 /* TRUE if X references a memory location (as a read if READ_P) that
3275    would be affected by a store to MEMREF.  */
3276 static bool
3277 memref_referenced_p (rtx memref, rtx x, bool read_p)
3278 {
3279   int i, j;
3280   const char *fmt;
3281   enum rtx_code code = GET_CODE (x);
3282 
3283   switch (code)
3284     {
3285     case CONST:
3286     case LABEL_REF:
3287     case SYMBOL_REF:
3288     CASE_CONST_ANY:
3289     case PC:
3290     case HIGH:
3291     case LO_SUM:
3292       return false;
3293 
3294     case REG:
3295       return (reg_equiv[REGNO (x)].replacement
3296 	      && memref_referenced_p (memref,
3297 				      reg_equiv[REGNO (x)].replacement, read_p));
3298 
3299     case MEM:
3300       /* Memory X might have a different effective type than MEMREF.  */
3301       if (read_p || true_dependence (memref, VOIDmode, x))
3302 	return true;
3303       break;
3304 
3305     case SET:
3306       if (process_set_for_memref_referenced_p (memref, SET_DEST (x)))
3307 	return true;
3308 
3309       return memref_referenced_p (memref, SET_SRC (x), true);
3310 
3311     case CLOBBER:
3312       if (process_set_for_memref_referenced_p (memref, XEXP (x, 0)))
3313 	return true;
3314 
3315       return false;
3316 
3317     case PRE_DEC:
3318     case POST_DEC:
3319     case PRE_INC:
3320     case POST_INC:
3321       if (process_set_for_memref_referenced_p (memref, XEXP (x, 0)))
3322 	return true;
3323 
3324       return memref_referenced_p (memref, XEXP (x, 0), true);
3325 
3326     case POST_MODIFY:
3327     case PRE_MODIFY:
3328       /* op0 = op0 + op1 */
3329       if (process_set_for_memref_referenced_p (memref, XEXP (x, 0)))
3330 	return true;
3331 
3332       if (memref_referenced_p (memref, XEXP (x, 0), true))
3333 	return true;
3334 
3335       return memref_referenced_p (memref, XEXP (x, 1), true);
3336 
3337     default:
3338       break;
3339     }
3340 
3341   fmt = GET_RTX_FORMAT (code);
3342   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3343     switch (fmt[i])
3344       {
3345       case 'e':
3346 	if (memref_referenced_p (memref, XEXP (x, i), read_p))
3347 	  return true;
3348 	break;
3349       case 'E':
3350 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3351 	  if (memref_referenced_p (memref, XVECEXP (x, i, j), read_p))
3352 	    return true;
3353 	break;
3354       }
3355 
3356   return false;
3357 }
3358 
3359 /* TRUE if some insn in the range (START, END] references a memory location
3360    that would be affected by a store to MEMREF.
3361 
3362    Callers should not call this routine if START is after END in the
3363    RTL chain.  */
3364 
3365 static int
3366 memref_used_between_p (rtx memref, rtx_insn *start, rtx_insn *end)
3367 {
3368   rtx_insn *insn;
3369 
3370   for (insn = NEXT_INSN (start);
3371        insn && insn != NEXT_INSN (end);
3372        insn = NEXT_INSN (insn))
3373     {
3374       if (!NONDEBUG_INSN_P (insn))
3375 	continue;
3376 
3377       if (memref_referenced_p (memref, PATTERN (insn), false))
3378 	return 1;
3379 
3380       /* Nonconst functions may access memory.  */
3381       if (CALL_P (insn) && (! RTL_CONST_CALL_P (insn)))
3382 	return 1;
3383     }
3384 
3385   gcc_assert (insn == NEXT_INSN (end));
3386   return 0;
3387 }
3388 
3389 /* Mark REG as having no known equivalence.
3390    Some instructions might have been processed before and furnished
3391    with REG_EQUIV notes for this register; these notes will have to be
3392    removed.
3393    STORE is the piece of RTL that does the non-constant / conflicting
3394    assignment - a SET, CLOBBER or REG_INC note.  It is currently not used,
3395    but needs to be there because this function is called from note_stores.  */
3396 static void
3397 no_equiv (rtx reg, const_rtx store ATTRIBUTE_UNUSED,
3398 	  void *data ATTRIBUTE_UNUSED)
3399 {
3400   int regno;
3401   rtx_insn_list *list;
3402 
3403   if (!REG_P (reg))
3404     return;
3405   regno = REGNO (reg);
3406   reg_equiv[regno].no_equiv = 1;
3407   list = reg_equiv[regno].init_insns;
3408   if (list && list->insn () == NULL)
3409     return;
3410   reg_equiv[regno].init_insns = gen_rtx_INSN_LIST (VOIDmode, NULL_RTX, NULL);
3411   reg_equiv[regno].replacement = NULL_RTX;
3412   /* This doesn't matter for equivalences made for argument registers; we
3413      should keep their initialization insns.  */
3414   if (reg_equiv[regno].is_arg_equivalence)
3415     return;
3416   ira_reg_equiv[regno].defined_p = false;
3417   ira_reg_equiv[regno].init_insns = NULL;
3418   for (; list; list = list->next ())
3419     {
3420       rtx_insn *insn = list->insn ();
3421       remove_note (insn, find_reg_note (insn, REG_EQUIV, NULL_RTX));
3422     }
3423 }
3424 
3425 /* Check whether the SUBREG is a paradoxical subreg and set the result
3426    in PDX_SUBREGS.  */
3427 
3428 static void
3429 set_paradoxical_subreg (rtx_insn *insn)
3430 {
3431   subrtx_iterator::array_type array;
3432   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
3433     {
3434       const_rtx subreg = *iter;
3435       if (GET_CODE (subreg) == SUBREG)
3436 	{
3437 	  const_rtx reg = SUBREG_REG (subreg);
3438 	  if (REG_P (reg) && paradoxical_subreg_p (subreg))
3439 	    reg_equiv[REGNO (reg)].pdx_subregs = true;
3440 	}
3441     }
3442 }
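
/* Example (illustrative): on a 32-bit target, (subreg:DI (reg:SI 100) 0)
   is paradoxical -- the outer mode is wider than the inner register.
   Recording reg 100 in pdx_subregs lets update_equiv_regs refuse to
   give it a MEM equivalence, since the paradoxical access could touch
   bytes beyond the equivalent memory location.  */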
3443 
3444 /* In DEBUG_INSN location adjust REGs from CLEARED_REGS bitmap to the
3445    equivalent replacement.  */
3446 
3447 static rtx
3448 adjust_cleared_regs (rtx loc, const_rtx old_rtx ATTRIBUTE_UNUSED, void *data)
3449 {
3450   if (REG_P (loc))
3451     {
3452       bitmap cleared_regs = (bitmap) data;
3453       if (bitmap_bit_p (cleared_regs, REGNO (loc)))
3454 	return simplify_replace_fn_rtx (copy_rtx (*reg_equiv[REGNO (loc)].src_p),
3455 					NULL_RTX, adjust_cleared_regs, data);
3456     }
3457   return NULL_RTX;
3458 }
3459 
3460 /* Given register REGNO is set only once, return true if the defining
3461    insn dominates all uses.  */
3462 
3463 static bool
3464 def_dominates_uses (int regno)
3465 {
3466   df_ref def = DF_REG_DEF_CHAIN (regno);
3467 
3468   struct df_insn_info *def_info = DF_REF_INSN_INFO (def);
3469   /* If this is an artificial def (eh handler regs, hard frame pointer
3470      for non-local goto, regs defined on function entry) then def_info
3471      is NULL and the reg is always live before any use.  We might
3472      reasonably return true in that case, but since the only call
3473      of this function is currently here in ira.cc when we are looking
3474      at a defining insn we can't have an artificial def as that would
3475      bump DF_REG_DEF_COUNT.  */
3476   gcc_assert (DF_REG_DEF_COUNT (regno) == 1 && def_info != NULL);
3477 
3478   rtx_insn *def_insn = DF_REF_INSN (def);
3479   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
3480 
3481   for (df_ref use = DF_REG_USE_CHAIN (regno);
3482        use;
3483        use = DF_REF_NEXT_REG (use))
3484     {
3485       struct df_insn_info *use_info = DF_REF_INSN_INFO (use);
3486       /* Only check real uses, not artificial ones.  */
3487       if (use_info)
3488 	{
3489 	  rtx_insn *use_insn = DF_REF_INSN (use);
3490 	  if (!DEBUG_INSN_P (use_insn))
3491 	    {
3492 	      basic_block use_bb = BLOCK_FOR_INSN (use_insn);
3493 	      if (use_bb != def_bb
3494 		  ? !dominated_by_p (CDI_DOMINATORS, use_bb, def_bb)
3495 		  : DF_INSN_INFO_LUID (use_info) < DF_INSN_INFO_LUID (def_info))
3496 		return false;
3497 	    }
3498 	}
3499     }
3500   return true;
3501 }
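
/* Example (illustrative): with the single def in the entry block and
   all uses after it, every use block is dominated by the def block
   (or, within the same block, the def's LUID precedes the use's), so
   the function returns true.  Conversely, for

     loop:
       ... = reg 100;   // use reached first on the initial iteration
       reg 100 = ...;   // the single def, later in the loop

   the use is not dominated by the def and the function returns false;
   update_equiv_regs then accepts the equivalence only if its value
   cannot trap or fault.  */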
3502 
3503 /* Scan the instructions before update_equiv_regs.  Record which registers
3504    are referenced as paradoxical subregs.  Also check for cases in which
3505    the current function needs to save a register that one of its call
3506    instructions clobbers.
3507 
3508    These things are logically unrelated, but it's more efficient to do
3509    them together.  */
3510 
3511 static void
3512 update_equiv_regs_prescan (void)
3513 {
3514   basic_block bb;
3515   rtx_insn *insn;
3516   function_abi_aggregator callee_abis;
3517 
3518   FOR_EACH_BB_FN (bb, cfun)
3519     FOR_BB_INSNS (bb, insn)
3520       if (NONDEBUG_INSN_P (insn))
3521 	{
3522 	  set_paradoxical_subreg (insn);
3523 	  if (CALL_P (insn))
3524 	    callee_abis.note_callee_abi (insn_callee_abi (insn));
3525 	}
3526 
3527   HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
3528   if (!hard_reg_set_empty_p (extra_caller_saves))
3529     for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
3530       if (TEST_HARD_REG_BIT (extra_caller_saves, regno))
3531 	df_set_regs_ever_live (regno, true);
3532 }
3533 
3534 /* Find registers that are equivalent to a single value throughout the
3535    compilation (either because they can be referenced in memory or are
3536    set once from a single constant).  Lower their priority for a
3537    register.
3538 
3539    If such a register is only referenced once, try substituting its
3540    value into the using insn.  If it succeeds, we can eliminate the
3541    register completely.
3542 
3543    Initialize init_insns in ira_reg_equiv array.  */
3544 static void
3545 update_equiv_regs (void)
3546 {
3547   rtx_insn *insn;
3548   basic_block bb;
3549 
3550   /* Scan the insns and find which registers have equivalences.  Do this
3551      in a separate scan of the insns because (due to -fcse-follow-jumps)
3552      a register can be set below its use.  */
3553   bitmap setjmp_crosses = regstat_get_setjmp_crosses ();
3554   FOR_EACH_BB_FN (bb, cfun)
3555     {
3556       int loop_depth = bb_loop_depth (bb);
3557 
3558       for (insn = BB_HEAD (bb);
3559 	   insn != NEXT_INSN (BB_END (bb));
3560 	   insn = NEXT_INSN (insn))
3561 	{
3562 	  rtx note;
3563 	  rtx set;
3564 	  rtx dest, src;
3565 	  int regno;
3566 
3567 	  if (! INSN_P (insn))
3568 	    continue;
3569 
3570 	  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
3571 	    if (REG_NOTE_KIND (note) == REG_INC)
3572 	      no_equiv (XEXP (note, 0), note, NULL);
3573 
3574 	  set = single_set (insn);
3575 
3576 	  /* If this insn contains more (or fewer) than a single SET,
3577 	     only mark all destinations as having no known equivalence.  */
3578 	  if (set == NULL_RTX
3579 	      || side_effects_p (SET_SRC (set)))
3580 	    {
3581 	      note_pattern_stores (PATTERN (insn), no_equiv, NULL);
3582 	      continue;
3583 	    }
3584 	  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
3585 	    {
3586 	      int i;
3587 
3588 	      for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
3589 		{
3590 		  rtx part = XVECEXP (PATTERN (insn), 0, i);
3591 		  if (part != set)
3592 		    note_pattern_stores (part, no_equiv, NULL);
3593 		}
3594 	    }
3595 
3596 	  dest = SET_DEST (set);
3597 	  src = SET_SRC (set);
3598 
3599 	  /* See if this is setting up the equivalence between an argument
3600 	     register and its stack slot.  */
3601 	  note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3602 	  if (note)
3603 	    {
3604 	      gcc_assert (REG_P (dest));
3605 	      regno = REGNO (dest);
3606 
3607 	      /* Note that we don't want to clear init_insns in
3608 		 ira_reg_equiv even if there are multiple sets of this
3609 		 register.  */
3610 	      reg_equiv[regno].is_arg_equivalence = 1;
3611 
3612 	      /* The insn result can have an equivalent memory location
3613 		 even though the equivalence is not set up by this insn.
3614 		 We add the insn to the init-insns list anyway, as a flag
3615 		 that regno has an equivalence; we will remove the insn
3616 		 from the list later.  */
3617 	      if (rtx_equal_p (src, XEXP (note, 0)) || MEM_P (XEXP (note, 0)))
3618 		ira_reg_equiv[regno].init_insns
3619 		  = gen_rtx_INSN_LIST (VOIDmode, insn,
3620 				       ira_reg_equiv[regno].init_insns);
3621 
3622 	      /* Continue normally in case this is a candidate for
3623 		 replacements.  */
3624 	    }
3625 
3626 	  if (!optimize)
3627 	    continue;
3628 
3629 	  /* We only handle the case of a pseudo register being set
3630 	     once, or always to the same value.  */
3631 	  /* ??? The mn10200 port breaks if we add equivalences for
3632 	     values that need an ADDRESS_REGS register and set them equivalent
3633 	     to a MEM of a pseudo.  The actual problem is in the over-conservative
3634 	     handling of INPADDR_ADDRESS / INPUT_ADDRESS / INPUT triples in
3635 	     calculate_needs, but we traditionally work around this problem
3636 	     here by rejecting equivalences when the destination is in a register
3637 	     that's likely spilled.  This is fragile, of course, since the
3638 	     preferred class of a pseudo depends on all instructions that set
3639 	     or use it.  */
3640 
3641 	  if (!REG_P (dest)
3642 	      || (regno = REGNO (dest)) < FIRST_PSEUDO_REGISTER
3643 	      || (reg_equiv[regno].init_insns
3644 		  && reg_equiv[regno].init_insns->insn () == NULL)
3645 	      || (targetm.class_likely_spilled_p (reg_preferred_class (regno))
3646 		  && MEM_P (src) && ! reg_equiv[regno].is_arg_equivalence))
3647 	    {
3648 	      /* This might be setting a SUBREG of a pseudo, a pseudo that is
3649 		 also set somewhere else to a constant.  */
3650 	      note_pattern_stores (set, no_equiv, NULL);
3651 	      continue;
3652 	    }
3653 
3654 	  /* Don't set reg mentioned in a paradoxical subreg
3655 	     equivalent to a mem.  */
3656 	  if (MEM_P (src) && reg_equiv[regno].pdx_subregs)
3657 	    {
3658 	      note_pattern_stores (set, no_equiv, NULL);
3659 	      continue;
3660 	    }
3661 
3662 	  note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3663 
3664 	  /* cse sometimes generates function invariants, but doesn't put a
3665 	     REG_EQUAL note on the insn.  Since this note would be redundant,
3666 	     there's no point creating it earlier than here.  */
3667 	  if (! note && ! rtx_varies_p (src, 0))
3668 	    note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
3669 
3670 	  /* Don't bother considering a REG_EQUAL note containing an EXPR_LIST
3671 	     since it represents a function call.  */
3672 	  if (note && GET_CODE (XEXP (note, 0)) == EXPR_LIST)
3673 	    note = NULL_RTX;
3674 
3675 	  if (DF_REG_DEF_COUNT (regno) != 1)
3676 	    {
3677 	      bool equal_p = true;
3678 	      rtx_insn_list *list;
3679 
3680 	      /* If we have already processed this pseudo and determined it
3681 		 cannot have an equivalence, then honor that decision.  */
3682 	      if (reg_equiv[regno].no_equiv)
3683 		continue;
3684 
3685 	      if (! note
3686 		  || rtx_varies_p (XEXP (note, 0), 0)
3687 		  || (reg_equiv[regno].replacement
3688 		      && ! rtx_equal_p (XEXP (note, 0),
3689 					reg_equiv[regno].replacement)))
3690 		{
3691 		  no_equiv (dest, set, NULL);
3692 		  continue;
3693 		}
3694 
3695 	      list = reg_equiv[regno].init_insns;
3696 	      for (; list; list = list->next ())
3697 		{
3698 		  rtx note_tmp;
3699 		  rtx_insn *insn_tmp;
3700 
3701 		  insn_tmp = list->insn ();
3702 		  note_tmp = find_reg_note (insn_tmp, REG_EQUAL, NULL_RTX);
3703 		  gcc_assert (note_tmp);
3704 		  if (! rtx_equal_p (XEXP (note, 0), XEXP (note_tmp, 0)))
3705 		    {
3706 		      equal_p = false;
3707 		      break;
3708 		    }
3709 		}
3710 
3711 	      if (! equal_p)
3712 		{
3713 		  no_equiv (dest, set, NULL);
3714 		  continue;
3715 		}
3716 	    }
3717 
3718 	  /* Record this insn as initializing this register.  */
3719 	  reg_equiv[regno].init_insns
3720 	    = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv[regno].init_insns);
3721 
3722 	  /* If this register is known to be equal to a constant, record that
3723 	     it is always equivalent to the constant.
3724 	     Note that it is possible to have a register use before
3725 	     the def in loops (see gcc.c-torture/execute/pr79286.c)
3726 	     where the reg is undefined on first use.  If the def insn
3727 	     won't trap we can use it as an equivalence, effectively
3728 	     choosing the "undefined" value for the reg to be the
3729 	     same as the value set by the def.  */
3730 	  if (DF_REG_DEF_COUNT (regno) == 1
3731 	      && note
3732 	      && !rtx_varies_p (XEXP (note, 0), 0)
3733 	      && (!may_trap_or_fault_p (XEXP (note, 0))
3734 		  || def_dominates_uses (regno)))
3735 	    {
3736 	      rtx note_value = XEXP (note, 0);
3737 	      remove_note (insn, note);
3738 	      set_unique_reg_note (insn, REG_EQUIV, note_value);
3739 	    }
3740 
3741 	  /* If this insn introduces a "constant" register, decrease the priority
3742 	     of that register.  Record this insn if the register is only used once
3743 	     more and the equivalence value is the same as our source.
3744 
3745 	     The latter condition is checked for two reasons:  First, it is an
3746 	     indication that it may be more efficient to actually emit the insn
3747 	     as written (if no registers are available, reload will substitute
3748 	     the equivalence).  Secondly, it avoids problems with any registers
3749 	     dying in this insn whose death notes would be missed.
3750 
3751 	     If we don't have a REG_EQUIV note, see if this insn is loading
3752 	     a register used only in one basic block from a MEM.  If so, and the
3753 	     MEM remains unchanged for the life of the register, add a REG_EQUIV
3754 	     note.  */
3755 	  note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3756 
3757 	  rtx replacement = NULL_RTX;
3758 	  if (note)
3759 	    replacement = XEXP (note, 0);
3760 	  else if (REG_BASIC_BLOCK (regno) >= NUM_FIXED_BLOCKS
3761 		   && MEM_P (SET_SRC (set)))
3762 	    {
3763 	      enum valid_equiv validity;
3764 	      validity = validate_equiv_mem (insn, dest, SET_SRC (set));
3765 	      if (validity != valid_none)
3766 		{
3767 		  replacement = copy_rtx (SET_SRC (set));
3768 		  if (validity == valid_reload)
3769 		    note = set_unique_reg_note (insn, REG_EQUIV, replacement);
3770 		}
3771 	    }
3772 
3773 	  /* If we haven't done so, record for reload that this is an
3774 	     equivalencing insn.  */
3775 	  if (note && !reg_equiv[regno].is_arg_equivalence)
3776 	    ira_reg_equiv[regno].init_insns
3777 	      = gen_rtx_INSN_LIST (VOIDmode, insn,
3778 				   ira_reg_equiv[regno].init_insns);
3779 
3780 	  if (replacement)
3781 	    {
3782 	      reg_equiv[regno].replacement = replacement;
3783 	      reg_equiv[regno].src_p = &SET_SRC (set);
3784 	      reg_equiv[regno].loop_depth = (short) loop_depth;
3785 
3786 	      /* Don't mess with things live during setjmp.  */
3787 	      if (optimize && !bitmap_bit_p (setjmp_crosses, regno))
3788 		{
3789 		  /* If the register is referenced exactly twice, meaning it is
3790 		     set once and used once, indicate that the reference may be
3791 		     replaced by the equivalence we computed above.  Do this
3792 		     even if the register is only used in one block so that
3793 		     dependencies can be handled where the last register is
3794 		     used in a different block (i.e. HIGH / LO_SUM sequences)
3795 		     and to reduce the number of registers alive across
3796 		     calls.  */
3797 
3798 		  if (REG_N_REFS (regno) == 2
3799 		      && (rtx_equal_p (replacement, src)
3800 			  || ! equiv_init_varies_p (src))
3801 		      && NONJUMP_INSN_P (insn)
3802 		      && equiv_init_movable_p (PATTERN (insn), regno))
3803 		    reg_equiv[regno].replace = 1;
3804 		}
3805 	    }
3806 	}
3807     }
3808 }
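
/* Worked example of what update_equiv_regs records (hypothetical
   source, illustrative pseudo numbers):

     I1: (set (reg 100) (symbol_ref "table"))   // single def, invariant
     I2: single use of reg 100

   I1 gets a REG_EQUIV note for the invariant and is pushed onto both
   reg_equiv[100].init_insns and ira_reg_equiv[100].init_insns.  If
   reg 100 is referenced exactly twice (the set and the use) and I1 is
   movable, reg_equiv[100].replace is also set, making reg 100 a
   candidate for complete removal in combine_and_move_insns below.  */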
3809 
3810 /* For insns that set a MEM to the contents of a REG that is only used
3811    in a single basic block, see if the register is always equivalent
3812    to that memory location and if moving the store from INSN to the
3813    insn that sets REG is safe.  If so, put a REG_EQUIV note on the
3814    initializing insn.  */
3815 static void
3816 add_store_equivs (void)
3817 {
3818   auto_bitmap seen_insns;
3819 
3820   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
3821     {
3822       rtx set, src, dest;
3823       unsigned regno;
3824       rtx_insn *init_insn;
3825 
3826       bitmap_set_bit (seen_insns, INSN_UID (insn));
3827 
3828       if (! INSN_P (insn))
3829 	continue;
3830 
3831       set = single_set (insn);
3832       if (! set)
3833 	continue;
3834 
3835       dest = SET_DEST (set);
3836       src = SET_SRC (set);
3837 
3838       /* Don't add a REG_EQUIV note if the insn already has one.  The existing
3839 	 REG_EQUIV is likely more useful than the one we are adding.  */
3840       if (MEM_P (dest) && REG_P (src)
3841 	  && (regno = REGNO (src)) >= FIRST_PSEUDO_REGISTER
3842 	  && REG_BASIC_BLOCK (regno) >= NUM_FIXED_BLOCKS
3843 	  && DF_REG_DEF_COUNT (regno) == 1
3844 	  && ! reg_equiv[regno].pdx_subregs
3845 	  && reg_equiv[regno].init_insns != NULL
3846 	  && (init_insn = reg_equiv[regno].init_insns->insn ()) != 0
3847 	  && bitmap_bit_p (seen_insns, INSN_UID (init_insn))
3848 	  && ! find_reg_note (init_insn, REG_EQUIV, NULL_RTX)
3849 	  && validate_equiv_mem (init_insn, src, dest) == valid_reload
3850 	  && ! memref_used_between_p (dest, init_insn, insn)
3851 	  /* Attaching a REG_EQUIV note will fail if INIT_INSN has
3852 	     multiple sets.  */
3853 	  && set_unique_reg_note (init_insn, REG_EQUIV, copy_rtx (dest)))
3854 	{
3855 	  /* This insn makes the equivalence, not the one initializing
3856 	     the register.  */
3857 	  ira_reg_equiv[regno].init_insns
3858 	    = gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
3859 	  df_notes_rescan (init_insn);
3860 	  if (dump_file)
3861 	    fprintf (dump_file,
3862 		     "Adding REG_EQUIV to insn %d for source of insn %d\n",
3863 		     INSN_UID (init_insn),
3864 		     INSN_UID (insn));
3865 	}
3866     }
3867 }
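
/* Example of the pattern handled above (illustrative):

     init_insn: (set (reg 100) (expensive expression))  // single def
     ...
     insn:      (set (mem stack_slot) (reg 100))

   If the slot is not referenced between the two insns and stays valid
   for reload, init_insn receives REG_EQUIV (mem stack_slot); reload
   can then rematerialize reg 100 from that slot instead of inventing a
   new spill location.  */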
3868 
3869 /* Scan all regs killed in an insn to see if any of them are registers
3870    only used that once.  If so, see if we can replace the reference
3871    with the equivalent form.  If we can, delete the initializing
3872    reference and this register will go away.  If we can't replace the
3873    reference, and the initializing reference is within the same loop
3874    (or in an inner loop), then move the register initialization just
3875    before the use, so that they are in the same basic block.  */
3876 static void
3877 combine_and_move_insns (void)
3878 {
3879   auto_bitmap cleared_regs;
3880   int max = max_reg_num ();
3881 
3882   for (int regno = FIRST_PSEUDO_REGISTER; regno < max; regno++)
3883     {
3884       if (!reg_equiv[regno].replace)
3885 	continue;
3886 
3887       rtx_insn *use_insn = 0;
3888       for (df_ref use = DF_REG_USE_CHAIN (regno);
3889 	   use;
3890 	   use = DF_REF_NEXT_REG (use))
3891 	if (DF_REF_INSN_INFO (use))
3892 	  {
3893 	    if (DEBUG_INSN_P (DF_REF_INSN (use)))
3894 	      continue;
3895 	    gcc_assert (!use_insn);
3896 	    use_insn = DF_REF_INSN (use);
3897 	  }
3898       gcc_assert (use_insn);
3899 
3900       /* Don't substitute into jumps.  indirect_jump_optimize does
3901 	 this for anything we are prepared to handle.  */
3902       if (JUMP_P (use_insn))
3903 	continue;
3904 
3905       /* Also don't substitute into a conditional trap insn -- it can become
3906 	 an unconditional trap, and that is a flow control insn.  */
3907       if (GET_CODE (PATTERN (use_insn)) == TRAP_IF)
3908 	continue;
3909 
3910       df_ref def = DF_REG_DEF_CHAIN (regno);
3911       gcc_assert (DF_REG_DEF_COUNT (regno) == 1 && DF_REF_INSN_INFO (def));
3912       rtx_insn *def_insn = DF_REF_INSN (def);
3913 
3914       /* We may not move instructions that can throw, since that
3915 	 changes basic block boundaries and we are not prepared to
3916 	 adjust the CFG to match.  */
3917       if (can_throw_internal (def_insn))
3918 	continue;
3919 
3920       /* Instructions with multiple sets can only be moved if DF analysis is
3921 	 performed for all of the registers set.  See PR91052.  */
3922       if (multiple_sets (def_insn))
3923 	continue;
3924 
3925       basic_block use_bb = BLOCK_FOR_INSN (use_insn);
3926       basic_block def_bb = BLOCK_FOR_INSN (def_insn);
3927       if (bb_loop_depth (use_bb) > bb_loop_depth (def_bb))
3928 	continue;
3929 
3930       if (asm_noperands (PATTERN (def_insn)) < 0
3931 	  && validate_replace_rtx (regno_reg_rtx[regno],
3932 				   *reg_equiv[regno].src_p, use_insn))
3933 	{
3934 	  rtx link;
3935 	  /* Append the REG_DEAD notes from def_insn.  */
3936 	  for (rtx *p = &REG_NOTES (def_insn); (link = *p) != 0; )
3937 	    {
3938 	      if (REG_NOTE_KIND (XEXP (link, 0)) == REG_DEAD)
3939 		{
3940 		  *p = XEXP (link, 1);
3941 		  XEXP (link, 1) = REG_NOTES (use_insn);
3942 		  REG_NOTES (use_insn) = link;
3943 		}
3944 	      else
3945 		p = &XEXP (link, 1);
3946 	    }
3947 
3948 	  remove_death (regno, use_insn);
3949 	  SET_REG_N_REFS (regno, 0);
3950 	  REG_FREQ (regno) = 0;
3951 	  df_ref use;
3952 	  FOR_EACH_INSN_USE (use, def_insn)
3953 	    {
3954 	      unsigned int use_regno = DF_REF_REGNO (use);
3955 	      if (!HARD_REGISTER_NUM_P (use_regno))
3956 		reg_equiv[use_regno].replace = 0;
3957 	    }
3958 
3959 	  delete_insn (def_insn);
3960 
3961 	  reg_equiv[regno].init_insns = NULL;
3962 	  ira_reg_equiv[regno].init_insns = NULL;
3963 	  bitmap_set_bit (cleared_regs, regno);
3964 	}
3965 
3966       /* Move the initialization of the register to just before
3967 	 USE_INSN.  Update the flow information.  */
3968       else if (prev_nondebug_insn (use_insn) != def_insn)
3969 	{
3970 	  rtx_insn *new_insn;
3971 
3972 	  new_insn = emit_insn_before (PATTERN (def_insn), use_insn);
3973 	  REG_NOTES (new_insn) = REG_NOTES (def_insn);
3974 	  REG_NOTES (def_insn) = 0;
3975 	  /* Rescan it to process the notes.  */
3976 	  df_insn_rescan (new_insn);
3977 
3978 	  /* Make sure this insn is recognized before reload begins,
3979 	     otherwise eliminate_regs_in_insn will die.  */
3980 	  INSN_CODE (new_insn) = INSN_CODE (def_insn);
3981 
3982 	  delete_insn (def_insn);
3983 
3984 	  XEXP (reg_equiv[regno].init_insns, 0) = new_insn;
3985 
3986 	  REG_BASIC_BLOCK (regno) = use_bb->index;
3987 	  REG_N_CALLS_CROSSED (regno) = 0;
3988 
3989 	  if (use_insn == BB_HEAD (use_bb))
3990 	    BB_HEAD (use_bb) = new_insn;
3991 
3992 	  /* We know regno dies in use_insn, but inside a loop
3993 	     REG_DEAD notes might be missing when def_insn was in
3994 	     another basic block.  However, when we move def_insn into
3995 	     this bb we'll definitely get a REG_DEAD note and reload
3996 	     will see the death.  It's possible that update_equiv_regs
3997 	     set up an equivalence referencing regno for a reg set by
3998 	     use_insn, when regno was seen as non-local.  Now that
3999 	     regno is local to this block, and dies, such an
4000 	     equivalence is invalid.  */
4001 	  if (find_reg_note (use_insn, REG_EQUIV, regno_reg_rtx[regno]))
4002 	    {
4003 	      rtx set = single_set (use_insn);
4004 	      if (set && REG_P (SET_DEST (set)))
4005 		no_equiv (SET_DEST (set), set, NULL);
4006 	    }
4007 
4008 	  ira_reg_equiv[regno].init_insns
4009 	    = gen_rtx_INSN_LIST (VOIDmode, new_insn, NULL_RTX);
4010 	  bitmap_set_bit (cleared_regs, regno);
4011 	}
4012     }
4013 
4014   if (!bitmap_empty_p (cleared_regs))
4015     {
4016       basic_block bb;
4017 
4018       FOR_EACH_BB_FN (bb, cfun)
4019 	{
4020 	  bitmap_and_compl_into (DF_LR_IN (bb), cleared_regs);
4021 	  bitmap_and_compl_into (DF_LR_OUT (bb), cleared_regs);
4022 	  if (!df_live)
4023 	    continue;
4024 	  bitmap_and_compl_into (DF_LIVE_IN (bb), cleared_regs);
4025 	  bitmap_and_compl_into (DF_LIVE_OUT (bb), cleared_regs);
4026 	}
4027 
4028       /* Last pass - adjust debug insns referencing cleared regs.  */
4029       if (MAY_HAVE_DEBUG_BIND_INSNS)
4030 	for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
4031 	  if (DEBUG_BIND_INSN_P (insn))
4032 	    {
4033 	      rtx old_loc = INSN_VAR_LOCATION_LOC (insn);
4034 	      INSN_VAR_LOCATION_LOC (insn)
4035 		= simplify_replace_fn_rtx (old_loc, NULL_RTX,
4036 					   adjust_cleared_regs,
4037 					   (void *) cleared_regs);
4038 	      if (old_loc != INSN_VAR_LOCATION_LOC (insn))
4039 		df_insn_rescan (insn);
4040 	    }
4041     }
4042 }
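
/* Example (illustrative):

     I1: (set (reg 100) (plus (reg 200) (const_int 8)))  // single def
     ...
     I2: single nondebug use of reg 100

   If substituting the PLUS into I2 yields a recognizable insn, I1 is
   deleted and regno 100 is recorded in cleared_regs so the DF live
   sets and debug insns can be fixed up afterwards.  Otherwise I1 is
   moved to just before I2, shortening the live range.  The whole
   transformation is skipped when the use sits in a deeper loop than
   the def, since that would move work into the loop.  */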
4043 
4044 /* A pass over indirect jumps, converting simple cases to direct jumps.
4045    Combine does this optimization too, but only within a basic block.  */
4046 static void
4047 indirect_jump_optimize (void)
4048 {
4049   basic_block bb;
4050   bool rebuild_p = false;
4051 
4052   FOR_EACH_BB_REVERSE_FN (bb, cfun)
4053     {
4054       rtx_insn *insn = BB_END (bb);
4055       if (!JUMP_P (insn)
4056 	  || find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
4057 	continue;
4058 
4059       rtx x = pc_set (insn);
4060       if (!x || !REG_P (SET_SRC (x)))
4061 	continue;
4062 
4063       int regno = REGNO (SET_SRC (x));
4064       if (DF_REG_DEF_COUNT (regno) == 1)
4065 	{
4066 	  df_ref def = DF_REG_DEF_CHAIN (regno);
4067 	  if (!DF_REF_IS_ARTIFICIAL (def))
4068 	    {
4069 	      rtx_insn *def_insn = DF_REF_INSN (def);
4070 	      rtx lab = NULL_RTX;
4071 	      rtx set = single_set (def_insn);
4072 	      if (set && GET_CODE (SET_SRC (set)) == LABEL_REF)
4073 		lab = SET_SRC (set);
4074 	      else
4075 		{
4076 		  rtx eqnote = find_reg_note (def_insn, REG_EQUAL, NULL_RTX);
4077 		  if (eqnote && GET_CODE (XEXP (eqnote, 0)) == LABEL_REF)
4078 		    lab = XEXP (eqnote, 0);
4079 		}
4080 	      if (lab && validate_replace_rtx (SET_SRC (x), lab, insn))
4081 		rebuild_p = true;
4082 	    }
4083 	}
4084     }
4085 
4086   if (rebuild_p)
4087     {
4088       timevar_push (TV_JUMP);
4089       rebuild_jump_labels (get_insns ());
4090       if (purge_all_dead_edges ())
4091 	delete_unreachable_blocks ();
4092       timevar_pop (TV_JUMP);
4093     }
4094 }
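
/* Example of the transformation (illustrative RTL):

     I1: (set (reg 100) (label_ref L))
     I2: (set (pc) (reg 100))          // indirect jump ending a block

   Because reg 100 has a single, non-artificial def whose SET_SRC (or
   REG_EQUAL note) is a LABEL_REF, I2 is rewritten to
   (set (pc) (label_ref L)), a direct jump; jump labels are then
   rebuilt and newly dead edges and blocks are purged.  */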
4095 
4096 /* Set up fields memory, constant, and invariant from init_insns in
4097    the structures of array ira_reg_equiv.  */
4098 static void
4099 setup_reg_equiv (void)
4100 {
4101   int i;
4102   rtx_insn_list *elem, *prev_elem, *next_elem;
4103   rtx_insn *insn;
4104   rtx set, x;
4105 
4106   for (i = FIRST_PSEUDO_REGISTER; i < ira_reg_equiv_len; i++)
4107     for (prev_elem = NULL, elem = ira_reg_equiv[i].init_insns;
4108 	 elem;
4109 	 prev_elem = elem, elem = next_elem)
4110       {
4111 	next_elem = elem->next ();
4112 	insn = elem->insn ();
4113 	set = single_set (insn);
4114 
4115 	/* Init insns can set up equivalence when the reg is a destination or
4116 	   a source (in which case the destination is memory).  */
4117 	if (set != 0 && (REG_P (SET_DEST (set)) || REG_P (SET_SRC (set))))
4118 	  {
4119 	    if ((x = find_reg_note (insn, REG_EQUIV, NULL_RTX)) != NULL)
4120 	      {
4121 		x = XEXP (x, 0);
4122 		if (REG_P (SET_DEST (set))
4123 		    && REGNO (SET_DEST (set)) == (unsigned int) i
4124 		    && ! rtx_equal_p (SET_SRC (set), x) && MEM_P (x))
4125 		  {
4126 		    /* This insn reports the equivalence but does
4127 		       not actually set it.  Remove it from the
4128 		       list.  */
4129 		    if (prev_elem == NULL)
4130 		      ira_reg_equiv[i].init_insns = next_elem;
4131 		    else
4132 		      XEXP (prev_elem, 1) = next_elem;
4133 		    elem = prev_elem;
4134 		  }
4135 	      }
4136 	    else if (REG_P (SET_DEST (set))
4137 		     && REGNO (SET_DEST (set)) == (unsigned int) i)
4138 	      x = SET_SRC (set);
4139 	    else
4140 	      {
4141 		gcc_assert (REG_P (SET_SRC (set))
4142 			    && REGNO (SET_SRC (set)) == (unsigned int) i);
4143 		x = SET_DEST (set);
4144 	      }
4145 	    if (! function_invariant_p (x)
4146 		|| ! flag_pic
4147 		/* A function invariant is often CONSTANT_P but may
4148 		   include a register.  We promise to only pass
4149 		   CONSTANT_P objects to LEGITIMATE_PIC_OPERAND_P.  */
4150 		|| (CONSTANT_P (x) && LEGITIMATE_PIC_OPERAND_P (x)))
4151 	      {
4152 		/* It can happen that a REG_EQUIV note contains a MEM
4153 		   that is not a legitimate memory operand.  As later
4154 		   stages of reload assume that all addresses found in
4155 		   the lra_regno_equiv_* arrays were originally
4156 		   legitimate, we ignore such REG_EQUIV notes.  */
4157 		if (memory_operand (x, VOIDmode))
4158 		  {
4159 		    ira_reg_equiv[i].defined_p = true;
4160 		    ira_reg_equiv[i].memory = x;
4161 		    continue;
4162 		  }
4163 		else if (function_invariant_p (x))
4164 		  {
4165 		    machine_mode mode;
4166 
4167 		    mode = GET_MODE (SET_DEST (set));
4168 		    if (GET_CODE (x) == PLUS
4169 			|| x == frame_pointer_rtx || x == arg_pointer_rtx)
4170 		      /* This is PLUS of frame pointer and a constant,
4171 			 or fp, or argp.  */
4172 		      ira_reg_equiv[i].invariant = x;
4173 		    else if (targetm.legitimate_constant_p (mode, x))
4174 		      ira_reg_equiv[i].constant = x;
4175 		    else
4176 		      {
4177 			ira_reg_equiv[i].memory = force_const_mem (mode, x);
4178 			if (ira_reg_equiv[i].memory == NULL_RTX)
4179 			  {
4180 			    ira_reg_equiv[i].defined_p = false;
4181 			    ira_reg_equiv[i].init_insns = NULL;
4182 			    break;
4183 			  }
4184 		      }
4185 		    ira_reg_equiv[i].defined_p = true;
4186 		    continue;
4187 		  }
4188 	      }
4189 	  }
4190 	ira_reg_equiv[i].defined_p = false;
4191 	ira_reg_equiv[i].init_insns = NULL;
4192 	break;
4193       }
4194 }
4195 
4196 
4197 
4198 /* Print chain C to FILE.  */
4199 static void
4200 print_insn_chain (FILE *file, class insn_chain *c)
4201 {
4202   fprintf (file, "insn=%d, ", INSN_UID (c->insn));
4203   bitmap_print (file, &c->live_throughout, "live_throughout: ", ", ");
4204   bitmap_print (file, &c->dead_or_set, "dead_or_set: ", "\n");
4205 }
4206 
4207 
4208 /* Print all reload_insn_chains to FILE.  */
4209 static void
4210 print_insn_chains (FILE *file)
4211 {
4212   class insn_chain *c;
4213   for (c = reload_insn_chain; c ; c = c->next)
4214     print_insn_chain (file, c);
4215 }
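
/* The resulting dump has one line per chain element, along the lines
   of (illustrative formatting):

     insn=23, live_throughout: 100 105, dead_or_set: 101

   i.e. for each reload-relevant insn, the registers live across it and
   those set by it or dying in it.  */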
4216 
4217 /* Return true if pseudo REGNO should be added to set live_throughout
4218    or dead_or_set of the insn chains for reload consideration.  */
4219 static bool
4220 pseudo_for_reload_consideration_p (int regno)
4221 {
4222   /* Consider spilled pseudos too for IRA because they still have a
4223      chance to get hard-registers in the reload when IRA is used.  */
4224   return (reg_renumber[regno] >= 0 || ira_conflicts_p);
4225 }
4226 
4227 /* Return true if we can track the individual bytes of subreg X.
4228    When returning true, set *OUTER_SIZE to the number of bytes in
4229    X itself, *INNER_SIZE to the number of bytes in the inner register
4230    and *START to the offset of the first byte.  */
4231 static bool
4232 get_subreg_tracking_sizes (rtx x, HOST_WIDE_INT *outer_size,
4233 			   HOST_WIDE_INT *inner_size, HOST_WIDE_INT *start)
4234 {
4235   rtx reg = regno_reg_rtx[REGNO (SUBREG_REG (x))];
4236   return (GET_MODE_SIZE (GET_MODE (x)).is_constant (outer_size)
4237 	  && GET_MODE_SIZE (GET_MODE (reg)).is_constant (inner_size)
4238 	  && SUBREG_BYTE (x).is_constant (start));
4239 }
4240 
4241 /* Init LIVE_SUBREGS[ALLOCNUM] and LIVE_SUBREGS_USED[ALLOCNUM] for
4242    a register with SIZE bytes, making the register live if INIT_VALUE.  */
4243 static void
4244 init_live_subregs (bool init_value, sbitmap *live_subregs,
4245 		   bitmap live_subregs_used, int allocnum, int size)
4246 {
4247   gcc_assert (size > 0);
4248 
4249   /* Been there, done that.  */
4250   if (bitmap_bit_p (live_subregs_used, allocnum))
4251     return;
4252 
4253   /* Create a new one.  */
4254   if (live_subregs[allocnum] == NULL)
4255     live_subregs[allocnum] = sbitmap_alloc (size);
4256 
4257   /* If the entire reg was live before blasting into subregs, we need
4258      to init all of the subregs to ones; otherwise init them to 0.  */
4259   if (init_value)
4260     bitmap_ones (live_subregs[allocnum]);
4261   else
4262     bitmap_clear (live_subregs[allocnum]);
4263 
4264   bitmap_set_bit (live_subregs_used, allocnum);
4265 }
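
/* Example (illustrative): a 16-byte (e.g. TImode) pseudo gets an
   sbitmap of 16 bits, one per byte.  In build_insn_chain below, a def
   of (subreg:SI (reg:TI 100) 8) clears bits 8-11 (widened to word
   boundaries unless the def is a STRICT_LOW_PART), while a use of the
   same subreg sets them; the pseudo leaves live_relevant_regs only
   once every byte has been killed.  */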
4266 
4267 /* Walk the insns of the current function and build reload_insn_chain,
4268    and record register life information.  */
4269 static void
4270 build_insn_chain (void)
4271 {
4272   unsigned int i;
4273   class insn_chain **p = &reload_insn_chain;
4274   basic_block bb;
4275   class insn_chain *c = NULL;
4276   class insn_chain *next = NULL;
4277   auto_bitmap live_relevant_regs;
4278   auto_bitmap elim_regset;
4279   /* live_subregs is a vector used to keep accurate information about
4280      which hardregs are live in multiword pseudos.  live_subregs and
4281      live_subregs_used are indexed by pseudo number.  The live_subreg
4282      entry for a particular pseudo is only used if the corresponding
4283      element is non zero in live_subregs_used.  The sbitmap size of
4284      live_subreg[allocno] is number of bytes that the pseudo can
4285      occupy.  */
4286   sbitmap *live_subregs = XCNEWVEC (sbitmap, max_regno);
4287   auto_bitmap live_subregs_used;
4288 
4289   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4290     if (TEST_HARD_REG_BIT (eliminable_regset, i))
4291       bitmap_set_bit (elim_regset, i);
4292   FOR_EACH_BB_REVERSE_FN (bb, cfun)
4293     {
4294       bitmap_iterator bi;
4295       rtx_insn *insn;
4296 
4297       CLEAR_REG_SET (live_relevant_regs);
4298       bitmap_clear (live_subregs_used);
4299 
4300       EXECUTE_IF_SET_IN_BITMAP (df_get_live_out (bb), 0, i, bi)
4301 	{
4302 	  if (i >= FIRST_PSEUDO_REGISTER)
4303 	    break;
4304 	  bitmap_set_bit (live_relevant_regs, i);
4305 	}
4306 
4307       EXECUTE_IF_SET_IN_BITMAP (df_get_live_out (bb),
4308 				FIRST_PSEUDO_REGISTER, i, bi)
4309 	{
4310 	  if (pseudo_for_reload_consideration_p (i))
4311 	    bitmap_set_bit (live_relevant_regs, i);
4312 	}
4313 
4314       FOR_BB_INSNS_REVERSE (bb, insn)
4315 	{
4316 	  if (!NOTE_P (insn) && !BARRIER_P (insn))
4317 	    {
4318 	      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
4319 	      df_ref def, use;
4320 
4321 	      c = new_insn_chain ();
4322 	      c->next = next;
4323 	      next = c;
4324 	      *p = c;
4325 	      p = &c->prev;
4326 
4327 	      c->insn = insn;
4328 	      c->block = bb->index;
4329 
4330 	      if (NONDEBUG_INSN_P (insn))
4331 		FOR_EACH_INSN_INFO_DEF (def, insn_info)
4332 		  {
4333 		    unsigned int regno = DF_REF_REGNO (def);
4334 
4335 		    /* Ignore may clobbers because these are generated
4336 		       from calls. However, every other kind of def is
4337 		       added to dead_or_set.  */
4338 		    if (!DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER))
4339 		      {
4340 			if (regno < FIRST_PSEUDO_REGISTER)
4341 			  {
4342 			    if (!fixed_regs[regno])
4343 			      bitmap_set_bit (&c->dead_or_set, regno);
4344 			  }
4345 			else if (pseudo_for_reload_consideration_p (regno))
4346 			  bitmap_set_bit (&c->dead_or_set, regno);
4347 		      }
4348 
4349 		    if ((regno < FIRST_PSEUDO_REGISTER
4350 			 || reg_renumber[regno] >= 0
4351 			 || ira_conflicts_p)
4352 			&& (!DF_REF_FLAGS_IS_SET (def, DF_REF_CONDITIONAL)))
4353 		      {
4354 			rtx reg = DF_REF_REG (def);
4355 			HOST_WIDE_INT outer_size, inner_size, start;
4356 
4357 			/* We can usually track the liveness of individual
4358 			   bytes within a subreg.  The only exceptions are
4359 			   subregs wrapped in ZERO_EXTRACTs and subregs whose
4360 			   size is not known; in those cases we need to be
4361 			   conservative and treat the definition as a partial
4362 			   definition of the full register rather than a full
4363 			   definition of a specific part of the register.  */
4364 			if (GET_CODE (reg) == SUBREG
4365 			    && !DF_REF_FLAGS_IS_SET (def, DF_REF_ZERO_EXTRACT)
4366 			    && get_subreg_tracking_sizes (reg, &outer_size,
4367 							  &inner_size, &start))
4368 			  {
4369 			    HOST_WIDE_INT last = start + outer_size;
4370 
4371 			    init_live_subregs
4372 			      (bitmap_bit_p (live_relevant_regs, regno),
4373 			       live_subregs, live_subregs_used, regno,
4374 			       inner_size);
4375 
4376 			    if (!DF_REF_FLAGS_IS_SET
4377 				(def, DF_REF_STRICT_LOW_PART))
4378 			      {
4379 				/* Expand the range to cover entire words.
4380 				   Bytes added here are "don't care".  */
4381 				start
4382 				  = start / UNITS_PER_WORD * UNITS_PER_WORD;
4383 				last = ((last + UNITS_PER_WORD - 1)
4384 					/ UNITS_PER_WORD * UNITS_PER_WORD);
4385 			      }
4386 
4387 			    /* Ignore the paradoxical bits.  */
4388 			    if (last > SBITMAP_SIZE (live_subregs[regno]))
4389 			      last = SBITMAP_SIZE (live_subregs[regno]);
4390 
4391 			    while (start < last)
4392 			      {
4393 				bitmap_clear_bit (live_subregs[regno], start);
4394 				start++;
4395 			      }
4396 
4397 			    if (bitmap_empty_p (live_subregs[regno]))
4398 			      {
4399 				bitmap_clear_bit (live_subregs_used, regno);
4400 				bitmap_clear_bit (live_relevant_regs, regno);
4401 			      }
4402 			    else
4403 			      /* Set live_relevant_regs here because
4404 				 that bit has to be true to get us to
4405 				 look at the live_subregs fields.  */
4406 			      bitmap_set_bit (live_relevant_regs, regno);
4407 			  }
4408 			else
4409 			  {
4410 			    /* DF_REF_PARTIAL is generated for
4411 			       subregs, STRICT_LOW_PART, and
4412 			       ZERO_EXTRACT.  We handle the subreg
4413 			       case above so here we have to keep from
4414 			       modeling the def as a killing def.  */
4415 			    if (!DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL))
4416 			      {
4417 				bitmap_clear_bit (live_subregs_used, regno);
4418 				bitmap_clear_bit (live_relevant_regs, regno);
4419 			      }
4420 			  }
4421 		      }
4422 		  }
4423 
4424 	      bitmap_and_compl_into (live_relevant_regs, elim_regset);
4425 	      bitmap_copy (&c->live_throughout, live_relevant_regs);
4426 
4427 	      if (NONDEBUG_INSN_P (insn))
4428 		FOR_EACH_INSN_INFO_USE (use, insn_info)
4429 		  {
4430 		    unsigned int regno = DF_REF_REGNO (use);
4431 		    rtx reg = DF_REF_REG (use);
4432 
4433 		    /* DF_REF_READ_WRITE on a use means that this use
4434 		       is fabricated from a def that is a partial set
4435 		       to a multiword reg.  Here, we only model the
4436 		       subreg case that is not wrapped in ZERO_EXTRACT
4437 		       precisely so we do not need to look at the
4438 		       fabricated use.  */
4439 		    if (DF_REF_FLAGS_IS_SET (use, DF_REF_READ_WRITE)
4440 			&& !DF_REF_FLAGS_IS_SET (use, DF_REF_ZERO_EXTRACT)
4441 			&& DF_REF_FLAGS_IS_SET (use, DF_REF_SUBREG))
4442 		      continue;
4443 
4444 		    /* Add the last use of each var to dead_or_set.  */
4445 		    if (!bitmap_bit_p (live_relevant_regs, regno))
4446 		      {
4447 			if (regno < FIRST_PSEUDO_REGISTER)
4448 			  {
4449 			    if (!fixed_regs[regno])
4450 			      bitmap_set_bit (&c->dead_or_set, regno);
4451 			  }
4452 			else if (pseudo_for_reload_consideration_p (regno))
4453 			  bitmap_set_bit (&c->dead_or_set, regno);
4454 		      }
4455 
4456 		    if (regno < FIRST_PSEUDO_REGISTER
4457 			|| pseudo_for_reload_consideration_p (regno))
4458 		      {
4459 			HOST_WIDE_INT outer_size, inner_size, start;
4460 			if (GET_CODE (reg) == SUBREG
4461 			    && !DF_REF_FLAGS_IS_SET (use,
4462 						     DF_REF_SIGN_EXTRACT
4463 						     | DF_REF_ZERO_EXTRACT)
4464 			    && get_subreg_tracking_sizes (reg, &outer_size,
4465 							  &inner_size, &start))
4466 			  {
4467 			    HOST_WIDE_INT last = start + outer_size;
4468 
4469 			    init_live_subregs
4470 			      (bitmap_bit_p (live_relevant_regs, regno),
4471 			       live_subregs, live_subregs_used, regno,
4472 			       inner_size);
4473 
4474 			    /* Ignore the paradoxical bits.  */
4475 			    if (last > SBITMAP_SIZE (live_subregs[regno]))
4476 			      last = SBITMAP_SIZE (live_subregs[regno]);
4477 
4478 			    while (start < last)
4479 			      {
4480 				bitmap_set_bit (live_subregs[regno], start);
4481 				start++;
4482 			      }
4483 			  }
4484 			else
4485 			  /* Resetting the live_subregs_used is
4486 			     effectively saying do not use the subregs
4487 			     because we are reading the whole
4488 			     pseudo.  */
4489 			  bitmap_clear_bit (live_subregs_used, regno);
4490 			bitmap_set_bit (live_relevant_regs, regno);
4491 		      }
4492 		  }
4493 	    }
4494 	}
4495 
4496       /* FIXME!! The following code is a disaster.  Reload needs to see the
4497 	 labels and jump tables that are just hanging out in between
4498 	 the basic blocks.  See pr33676.  */
4499       insn = BB_HEAD (bb);
4500 
4501       /* Skip over the barriers and cruft.  */
4502       while (insn && (BARRIER_P (insn) || NOTE_P (insn)
4503 		      || BLOCK_FOR_INSN (insn) == bb))
4504 	insn = PREV_INSN (insn);
4505 
4506       /* While we add anything except barriers and notes, the focus is
4507 	 to get the labels and jump tables into the
4508 	 reload_insn_chain.  */
4509       while (insn)
4510 	{
4511 	  if (!NOTE_P (insn) && !BARRIER_P (insn))
4512 	    {
4513 	      if (BLOCK_FOR_INSN (insn))
4514 		break;
4515 
4516 	      c = new_insn_chain ();
4517 	      c->next = next;
4518 	      next = c;
4519 	      *p = c;
4520 	      p = &c->prev;
4521 
4522 	      /* The block makes no sense here, but it is what the old
4523 		 code did.  */
4524 	      c->block = bb->index;
4525 	      c->insn = insn;
4526 	      bitmap_copy (&c->live_throughout, live_relevant_regs);
4527 	    }
4528 	  insn = PREV_INSN (insn);
4529 	}
4530     }
4531 
4532   reload_insn_chain = c;
4533   *p = NULL;
4534 
4535   for (i = 0; i < (unsigned int) max_regno; i++)
4536     if (live_subregs[i] != NULL)
4537       sbitmap_free (live_subregs[i]);
4538   free (live_subregs);
4539 
4540   if (dump_file)
4541     print_insn_chains (dump_file);
4542 }
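/* [Editorial sketch, not part of GCC]  A minimal model of the
   byte-granular subreg liveness bookkeeping used above when building
   the insn chain.  The word size and the plain byte array are
   hypothetical stand-ins for UNITS_PER_WORD and the per-pseudo
   sbitmaps in live_subregs.  */

#define SKETCH_WORD_BYTES 4	/* stand-in for UNITS_PER_WORD */

/* Kill bytes [START, LAST) of a pseudo on a subreg def, widening the
   range to whole words just as the code above does (the added bytes
   are "don't care"), and return true if the pseudo is now completely
   dead.  */
static bool
sketch_subreg_def (unsigned char *live_bytes, int nbytes,
		   int start, int last)
{
  /* Expand the range to cover entire words.  */
  start = start / SKETCH_WORD_BYTES * SKETCH_WORD_BYTES;
  last = ((last + SKETCH_WORD_BYTES - 1)
	  / SKETCH_WORD_BYTES * SKETCH_WORD_BYTES);
  /* Ignore the paradoxical bytes.  */
  if (last > nbytes)
    last = nbytes;
  while (start < last)
    live_bytes[start++] = 0;
  /* The pseudo dies only when no byte of it remains live.  */
  for (int i = 0; i < nbytes; i++)
    if (live_bytes[i])
      return false;
  return true;
}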
4543 
4544 /* Examine the rtx found in *LOC, which is read or written to as determined
4545    by TYPE.  Return false if we find a reason why an insn containing this
4546    rtx should not be moved (such as accesses to non-constant memory), true
4547    otherwise.  */
4548 static bool
4549 rtx_moveable_p (rtx *loc, enum op_type type)
4550 {
4551   const char *fmt;
4552   rtx x = *loc;
4553   int i, j;
4554 
4555   enum rtx_code code = GET_CODE (x);
4556   switch (code)
4557     {
4558     case CONST:
4559     CASE_CONST_ANY:
4560     case SYMBOL_REF:
4561     case LABEL_REF:
4562       return true;
4563 
4564     case PC:
4565       return type == OP_IN;
4566 
4567     case REG:
4568       if (x == frame_pointer_rtx)
4569 	return true;
4570       if (HARD_REGISTER_P (x))
4571 	return false;
4572 
4573       return true;
4574 
4575     case MEM:
4576       if (type == OP_IN && MEM_READONLY_P (x))
4577 	return rtx_moveable_p (&XEXP (x, 0), OP_IN);
4578       return false;
4579 
4580     case SET:
4581       return (rtx_moveable_p (&SET_SRC (x), OP_IN)
4582 	      && rtx_moveable_p (&SET_DEST (x), OP_OUT));
4583 
4584     case STRICT_LOW_PART:
4585       return rtx_moveable_p (&XEXP (x, 0), OP_OUT);
4586 
4587     case ZERO_EXTRACT:
4588     case SIGN_EXTRACT:
4589       return (rtx_moveable_p (&XEXP (x, 0), type)
4590 	      && rtx_moveable_p (&XEXP (x, 1), OP_IN)
4591 	      && rtx_moveable_p (&XEXP (x, 2), OP_IN));
4592 
4593     case CLOBBER:
4594       return rtx_moveable_p (&SET_DEST (x), OP_OUT);
4595 
4596     case UNSPEC_VOLATILE:
4597       /* It is a bad idea to consider insns with such rtl
4598 	 as moveable ones.  The insn scheduler also considers them as a
4599 	 barrier for a reason.  */
4600       return false;
4601 
4602     case ASM_OPERANDS:
4603       /* The same is true for volatile asm: it has unknown side effects
4604          and cannot be moved at will.  */
4605       if (MEM_VOLATILE_P (x))
4606 	return false;
4607 
4608     default:
4609       break;
4610     }
4611 
4612   fmt = GET_RTX_FORMAT (code);
4613   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4614     {
4615       if (fmt[i] == 'e')
4616 	{
4617 	  if (!rtx_moveable_p (&XEXP (x, i), type))
4618 	    return false;
4619 	}
4620       else if (fmt[i] == 'E')
4621 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4622 	  {
4623 	    if (!rtx_moveable_p (&XVECEXP (x, i, j), type))
4624 	      return false;
4625 	  }
4626     }
4627   return true;
4628 }
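/* [Editorial sketch, not part of GCC]  The default arm of
   rtx_moveable_p above is an instance of the standard format-driven
   RTL walk: decide about the node itself, then recurse into every
   sub-expression.  The tree type below is a hypothetical stand-in
   for rtx; only the shape of the recursion is the point.  */

struct sketch_expr
{
  bool movable_here;		/* verdict for this node alone */
  int n_ops;
  struct sketch_expr **ops;
};

static bool
sketch_expr_moveable_p (const struct sketch_expr *x)
{
  if (!x->movable_here)
    return false;
  /* Any immovable operand makes the whole expression immovable.  */
  for (int i = x->n_ops - 1; i >= 0; i--)
    if (!sketch_expr_moveable_p (x->ops[i]))
      return false;
  return true;
}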
4629 
4630 /* A wrapper around dominated_by_p, which uses the information in UID_LUID
4631    to give dominance relationships between two insns I1 and I2.  */
4632 static bool
4633 insn_dominated_by_p (rtx i1, rtx i2, int *uid_luid)
4634 {
4635   basic_block bb1 = BLOCK_FOR_INSN (i1);
4636   basic_block bb2 = BLOCK_FOR_INSN (i2);
4637 
4638   if (bb1 == bb2)
4639     return uid_luid[INSN_UID (i2)] < uid_luid[INSN_UID (i1)];
4640   return dominated_by_p (CDI_DOMINATORS, bb1, bb2);
4641 }
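/* [Editorial sketch, not part of GCC]  The wrapper above stitches two
   orders together: inside one block the precomputed luids give a
   total order; across blocks the dominator tree decides.  A
   hypothetical stand-alone shape of the same decision:  */

static bool
sketch_insn_dominated_by_p (int bb1, int bb2, int luid1, int luid2,
			    bool (*bb_dom_p) (int, int))
{
  if (bb1 == bb2)
    return luid2 < luid1;	/* I2 comes earlier in the same block */
  return bb_dom_p (bb1, bb2);
}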
4642 
4643 /* Record the range of register numbers added by find_moveable_pseudos.  */
4644 int first_moveable_pseudo, last_moveable_pseudo;
4645 
4646 /* This vector holds data for every register added by
4647    find_moveable_pseudos, with index 0 holding data for the
4648    first_moveable_pseudo.  */
4649 /* The original home register.  */
4650 static vec<rtx> pseudo_replaced_reg;
4651 
4652 /* Look for instances where we have an instruction that is known to increase
4653    register pressure, and whose result is not used immediately.  If it is
4654    possible to move the instruction downwards to just before its first use,
4655    split its lifetime into two ranges.  We create a new pseudo to compute the
4656    value, and emit a move instruction just before the first use.  If, after
4657    register allocation, the new pseudo remains unallocated, the function
4658    move_unallocated_pseudos then deletes the move instruction and places
4659    the computation just before the first use.
4660 
4661    Such a move is safe and profitable if all the input registers remain live
4662    and unchanged between the original computation and its first use.  In such
4663    a situation, the computation is known to increase register pressure, and
4664    moving it is known to at least not worsen it.
4665 
4666    We restrict moves to only those cases where a register remains unallocated,
4667    in order to avoid interfering too much with the instruction schedule.  As
4668    an exception, we may move insns which only modify their input register
4669    (typically induction variables), as this increases the freedom for our
4670    intended transformation, and does not limit the second instruction
4671    scheduler pass.  */
4672 
4673 static void
4674 find_moveable_pseudos (void)
4675 {
4676   unsigned i;
4677   int max_regs = max_reg_num ();
4678   int max_uid = get_max_uid ();
4679   basic_block bb;
4680   int *uid_luid = XNEWVEC (int, max_uid);
4681   rtx_insn **closest_uses = XNEWVEC (rtx_insn *, max_regs);
4682   /* A set of registers which are live but not modified throughout a block.  */
4683   bitmap_head *bb_transp_live = XNEWVEC (bitmap_head,
4684 					 last_basic_block_for_fn (cfun));
4685   /* A set of registers which only exist in a given basic block.  */
4686   bitmap_head *bb_local = XNEWVEC (bitmap_head,
4687 				   last_basic_block_for_fn (cfun));
4688   /* A set of registers which are set once, in an instruction that can be
4689      moved freely downwards, but are otherwise transparent to a block.  */
4690   bitmap_head *bb_moveable_reg_sets = XNEWVEC (bitmap_head,
4691 					       last_basic_block_for_fn (cfun));
4692   auto_bitmap live, used, set, interesting, unusable_as_input;
4693   bitmap_iterator bi;
4694 
4695   first_moveable_pseudo = max_regs;
4696   pseudo_replaced_reg.release ();
4697   pseudo_replaced_reg.safe_grow_cleared (max_regs, true);
4698 
4699   df_analyze ();
4700   calculate_dominance_info (CDI_DOMINATORS);
4701 
4702   i = 0;
4703   FOR_EACH_BB_FN (bb, cfun)
4704     {
4705       rtx_insn *insn;
4706       bitmap transp = bb_transp_live + bb->index;
4707       bitmap moveable = bb_moveable_reg_sets + bb->index;
4708       bitmap local = bb_local + bb->index;
4709 
4710       bitmap_initialize (local, 0);
4711       bitmap_initialize (transp, 0);
4712       bitmap_initialize (moveable, 0);
4713       bitmap_copy (live, df_get_live_out (bb));
4714       bitmap_and_into (live, df_get_live_in (bb));
4715       bitmap_copy (transp, live);
4716       bitmap_clear (moveable);
4717       bitmap_clear (live);
4718       bitmap_clear (used);
4719       bitmap_clear (set);
4720       FOR_BB_INSNS (bb, insn)
4721 	if (NONDEBUG_INSN_P (insn))
4722 	  {
4723 	    df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
4724 	    df_ref def, use;
4725 
4726 	    uid_luid[INSN_UID (insn)] = i++;
4727 
4728 	    def = df_single_def (insn_info);
4729 	    use = df_single_use (insn_info);
4730 	    if (use
4731 		&& def
4732 		&& DF_REF_REGNO (use) == DF_REF_REGNO (def)
4733 		&& !bitmap_bit_p (set, DF_REF_REGNO (use))
4734 		&& rtx_moveable_p (&PATTERN (insn), OP_IN))
4735 	      {
4736 		unsigned regno = DF_REF_REGNO (use);
4737 		bitmap_set_bit (moveable, regno);
4738 		bitmap_set_bit (set, regno);
4739 		bitmap_set_bit (used, regno);
4740 		bitmap_clear_bit (transp, regno);
4741 		continue;
4742 	      }
4743 	    FOR_EACH_INSN_INFO_USE (use, insn_info)
4744 	      {
4745 		unsigned regno = DF_REF_REGNO (use);
4746 		bitmap_set_bit (used, regno);
4747 		if (bitmap_clear_bit (moveable, regno))
4748 		  bitmap_clear_bit (transp, regno);
4749 	      }
4750 
4751 	    FOR_EACH_INSN_INFO_DEF (def, insn_info)
4752 	      {
4753 		unsigned regno = DF_REF_REGNO (def);
4754 		bitmap_set_bit (set, regno);
4755 		bitmap_clear_bit (transp, regno);
4756 		bitmap_clear_bit (moveable, regno);
4757 	      }
4758 	  }
4759     }
4760 
4761   FOR_EACH_BB_FN (bb, cfun)
4762     {
4763       bitmap local = bb_local + bb->index;
4764       rtx_insn *insn;
4765 
4766       FOR_BB_INSNS (bb, insn)
4767 	if (NONDEBUG_INSN_P (insn))
4768 	  {
4769 	    df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
4770 	    rtx_insn *def_insn;
4771 	    rtx closest_use, note;
4772 	    df_ref def, use;
4773 	    unsigned regno;
4774 	    bool all_dominated, all_local;
4775 	    machine_mode mode;
4776 
4777 	    def = df_single_def (insn_info);
4778 	    /* There must be exactly one def in this insn.  */
4779 	    if (!def || !single_set (insn))
4780 	      continue;
4781 	    /* This must be the only definition of the reg.  We also limit
4782 	       which modes we deal with so that we can assume we can generate
4783 	       move instructions.  */
4784 	    regno = DF_REF_REGNO (def);
4785 	    mode = GET_MODE (DF_REF_REG (def));
4786 	    if (DF_REG_DEF_COUNT (regno) != 1
4787 		|| !DF_REF_INSN_INFO (def)
4788 		|| HARD_REGISTER_NUM_P (regno)
4789 		|| DF_REG_EQ_USE_COUNT (regno) > 0
4790 		|| (!INTEGRAL_MODE_P (mode)
4791 		    && !FLOAT_MODE_P (mode)
4792 		    && !OPAQUE_MODE_P (mode)))
4793 	      continue;
4794 	    def_insn = DF_REF_INSN (def);
4795 
4796 	    for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1))
4797 	      if (REG_NOTE_KIND (note) == REG_EQUIV && MEM_P (XEXP (note, 0)))
4798 		break;
4799 
4800 	    if (note)
4801 	      {
4802 		if (dump_file)
4803 		  fprintf (dump_file, "Ignoring reg %d, has equiv memory\n",
4804 			   regno);
4805 		bitmap_set_bit (unusable_as_input, regno);
4806 		continue;
4807 	      }
4808 
4809 	    use = DF_REG_USE_CHAIN (regno);
4810 	    all_dominated = true;
4811 	    all_local = true;
4812 	    closest_use = NULL_RTX;
4813 	    for (; use; use = DF_REF_NEXT_REG (use))
4814 	      {
4815 		rtx_insn *insn;
4816 		if (!DF_REF_INSN_INFO (use))
4817 		  {
4818 		    all_dominated = false;
4819 		    all_local = false;
4820 		    break;
4821 		  }
4822 		insn = DF_REF_INSN (use);
4823 		if (DEBUG_INSN_P (insn))
4824 		  continue;
4825 		if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (def_insn))
4826 		  all_local = false;
4827 		if (!insn_dominated_by_p (insn, def_insn, uid_luid))
4828 		  all_dominated = false;
4829 		if (closest_use != insn && closest_use != const0_rtx)
4830 		  {
4831 		    if (closest_use == NULL_RTX)
4832 		      closest_use = insn;
4833 		    else if (insn_dominated_by_p (closest_use, insn, uid_luid))
4834 		      closest_use = insn;
4835 		    else if (!insn_dominated_by_p (insn, closest_use, uid_luid))
4836 		      closest_use = const0_rtx;
4837 		  }
4838 	      }
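	    /* [Editorial note]  At this point closest_use encodes a
	       three-valued result: NULL_RTX if no non-debug use was
	       seen, an insn if that insn dominates every other use
	       (a unique first use), or const0_rtx if no such unique
	       first use exists.  */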
4839 	    if (!all_dominated)
4840 	      {
4841 		if (dump_file)
4842 		  fprintf (dump_file, "Reg %d not all uses dominated by set\n",
4843 			   regno);
4844 		continue;
4845 	      }
4846 	    if (all_local)
4847 	      bitmap_set_bit (local, regno);
4848 	    if (closest_use == const0_rtx || closest_use == NULL
4849 		|| next_nonnote_nondebug_insn (def_insn) == closest_use)
4850 	      {
4851 		if (dump_file)
4852 		  fprintf (dump_file, "Reg %d uninteresting%s\n", regno,
4853 			   closest_use == const0_rtx || closest_use == NULL
4854 			   ? " (no unique first use)" : "");
4855 		continue;
4856 	      }
4857 
4858 	    bitmap_set_bit (interesting, regno);
4859 	    /* If we get here, we know closest_use is a non-NULL insn
4860 	       (as opposed to const0_rtx).  */
4861 	    closest_uses[regno] = as_a <rtx_insn *> (closest_use);
4862 
4863 	    if (dump_file && (all_local || all_dominated))
4864 	      {
4865 		fprintf (dump_file, "Reg %u:", regno);
4866 		if (all_local)
4867 		  fprintf (dump_file, " local to bb %d", bb->index);
4868 		if (all_dominated)
4869 		  fprintf (dump_file, " def dominates all uses");
4870 		if (closest_use != const0_rtx)
4871 		  fprintf (dump_file, " has unique first use");
4872 		fputs ("\n", dump_file);
4873 	      }
4874 	  }
4875     }
4876 
4877   EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
4878     {
4879       df_ref def = DF_REG_DEF_CHAIN (i);
4880       rtx_insn *def_insn = DF_REF_INSN (def);
4881       basic_block def_block = BLOCK_FOR_INSN (def_insn);
4882       bitmap def_bb_local = bb_local + def_block->index;
4883       bitmap def_bb_moveable = bb_moveable_reg_sets + def_block->index;
4884       bitmap def_bb_transp = bb_transp_live + def_block->index;
4885       bool local_to_bb_p = bitmap_bit_p (def_bb_local, i);
4886       rtx_insn *use_insn = closest_uses[i];
4887       df_ref use;
4888       bool all_ok = true;
4889       bool all_transp = true;
4890 
4891       if (!REG_P (DF_REF_REG (def)))
4892 	continue;
4893 
4894       if (!local_to_bb_p)
4895 	{
4896 	  if (dump_file)
4897 	    fprintf (dump_file, "Reg %u not local to one basic block\n",
4898 		     i);
4899 	  continue;
4900 	}
4901       if (reg_equiv_init (i) != NULL_RTX)
4902 	{
4903 	  if (dump_file)
4904 	    fprintf (dump_file, "Ignoring reg %u with equiv init insn\n",
4905 		     i);
4906 	  continue;
4907 	}
4908       if (!rtx_moveable_p (&PATTERN (def_insn), OP_IN))
4909 	{
4910 	  if (dump_file)
4911 	    fprintf (dump_file, "Found def insn %d for %d to be not moveable\n",
4912 		     INSN_UID (def_insn), i);
4913 	  continue;
4914 	}
4915       if (dump_file)
4916 	fprintf (dump_file, "Examining insn %d, def for %d\n",
4917 		 INSN_UID (def_insn), i);
4918       FOR_EACH_INSN_USE (use, def_insn)
4919 	{
4920 	  unsigned regno = DF_REF_REGNO (use);
4921 	  if (bitmap_bit_p (unusable_as_input, regno))
4922 	    {
4923 	      all_ok = false;
4924 	      if (dump_file)
4925 		fprintf (dump_file, "  found unusable input reg %u.\n", regno);
4926 	      break;
4927 	    }
4928 	  if (!bitmap_bit_p (def_bb_transp, regno))
4929 	    {
4930 	      if (bitmap_bit_p (def_bb_moveable, regno)
4931 		  && !control_flow_insn_p (use_insn))
4932 		{
4933 		  if (modified_between_p (DF_REF_REG (use), def_insn, use_insn))
4934 		    {
4935 		      rtx_insn *x = NEXT_INSN (def_insn);
4936 		      while (!modified_in_p (DF_REF_REG (use), x))
4937 			{
4938 			  gcc_assert (x != use_insn);
4939 			  x = NEXT_INSN (x);
4940 			}
4941 		      if (dump_file)
4942 			fprintf (dump_file, "  input reg %u modified but insn %d moveable\n",
4943 				 regno, INSN_UID (x));
4944 		      emit_insn_after (PATTERN (x), use_insn);
4945 		      set_insn_deleted (x);
4946 		    }
4947 		  else
4948 		    {
4949 		      if (dump_file)
4950 			fprintf (dump_file, "  input reg %u modified between def and use\n",
4951 				 regno);
4952 		      all_transp = false;
4953 		    }
4954 		}
4955 	      else
4956 		all_transp = false;
4957 	    }
4958 	}
4959       if (!all_ok)
4960 	continue;
4961       if (!dbg_cnt (ira_move))
4962 	break;
4963       if (dump_file)
4964 	fprintf (dump_file, "  all ok%s\n", all_transp ? " and transp" : "");
4965 
4966       if (all_transp)
4967 	{
4968 	  rtx def_reg = DF_REF_REG (def);
4969 	  rtx newreg = ira_create_new_reg (def_reg);
4970 	  if (validate_change (def_insn, DF_REF_REAL_LOC (def), newreg, 0))
4971 	    {
4972 	      unsigned nregno = REGNO (newreg);
4973 	      emit_insn_before (gen_move_insn (def_reg, newreg), use_insn);
4974 	      nregno -= max_regs;
4975 	      pseudo_replaced_reg[nregno] = def_reg;
4976 	    }
4977 	}
4978     }
4979 
4980   FOR_EACH_BB_FN (bb, cfun)
4981     {
4982       bitmap_clear (bb_local + bb->index);
4983       bitmap_clear (bb_transp_live + bb->index);
4984       bitmap_clear (bb_moveable_reg_sets + bb->index);
4985     }
4986   free (uid_luid);
4987   free (closest_uses);
4988   free (bb_local);
4989   free (bb_transp_live);
4990   free (bb_moveable_reg_sets);
4991 
4992   last_moveable_pseudo = max_reg_num ();
4993 
4994   fix_reg_equiv_init ();
4995   expand_reg_info ();
4996   regstat_free_n_sets_and_refs ();
4997   regstat_free_ri ();
4998   regstat_init_n_sets_and_refs ();
4999   regstat_compute_ri ();
5000   free_dominance_info (CDI_DOMINATORS);
5001 }
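/* [Editorial sketch, not part of GCC]  The two-phase move performed
   by find_moveable_pseudos and move_unallocated_pseudos, shown on a
   hypothetical three-address IR:

       x = a + b              y = a + b      <- def renamed
       ...             ==>    ...
       use x                  x = y          <- copy before first use
                              use x

   If Y later gets no hard register, phase 2 deletes the copy and
   sinks the computation itself.  Phase 1 in miniature, with all
   types and names hypothetical:  */

struct sketch_insn3
{
  struct sketch_insn3 *next;
  int dest, src1, src2;		/* register numbers, -1 if unused */
};

/* Rename DEF's destination to NEW_REG and link the prepared COPY
   insn "old_dest = NEW_REG" in right after PREV, the insn just
   before the first use.  */
static void
sketch_split_range (struct sketch_insn3 *def, struct sketch_insn3 *prev,
		    struct sketch_insn3 *copy, int new_reg)
{
  copy->dest = def->dest;	/* the copy restores the old name */
  copy->src1 = new_reg;
  copy->src2 = -1;
  def->dest = new_reg;		/* computation now defines NEW_REG */
  copy->next = prev->next;	/* splice before the first use */
  prev->next = copy;
}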
5002 
5003 /* If the SET pattern SET is an assignment from a hard register to a pseudo which
5004    is live at CALL_DOM (if non-NULL, otherwise this check is omitted), return
5005    the destination.  Otherwise return NULL.  */
5006 
5007 static rtx
5008 interesting_dest_for_shprep_1 (rtx set, basic_block call_dom)
5009 {
5010   rtx src = SET_SRC (set);
5011   rtx dest = SET_DEST (set);
5012   if (!REG_P (src) || !HARD_REGISTER_P (src)
5013       || !REG_P (dest) || HARD_REGISTER_P (dest)
5014       || (call_dom && !bitmap_bit_p (df_get_live_in (call_dom), REGNO (dest))))
5015     return NULL;
5016   return dest;
5017 }
5018 
5019 /* If insn is interesting for parameter range-splitting shrink-wrapping
5020    preparation, i.e. it is a single set from a hard register to a pseudo, which
5021    is live at CALL_DOM (if non-NULL, otherwise this check is omitted), or a
5022    PARALLEL containing exactly one such set, return the destination.
5023    Otherwise return NULL.  */
5024 
5025 static rtx
5026 interesting_dest_for_shprep (rtx_insn *insn, basic_block call_dom)
5027 {
5028   if (!INSN_P (insn))
5029     return NULL;
5030   rtx pat = PATTERN (insn);
5031   if (GET_CODE (pat) == SET)
5032     return interesting_dest_for_shprep_1 (pat, call_dom);
5033 
5034   if (GET_CODE (pat) != PARALLEL)
5035     return NULL;
5036   rtx ret = NULL;
5037   for (int i = 0; i < XVECLEN (pat, 0); i++)
5038     {
5039       rtx sub = XVECEXP (pat, 0, i);
5040       if (GET_CODE (sub) == USE || GET_CODE (sub) == CLOBBER)
5041 	continue;
5042       if (GET_CODE (sub) != SET
5043 	  || side_effects_p (sub))
5044 	return NULL;
5045       rtx dest = interesting_dest_for_shprep_1 (sub, call_dom);
5046       if (dest && ret)
5047 	return NULL;
5048       if (dest)
5049 	ret = dest;
5050     }
5051   return ret;
5052 }
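/* [Editorial sketch, not part of GCC]  The PARALLEL handling above
   follows a common "unique interesting element" scan: skip harmless
   elements, give up on anything unexpected, and give up if a second
   match appears.  Generic shape, with hypothetical predicates:  */

static int
sketch_unique_match (const int *elts, int n,
		     bool (*skip_p) (int),	/* USE/CLOBBER-like */
		     bool (*valid_p) (int),	/* plain SET-like */
		     bool (*match_p) (int))	/* qualifying dest */
{
  int found = -1;
  for (int i = 0; i < n; i++)
    {
      if (skip_p (elts[i]))
	continue;
      if (!valid_p (elts[i]))
	return -1;		/* unexpected element: give up */
      if (!match_p (elts[i]))
	continue;		/* valid but not a match */
      if (found >= 0)
	return -1;		/* second match: not unique */
      found = elts[i];
    }
  return found;
}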
5053 
5054 /* Split live ranges of pseudos that are loaded from hard registers in the
5055    first BB, in a BB that dominates all non-sibling calls, if such a BB can be
5056    found and is not in a loop.  Return true if the function has made any
5057    changes.  */
5058 
5059 static bool
5060 split_live_ranges_for_shrink_wrap (void)
5061 {
5062   basic_block bb, call_dom = NULL;
5063   basic_block first = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5064   rtx_insn *insn, *last_interesting_insn = NULL;
5065   auto_bitmap need_new, reachable;
5066   vec<basic_block> queue;
5067 
5068   if (!SHRINK_WRAPPING_ENABLED)
5069     return false;
5070 
5071   queue.create (n_basic_blocks_for_fn (cfun));
5072 
5073   FOR_EACH_BB_FN (bb, cfun)
5074     FOR_BB_INSNS (bb, insn)
5075       if (CALL_P (insn) && !SIBLING_CALL_P (insn))
5076 	{
5077 	  if (bb == first)
5078 	    {
5079 	      queue.release ();
5080 	      return false;
5081 	    }
5082 
5083 	  bitmap_set_bit (need_new, bb->index);
5084 	  bitmap_set_bit (reachable, bb->index);
5085 	  queue.quick_push (bb);
5086 	  break;
5087 	}
5088 
5089   if (queue.is_empty ())
5090     {
5091       queue.release ();
5092       return false;
5093     }
5094 
5095   while (!queue.is_empty ())
5096     {
5097       edge e;
5098       edge_iterator ei;
5099 
5100       bb = queue.pop ();
5101       FOR_EACH_EDGE (e, ei, bb->succs)
5102 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
5103 	    && bitmap_set_bit (reachable, e->dest->index))
5104 	  queue.quick_push (e->dest);
5105     }
5106   queue.release ();
5107 
5108   FOR_BB_INSNS (first, insn)
5109     {
5110       rtx dest = interesting_dest_for_shprep (insn, NULL);
5111       if (!dest)
5112 	continue;
5113 
5114       if (DF_REG_DEF_COUNT (REGNO (dest)) > 1)
5115 	return false;
5116 
5117       for (df_ref use = DF_REG_USE_CHAIN (REGNO (dest));
5118 	   use;
5119 	   use = DF_REF_NEXT_REG (use))
5120 	{
5121 	  int ubbi = DF_REF_BB (use)->index;
5122 	  if (bitmap_bit_p (reachable, ubbi))
5123 	    bitmap_set_bit (need_new, ubbi);
5124 	}
5125       last_interesting_insn = insn;
5126     }
5127 
5128   if (!last_interesting_insn)
5129     return false;
5130 
5131   call_dom = nearest_common_dominator_for_set (CDI_DOMINATORS, need_new);
5132   if (call_dom == first)
5133     return false;
5134 
5135   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5136   while (bb_loop_depth (call_dom) > 0)
5137     call_dom = get_immediate_dominator (CDI_DOMINATORS, call_dom);
5138   loop_optimizer_finalize ();
5139 
5140   if (call_dom == first)
5141     return false;
5142 
5143   calculate_dominance_info (CDI_POST_DOMINATORS);
5144   if (dominated_by_p (CDI_POST_DOMINATORS, first, call_dom))
5145     {
5146       free_dominance_info (CDI_POST_DOMINATORS);
5147       return false;
5148     }
5149   free_dominance_info (CDI_POST_DOMINATORS);
5150 
5151   if (dump_file)
5152     fprintf (dump_file, "Will split live ranges of parameters at BB %i\n",
5153 	     call_dom->index);
5154 
5155   bool ret = false;
5156   FOR_BB_INSNS (first, insn)
5157     {
5158       rtx dest = interesting_dest_for_shprep (insn, call_dom);
5159       if (!dest || dest == pic_offset_table_rtx)
5160 	continue;
5161 
5162       bool need_newreg = false;
5163       df_ref use, next;
5164       for (use = DF_REG_USE_CHAIN (REGNO (dest)); use; use = next)
5165 	{
5166 	  rtx_insn *uin = DF_REF_INSN (use);
5167 	  next = DF_REF_NEXT_REG (use);
5168 
5169 	  if (DEBUG_INSN_P (uin))
5170 	    continue;
5171 
5172 	  basic_block ubb = BLOCK_FOR_INSN (uin);
5173 	  if (ubb == call_dom
5174 	      || dominated_by_p (CDI_DOMINATORS, ubb, call_dom))
5175 	    {
5176 	      need_newreg = true;
5177 	      break;
5178 	    }
5179 	}
5180 
5181       if (need_newreg)
5182 	{
5183 	  rtx newreg = ira_create_new_reg (dest);
5184 
5185 	  for (use = DF_REG_USE_CHAIN (REGNO (dest)); use; use = next)
5186 	    {
5187 	      rtx_insn *uin = DF_REF_INSN (use);
5188 	      next = DF_REF_NEXT_REG (use);
5189 
5190 	      basic_block ubb = BLOCK_FOR_INSN (uin);
5191 	      if (ubb == call_dom
5192 		  || dominated_by_p (CDI_DOMINATORS, ubb, call_dom))
5193 		validate_change (uin, DF_REF_REAL_LOC (use), newreg, true);
5194 	    }
5195 
5196 	  rtx_insn *new_move = gen_move_insn (newreg, dest);
5197 	  emit_insn_after (new_move, bb_note (call_dom));
5198 	  if (dump_file)
5199 	    {
5200 	      fprintf (dump_file, "Split live-range of register ");
5201 	      print_rtl_single (dump_file, dest);
5202 	    }
5203 	  ret = true;
5204 	}
5205 
5206       if (insn == last_interesting_insn)
5207 	break;
5208     }
5209   apply_change_group ();
5210   return ret;
5211 }
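/* [Editorial sketch, not part of GCC]  The reachability computation
   above is a worklist BFS in which bitmap_set_bit doubles as the
   visited test (it reports whether the bit actually changed).  A
   hypothetical fixed-size version over an adjacency matrix, assuming
   at most SKETCH_MAX_BB blocks:  */

#define SKETCH_MAX_BB 64

static void
sketch_reachable_from (int nblocks, const bool succ[][SKETCH_MAX_BB],
		       int start, bool *reachable)
{
  int queue[SKETCH_MAX_BB];
  int qhead = 0, qtail = 0;

  reachable[start] = true;
  queue[qtail++] = start;
  while (qhead < qtail)
    {
      int bb = queue[qhead++];
      for (int s = 0; s < nblocks; s++)
	if (succ[bb][s] && !reachable[s])
	  {
	    /* A first visit plays the role of bitmap_set_bit
	       returning true: mark and enqueue.  */
	    reachable[s] = true;
	    queue[qtail++] = s;
	  }
    }
}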
5212 
5213 /* Perform the second half of the transformation started in
5214    find_moveable_pseudos.  We look for instances where the newly introduced
5215    pseudo remains unallocated, and remove it by moving the definition to
5216    just before its use, replacing the move instruction generated by
5217    find_moveable_pseudos.  */
5218 static void
5219 move_unallocated_pseudos (void)
5220 {
5221   int i;
5222   for (i = first_moveable_pseudo; i < last_moveable_pseudo; i++)
5223     if (reg_renumber[i] < 0)
5224       {
5225 	int idx = i - first_moveable_pseudo;
5226 	rtx other_reg = pseudo_replaced_reg[idx];
5227 	/* The iterating range [first_moveable_pseudo, last_moveable_pseudo)
5228 	   covers every new pseudo created in find_moveable_pseudos,
5229 	   regardless of whether the validation for it succeeded or not.
5230 	   So we need to skip the pseudos which were used in those failed
5231 	   validations to avoid unexpected DF info and a consequent ICE.
5232 	   Since we only set pseudo_replaced_reg[] when the validation
5233 	   succeeds in find_moveable_pseudos, it is enough to check it here.  */
5234 	if (!other_reg)
5235 	  continue;
5236 	rtx_insn *def_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (i));
5237 	/* The use must follow all definitions of OTHER_REG, so we can
5238 	   insert the new definition immediately after any of them.  */
5239 	df_ref other_def = DF_REG_DEF_CHAIN (REGNO (other_reg));
5240 	rtx_insn *move_insn = DF_REF_INSN (other_def);
5241 	rtx_insn *newinsn = emit_insn_after (PATTERN (def_insn), move_insn);
5242 	rtx set;
5243 	int success;
5244 
5245 	if (dump_file)
5246 	  fprintf (dump_file, "moving def of %d (insn %d now) ",
5247 		   REGNO (other_reg), INSN_UID (def_insn));
5248 
5249 	delete_insn (move_insn);
5250 	while ((other_def = DF_REG_DEF_CHAIN (REGNO (other_reg))))
5251 	  delete_insn (DF_REF_INSN (other_def));
5252 	delete_insn (def_insn);
5253 
5254 	set = single_set (newinsn);
5255 	success = validate_change (newinsn, &SET_DEST (set), other_reg, 0);
5256 	gcc_assert (success);
5257 	if (dump_file)
5258 	  fprintf (dump_file, " %d) rather than keep unallocated replacement %d\n",
5259 		   INSN_UID (newinsn), i);
5260 	SET_REG_N_REFS (i, 0);
5261       }
5262 
5263   first_moveable_pseudo = last_moveable_pseudo = 0;
5264 }
5265 
5266 
5267 
5268 /* Code dealing with scratches (changing them into
5269    pseudos and restoring them from the pseudos).
5270 
5271    We change scratches into pseudos at the beginning of IRA to
5272    simplify dealing with them (conflicts, hard register assignments).
5273 
5274    If the pseudo denoting a scratch was spilled, it means that we do not
5275    need a hard register for it.  Such pseudos are transformed back to
5276    scratches at the end of LRA.  */
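/* [Editorial sketch, not part of GCC]  The scratch round trip in
   miniature, on a hypothetical flat operand representation where -1
   marks a SCRATCH: replace each scratch by a fresh pseudo and record
   where it was; at the end, any recorded location whose pseudo got
   no hard register is turned back into a scratch.  */

struct sketch_sloc
{
  int *loc;		/* where the operand lives */
  int pseudo;		/* pseudo that replaced the scratch */
};

static void
sketch_remove_scratch (int *loc, int new_pseudo, struct sketch_sloc *out)
{
  *loc = new_pseudo;
  out->loc = loc;
  out->pseudo = new_pseudo;
}

static void
sketch_restore_scratch (const struct sketch_sloc *s, const int *renumber)
{
  /* renumber[p] < 0 models reg_renumber: no hard register given.  */
  if (renumber[s->pseudo] < 0)
    *s->loc = -1;
}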
5277 
5278 /* Description of location of a former scratch operand.	 */
5279 struct sloc
5280 {
5281   rtx_insn *insn; /* Insn where the scratch was.  */
5282   int nop;  /* Number of the operand which was a scratch.  */
5283   unsigned regno; /* Regno generated instead of the scratch.  */
5284   int icode;  /* Original icode from which scratch was removed.  */
5285 };
5286 
5287 typedef struct sloc *sloc_t;
5288 
5289 /* Locations of the former scratches.  */
5290 static vec<sloc_t> scratches;
5291 
5292 /* Bitmap of scratch regnos.  */
5293 static bitmap_head scratch_bitmap;
5294 
5295 /* Bitmap of scratch operands.	*/
5296 static bitmap_head scratch_operand_bitmap;
5297 
5298 /* Return true if pseudo REGNO was made from a SCRATCH.  */
5299 bool
5300 ira_former_scratch_p (int regno)
5301 {
5302   return bitmap_bit_p (&scratch_bitmap, regno);
5303 }
5304 
5305 /* Return true if the operand NOP of INSN is a former scratch.	*/
5306 bool
5307 ira_former_scratch_operand_p (rtx_insn *insn, int nop)
5308 {
5309   return bitmap_bit_p (&scratch_operand_bitmap,
5310 		       INSN_UID (insn) * MAX_RECOG_OPERANDS + nop) != 0;
5311 }
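/* [Editorial note]  The bitmap above encodes the pair (insn, operand)
   as the single index INSN_UID * MAX_RECOG_OPERANDS + nop -- the usual
   row-major flattening of a two-dimensional table, unambiguous because
   every operand number is below MAX_RECOG_OPERANDS.  */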
5312 
5313 /* Register operand NOP in INSN as a former scratch.  It will be
5314    changed back to a scratch, if necessary, at the end of LRA.  */
5315 void
5316 ira_register_new_scratch_op (rtx_insn *insn, int nop, int icode)
5317 {
5318   rtx op = *recog_data.operand_loc[nop];
5319   sloc_t loc = XNEW (struct sloc);
5320   ira_assert (REG_P (op));
5321   loc->insn = insn;
5322   loc->nop = nop;
5323   loc->regno = REGNO (op);
5324   loc->icode = icode;
5325   scratches.safe_push (loc);
5326   bitmap_set_bit (&scratch_bitmap, REGNO (op));
5327   bitmap_set_bit (&scratch_operand_bitmap,
5328 		  INSN_UID (insn) * MAX_RECOG_OPERANDS + nop);
5329   add_reg_note (insn, REG_UNUSED, op);
5330 }
5331 
5332 /* Return true if string STR contains constraint 'X'.  */
5333 static bool
5334 contains_X_constraint_p (const char *str)
5335 {
5336   int c;
5337 
5338   while ((c = *str))
5339     {
5340       str += CONSTRAINT_LEN (c, str);
5341       if (c == 'X') return true;
5342     }
5343   return false;
5344 }
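/* [Editorial sketch, not part of GCC]  Constraint letters can span
   several characters, which is why the loop above advances by
   CONSTRAINT_LEN rather than by one.  Simplified shape, with the
   token length supplied by a hypothetical callback:  */

static bool
sketch_contains_token_p (const char *str, char wanted,
			 int (*token_len) (const char *))
{
  while (*str)
    {
      if (*str == wanted)
	return true;
      str += token_len (str);	/* step over the whole token */
    }
  return false;
}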
5345 
5346 /* Change INSN's scratches into pseudos and save their location.
5347    Return true if we changed any scratch.  */
5348 bool
5349 ira_remove_insn_scratches (rtx_insn *insn, bool all_p, FILE *dump_file,
5350 			   rtx (*get_reg) (rtx original))
5351 {
5352   int i;
5353   bool insn_changed_p;
5354   rtx reg, *loc;
5355 
5356   extract_insn (insn);
5357   insn_changed_p = false;
5358   for (i = 0; i < recog_data.n_operands; i++)
5359     {
5360       loc = recog_data.operand_loc[i];
5361       if (GET_CODE (*loc) == SCRATCH && GET_MODE (*loc) != VOIDmode)
5362 	{
5363 	  if (! all_p && contains_X_constraint_p (recog_data.constraints[i]))
5364 	    continue;
5365 	  insn_changed_p = true;
5366 	  *loc = reg = get_reg (*loc);
5367 	  ira_register_new_scratch_op (insn, i, INSN_CODE (insn));
5368 	  if (ira_dump_file != NULL)
5369 	    fprintf (dump_file,
5370 		     "Removing SCRATCH to p%u in insn #%u (nop %d)\n",
5371 		     REGNO (reg), INSN_UID (insn), i);
5372 	}
5373     }
5374   return insn_changed_p;
5375 }
5376 
5377 /* Return new register of the same mode as ORIGINAL.  Used in
5378    remove_scratches.  */
5379 static rtx
5380 get_scratch_reg (rtx original)
5381 {
5382   return gen_reg_rtx (GET_MODE (original));
5383 }
5384 
5385 /* Change scratches into pseudos and save their location.  Return true
5386    if we changed any scratch.  */
5387 static bool
5388 remove_scratches (void)
5389 {
5390   bool change_p = false;
5391   basic_block bb;
5392   rtx_insn *insn;
5393 
5394   scratches.create (get_max_uid ());
5395   bitmap_initialize (&scratch_bitmap, &reg_obstack);
5396   bitmap_initialize (&scratch_operand_bitmap, &reg_obstack);
5397   FOR_EACH_BB_FN (bb, cfun)
5398     FOR_BB_INSNS (bb, insn)
5399     if (INSN_P (insn)
5400 	&& ira_remove_insn_scratches (insn, false, ira_dump_file, get_scratch_reg))
5401       {
5402 	/* Because we might use DF, we need to keep DF info up to date.  */
5403 	df_insn_rescan (insn);
5404 	change_p = true;
5405       }
5406   return change_p;
5407 }
5408 
5409 /* Change pseudos created by function remove_scratches back into scratches.  */
5410 void
5411 ira_restore_scratches (FILE *dump_file)
5412 {
5413   int regno, n;
5414   unsigned i;
5415   rtx *op_loc;
5416   sloc_t loc;
5417 
5418   for (i = 0; scratches.iterate (i, &loc); i++)
5419     {
5420       /* Ignore already deleted insns.  */
5421       if (NOTE_P (loc->insn)
5422 	  && NOTE_KIND (loc->insn) == NOTE_INSN_DELETED)
5423 	continue;
5424       extract_insn (loc->insn);
5425       if (loc->icode != INSN_CODE (loc->insn))
5426 	{
5427 	  /* The icode doesn't match, which means the insn has been modified
5428 	     (e.g. register elimination).  The scratch cannot be restored.  */
5429 	  continue;
5430 	}
5431       op_loc = recog_data.operand_loc[loc->nop];
5432       if (REG_P (*op_loc)
5433 	  && ((regno = REGNO (*op_loc)) >= FIRST_PSEUDO_REGISTER)
5434 	  && reg_renumber[regno] < 0)
5435 	{
5436 	  /* This should only happen when a scratch register whose chosen
5437 	     constraint was 'X' got neither memory nor a hard register.  */
5438 	  ira_assert (ira_former_scratch_p (regno));
5439 	  *op_loc = gen_rtx_SCRATCH (GET_MODE (*op_loc));
5440 	  for (n = 0; n < recog_data.n_dups; n++)
5441 	    *recog_data.dup_loc[n]
5442 	      = *recog_data.operand_loc[(int) recog_data.dup_num[n]];
5443 	  if (dump_file != NULL)
5444 	    fprintf (dump_file, "Restoring SCRATCH in insn #%u(nop %d)\n",
5445 		     INSN_UID (loc->insn), loc->nop);
5446 	}
5447     }
5448   for (i = 0; scratches.iterate (i, &loc); i++)
5449     free (loc);
5450   scratches.release ();
5451   bitmap_clear (&scratch_bitmap);
5452   bitmap_clear (&scratch_operand_bitmap);
5453 }
5454 
5455 
5456 
5457 /* If the backend knows where to allocate pseudos for hard
5458    register initial values, register these allocations now.  */
5459 static void
5460 allocate_initial_values (void)
5461 {
5462   if (targetm.allocate_initial_value)
5463     {
5464       rtx hreg, preg, x;
5465       int i, regno;
5466 
5467       for (i = 0; HARD_REGISTER_NUM_P (i); i++)
5468 	{
5469 	  if (! initial_value_entry (i, &hreg, &preg))
5470 	    break;
5471 
5472 	  x = targetm.allocate_initial_value (hreg);
5473 	  regno = REGNO (preg);
5474 	  if (x && REG_N_SETS (regno) <= 1)
5475 	    {
5476 	      if (MEM_P (x))
5477 		reg_equiv_memory_loc (regno) = x;
5478 	      else
5479 		{
5480 		  basic_block bb;
5481 		  int new_regno;
5482 
5483 		  gcc_assert (REG_P (x));
5484 		  new_regno = REGNO (x);
5485 		  reg_renumber[regno] = new_regno;
5486 		  /* Poke the regno right into regno_reg_rtx so that even
5487 		     fixed regs are accepted.  */
5488 		  SET_REGNO (preg, new_regno);
5489 		  /* Update global register liveness information.  */
5490 		  FOR_EACH_BB_FN (bb, cfun)
5491 		    {
5492 		      if (REGNO_REG_SET_P (df_get_live_in (bb), regno))
5493 			SET_REGNO_REG_SET (df_get_live_in (bb), new_regno);
5494 		      if (REGNO_REG_SET_P (df_get_live_out (bb), regno))
5495 			SET_REGNO_REG_SET (df_get_live_out (bb), new_regno);
5496 		    }
5497 		}
5498 	    }
5499 	}
5500 
5501       gcc_checking_assert (! initial_value_entry (FIRST_PSEUDO_REGISTER,
5502 						  &hreg, &preg));
5503     }
5504 }
5505 
5506 
5507 
5508 
5509 /* True when we use LRA instead of reload pass for the current
5510    function.  */
5511 bool ira_use_lra_p;
5512 
5513 /* True if we have allocno conflicts.  It is false for non-optimized
5514    mode or when the conflict table is too big.  */
5515 bool ira_conflicts_p;
5516 
5517 /* Saved between IRA and reload.  */
5518 static int saved_flag_ira_share_spill_slots;
5519 
5520 /* This is the main entry of IRA.  */
5521 static void
5522 ira (FILE *f)
5523 {
5524   bool loops_p;
5525   int ira_max_point_before_emit;
5526   bool saved_flag_caller_saves = flag_caller_saves;
5527   enum ira_region saved_flag_ira_region = flag_ira_region;
5528   basic_block bb;
5529   edge_iterator ei;
5530   edge e;
5531   bool output_jump_reload_p = false;
5532 
5533   if (ira_use_lra_p)
5534     {
5535       /* First put potential jump output reloads on the output edges
5536 	 as USEs which will be removed at the end of LRA.  The major
5537 	 goal is actually to create BBs for critical edges for LRA and
5538 	 populate them later with live info.  Doing this in LRA itself
5539 	 would be difficult.  */
5540       FOR_EACH_BB_FN (bb, cfun)
5541 	{
5542 	  rtx_insn *end = BB_END (bb);
5543 	  if (!JUMP_P (end))
5544 	    continue;
5545 	  extract_insn (end);
5546 	  for (int i = 0; i < recog_data.n_operands; i++)
5547 	    if (recog_data.operand_type[i] != OP_IN)
5548 	      {
5549 		bool skip_p = false;
5550 		FOR_EACH_EDGE (e, ei, bb->succs)
5551 		  if (EDGE_CRITICAL_P (e)
5552 		      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
5553 		      && (e->flags & EDGE_ABNORMAL))
5554 		    {
5555 		      skip_p = true;
5556 		      break;
5557 		    }
5558 		if (skip_p)
5559 		  break;
5560 		output_jump_reload_p = true;
5561 		FOR_EACH_EDGE (e, ei, bb->succs)
5562 		  if (EDGE_CRITICAL_P (e)
5563 		      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
5564 		    {
5565 		      start_sequence ();
5566 		      /* We need to put some no-op insn here.  We
5567 			 cannot put a note, as commit_edge_insertions
5568 			 will fail.  */
5569 		      emit_insn (gen_rtx_USE (VOIDmode, const1_rtx));
5570 		      rtx_insn *insns = get_insns ();
5571 		      end_sequence ();
5572 		      insert_insn_on_edge (insns, e);
5573 		    }
5574 		break;
5575 	      }
5576 	}
5577       if (output_jump_reload_p)
5578 	commit_edge_insertions ();
5579     }
5580 
5581   if (flag_ira_verbose < 10)
5582     {
5583       internal_flag_ira_verbose = flag_ira_verbose;
5584       ira_dump_file = f;
5585     }
5586   else
5587     {
5588       internal_flag_ira_verbose = flag_ira_verbose - 10;
5589       ira_dump_file = stderr;
5590     }
5591 
5592   clear_bb_flags ();
5593 
5594   /* Determine if the current function is a leaf before running IRA
5595      since this can impact optimizations done by the prologue and
5596      epilogue, thus changing register elimination offsets.
5597      Other target callbacks may use crtl->is_leaf too, including
5598      SHRINK_WRAPPING_ENABLED, so initialize as early as possible.  */
5599   crtl->is_leaf = leaf_function_p ();
5600 
5601   /* Perform target specific PIC register initialization.  */
5602   targetm.init_pic_reg ();
5603 
5604   ira_conflicts_p = optimize > 0;
5605 
5606   /* Determine the number of pseudos actually requiring coloring.  */
5607   unsigned int num_used_regs = 0;
5608   for (unsigned int i = FIRST_PSEUDO_REGISTER; i < DF_REG_SIZE (df); i++)
5609     if (DF_REG_DEF_COUNT (i) || DF_REG_USE_COUNT (i))
5610       num_used_regs++;
5611 
5612   /* If there are too many pseudos and/or basic blocks (e.g. 10K
5613      pseudos and 10K blocks or 100K pseudos and 1K blocks), we will
5614      use simplified and faster algorithms in LRA.  */
5615   lra_simple_p
5616     = ira_use_lra_p
5617       && num_used_regs >= (1U << 26) / last_basic_block_for_fn (cfun);
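  /* [Editorial note]  The test above approximates
     num_used_regs * n_blocks >= 2^26 (about 67 million).  For
     example, with 10,000 basic blocks the threshold is roughly
     2^26 / 10^4, i.e. about 6,700 pseudos -- consistent with the
     orders of magnitude cited in the comment above.  */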
5618 
5619   if (lra_simple_p)
5620     {
5621       /* This permits skipping live range splitting in LRA.  */
5622       flag_caller_saves = false;
5623       /* There is no sense in doing regional allocation when we use
5624 	the simplified LRA.  */
5625       flag_ira_region = IRA_REGION_ONE;
5626       ira_conflicts_p = false;
5627     }
5628 
5629 #ifndef IRA_NO_OBSTACK
5630   gcc_obstack_init (&ira_obstack);
5631 #endif
5632   bitmap_obstack_initialize (&ira_bitmap_obstack);
5633 
5634   /* LRA uses its own infrastructure to handle caller save registers.  */
5635   if (flag_caller_saves && !ira_use_lra_p)
5636     init_caller_save ();
5637 
5638   setup_prohibited_mode_move_regs ();
5639   decrease_live_ranges_number ();
5640   df_note_add_problem ();
5641 
5642   /* DF_LIVE can't be used in the register allocator, too many other
5643      parts of the compiler depend on using the "classic" liveness
5644      interpretation of the DF_LR problem.  See PR38711.
5645      Remove the problem, so that we don't spend time updating it in
5646      any of the df_analyze() calls during IRA/LRA.  */
5647   if (optimize > 1)
5648     df_remove_problem (df_live);
5649   gcc_checking_assert (df_live == NULL);
5650 
5651   if (flag_checking)
5652     df->changeable_flags |= DF_VERIFY_SCHEDULED;
5653 
5654   df_analyze ();
5655 
5656   init_reg_equiv ();
5657   if (ira_conflicts_p)
5658     {
5659       calculate_dominance_info (CDI_DOMINATORS);
5660 
5661       if (split_live_ranges_for_shrink_wrap ())
5662 	df_analyze ();
5663 
5664       free_dominance_info (CDI_DOMINATORS);
5665     }
5666 
5667   df_clear_flags (DF_NO_INSN_RESCAN);
5668 
5669   indirect_jump_optimize ();
5670   if (delete_trivially_dead_insns (get_insns (), max_reg_num ()))
5671     df_analyze ();
5672 
5673   regstat_init_n_sets_and_refs ();
5674   regstat_compute_ri ();
5675 
5676   /* If we are not optimizing, then this is the only place before
5677      register allocation where dataflow is done.  And that is needed
5678      to generate these warnings.  */
5679   if (warn_clobbered)
5680     generate_setjmp_warnings ();
5681 
5682   /* update_equiv_regs can use the reg classes of pseudos, which are set
5683      up by register-pressure-sensitive scheduling, loop invariant motion,
5684      and live range shrinking.  This info can become obsolete if new
5685      pseudos have been added since the last setup.  Recalculate it
5686      in that case.  */
5687   if (resize_reg_info () && (flag_sched_pressure || flag_live_range_shrinkage
5688 			     || flag_ira_loop_pressure))
5689     ira_set_pseudo_classes (true, ira_dump_file);
5690 
5691   init_alias_analysis ();
5692   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5693   reg_equiv = XCNEWVEC (struct equivalence, max_reg_num ());
5694   update_equiv_regs_prescan ();
5695   update_equiv_regs ();
5696 
5697   /* Don't move insns if live range shrinkage or register
5698      pressure-sensitive scheduling were done because it will not
5699      improve allocation but likely worsen insn scheduling.  */
5700   if (optimize
5701       && !flag_live_range_shrinkage
5702       && !(flag_sched_pressure && flag_schedule_insns))
5703     combine_and_move_insns ();
5704 
5705   /* Gather additional equivalences with memory.  */
5706   if (optimize)
5707     add_store_equivs ();
5708 
5709   loop_optimizer_finalize ();
5710   free_dominance_info (CDI_DOMINATORS);
5711   end_alias_analysis ();
5712   free (reg_equiv);
5713 
5714   /* Once max_regno changes, we need to free and re-init/re-compute
5715      some data structures like regstat_n_sets_and_refs and reg_info_p.  */
5716   auto regstat_recompute_for_max_regno = []() {
5717     regstat_free_n_sets_and_refs ();
5718     regstat_free_ri ();
5719     regstat_init_n_sets_and_refs ();
5720     regstat_compute_ri ();
5721     resize_reg_info ();
5722   };
5723 
5724   int max_regno_before_rm = max_reg_num ();
5725   if (ira_use_lra_p && remove_scratches ())
5726     {
5727       ira_expand_reg_equiv ();
5728       /* For now remove_scratches is supposed to create pseudos when it
5729 	 succeeds; assert that this always happens.  Once it no longer
5730 	 holds, we should guard the regstat recompute for the case
5731 	 where max_regno changes.  */
5732       gcc_assert (max_regno_before_rm != max_reg_num ());
5733       regstat_recompute_for_max_regno ();
5734     }
5735 
5736   setup_reg_equiv ();
5737   grow_reg_equivs ();
5738   setup_reg_equiv_init ();
5739 
5740   allocated_reg_info_size = max_reg_num ();
5741 
5742   /* It is not worth doing such an improvement when we use simple
5743      allocation, either because of -O0 usage or because the function
5744      is too big.  */
5745   if (ira_conflicts_p)
5746     find_moveable_pseudos ();
5747 
5748   max_regno_before_ira = max_reg_num ();
5749   ira_setup_eliminable_regset ();
5750 
5751   ira_overall_cost = ira_reg_cost = ira_mem_cost = 0;
5752   ira_load_cost = ira_store_cost = ira_shuffle_cost = 0;
5753   ira_move_loops_num = ira_additional_jumps_num = 0;
5754 
5755   ira_assert (current_loops == NULL);
5756   if (flag_ira_region == IRA_REGION_ALL || flag_ira_region == IRA_REGION_MIXED)
5757     loop_optimizer_init (AVOID_CFG_MODIFICATIONS | LOOPS_HAVE_RECORDED_EXITS);
5758 
5759   if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
5760     fprintf (ira_dump_file, "Building IRA IR\n");
5761   loops_p = ira_build ();
5762 
5763   ira_assert (ira_conflicts_p || !loops_p);
5764 
5765   saved_flag_ira_share_spill_slots = flag_ira_share_spill_slots;
5766   if (too_high_register_pressure_p () || cfun->calls_setjmp)
5767     /* It is just a waste of the compiler's time to pack spilled
5768        pseudos into stack slots in this case -- prohibit it.  We also
5769        do this if there is a setjmp call: the compiler is required to
5770        preserve the value of a variable not modified between setjmp
5771        and longjmp, and sharing slots does not guarantee that.  */
5772     flag_ira_share_spill_slots = FALSE;
5773 
5774   ira_color ();
5775 
5776   ira_max_point_before_emit = ira_max_point;
5777 
5778   ira_initiate_emit_data ();
5779 
5780   ira_emit (loops_p);
5781 
5782   max_regno = max_reg_num ();
5783   if (ira_conflicts_p)
5784     {
5785       if (! loops_p)
5786 	{
5787 	  if (! ira_use_lra_p)
5788 	    ira_initiate_assign ();
5789 	}
5790       else
5791 	{
5792 	  expand_reg_info ();
5793 
5794 	  if (ira_use_lra_p)
5795 	    {
5796 	      ira_allocno_t a;
5797 	      ira_allocno_iterator ai;
5798 
5799 	      FOR_EACH_ALLOCNO (a, ai)
5800                 {
5801                   int old_regno = ALLOCNO_REGNO (a);
5802                   int new_regno = REGNO (ALLOCNO_EMIT_DATA (a)->reg);
5803 
5804                   ALLOCNO_REGNO (a) = new_regno;
5805 
5806                   if (old_regno != new_regno)
5807                     setup_reg_classes (new_regno, reg_preferred_class (old_regno),
5808                                        reg_alternate_class (old_regno),
5809                                        reg_allocno_class (old_regno));
5810                 }
5811 	    }
5812 	  else
5813 	    {
5814 	      if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
5815 		fprintf (ira_dump_file, "Flattening IR\n");
5816 	      ira_flattening (max_regno_before_ira, ira_max_point_before_emit);
5817 	    }
5818 	  /* New insns were generated: add notes and recalculate live
5819 	     info.  */
5820 	  df_analyze ();
5821 
5822 	  /* ??? Rebuild the loop tree, but why?  Does the loop tree
5823 	     change if new insns were generated?  Can that be handled
5824 	     by updating the loop tree incrementally?  */
5825 	  loop_optimizer_finalize ();
5826 	  free_dominance_info (CDI_DOMINATORS);
5827 	  loop_optimizer_init (AVOID_CFG_MODIFICATIONS
5828 			       | LOOPS_HAVE_RECORDED_EXITS);
5829 
5830 	  if (! ira_use_lra_p)
5831 	    {
5832 	      setup_allocno_assignment_flags ();
5833 	      ira_initiate_assign ();
5834 	      ira_reassign_conflict_allocnos (max_regno);
5835 	    }
5836 	}
5837     }
5838 
5839   ira_finish_emit_data ();
5840 
5841   setup_reg_renumber ();
5842 
5843   calculate_allocation_cost ();
5844 
5845 #ifdef ENABLE_IRA_CHECKING
5846   if (ira_conflicts_p && ! ira_use_lra_p)
5847     /* Unlike the reload pass, LRA does not use any conflict info
5848        from IRA.  We don't rebuild conflict info for LRA (through an
5849        ira_flattening call) and cannot use the check here.  We could
5850        rebuild this info for LRA in the check mode but there is a risk
5851        that code generated with the check and without it will be a bit
5852        different.  Calling ira_flattening in any mode would be a
5853        waste of CPU time.  So do not check the allocation for LRA.  */
5854     check_allocation ();
5855 #endif
5856 
5857   if (max_regno != max_regno_before_ira)
5858     regstat_recompute_for_max_regno ();
5859 
5860   overall_cost_before = ira_overall_cost;
5861   if (! ira_conflicts_p)
5862     grow_reg_equivs ();
5863   else
5864     {
5865       fix_reg_equiv_init ();
5866 
5867 #ifdef ENABLE_IRA_CHECKING
5868       print_redundant_copies ();
5869 #endif
5870       if (! ira_use_lra_p)
5871 	{
5872 	  ira_spilled_reg_stack_slots_num = 0;
5873 	  ira_spilled_reg_stack_slots
5874 	    = ((class ira_spilled_reg_stack_slot *)
5875 	       ira_allocate (max_regno
5876 			     * sizeof (class ira_spilled_reg_stack_slot)));
5877 	  memset ((void *)ira_spilled_reg_stack_slots, 0,
5878 		  max_regno * sizeof (class ira_spilled_reg_stack_slot));
5879 	}
5880     }
5881   allocate_initial_values ();
5882 
5883   /* See comment for find_moveable_pseudos call.  */
5884   if (ira_conflicts_p)
5885     move_unallocated_pseudos ();
5886 
5887   /* Restore original values.  */
5888   if (lra_simple_p)
5889     {
5890       flag_caller_saves = saved_flag_caller_saves;
5891       flag_ira_region = saved_flag_ira_region;
5892     }
5893 }
5894 
5895 /* Modify asm goto to avoid further trouble with this insn.  We
5896    cannot replace the insn with a USE, as we do for other asm insns,
5897    because we still need to keep the CFG consistent.  */
5898 void
5899 ira_nullify_asm_goto (rtx_insn *insn)
5900 {
5901   ira_assert (JUMP_P (insn) && INSN_CODE (insn) < 0);
5902   rtx tmp = extract_asm_operands (PATTERN (insn));
5903   PATTERN (insn) = gen_rtx_ASM_OPERANDS (VOIDmode, ggc_strdup (""), "", 0,
5904 					 rtvec_alloc (0),
5905 					 rtvec_alloc (0),
5906 					 ASM_OPERANDS_LABEL_VEC (tmp),
5907 					 ASM_OPERANDS_SOURCE_LOCATION (tmp));
5908 }
5909 
5910 static void
5911 do_reload (void)
5912 {
5913   basic_block bb;
5914   bool need_dce;
5915   unsigned pic_offset_table_regno = INVALID_REGNUM;
5916 
5917   if (flag_ira_verbose < 10)
5918     ira_dump_file = dump_file;
5919 
5920   /* If pic_offset_table_rtx is a pseudo register, then keep it so
5921      after reload to avoid possible wrong uses of the hard reg assigned
5922      to it.  */
5923   if (pic_offset_table_rtx
5924       && REGNO (pic_offset_table_rtx) >= FIRST_PSEUDO_REGISTER)
5925     pic_offset_table_regno = REGNO (pic_offset_table_rtx);
5926 
5927   timevar_push (TV_RELOAD);
5928   if (ira_use_lra_p)
5929     {
5930       if (current_loops != NULL)
5931 	{
5932 	  loop_optimizer_finalize ();
5933 	  free_dominance_info (CDI_DOMINATORS);
5934 	}
5935       FOR_ALL_BB_FN (bb, cfun)
5936 	bb->loop_father = NULL;
5937       current_loops = NULL;
5938 
5939       ira_destroy ();
5940 
5941       lra (ira_dump_file);
5942       /* ???!!! Move it before lra () when we use ira_reg_equiv in
5943 	 LRA.  */
5944       vec_free (reg_equivs);
5945       reg_equivs = NULL;
5946       need_dce = false;
5947     }
5948   else
5949     {
5950       df_set_flags (DF_NO_INSN_RESCAN);
5951       build_insn_chain ();
5952 
5953       need_dce = reload (get_insns (), ira_conflicts_p);
5954     }
5955 
5956   timevar_pop (TV_RELOAD);
5957 
5958   timevar_push (TV_IRA);
5959 
5960   if (ira_conflicts_p && ! ira_use_lra_p)
5961     {
5962       ira_free (ira_spilled_reg_stack_slots);
5963       ira_finish_assign ();
5964     }
5965 
5966   if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL
5967       && overall_cost_before != ira_overall_cost)
5968     fprintf (ira_dump_file, "+++Overall after reload %" PRId64 "\n",
5969 	     ira_overall_cost);
5970 
5971   flag_ira_share_spill_slots = saved_flag_ira_share_spill_slots;
5972 
5973   if (! ira_use_lra_p)
5974     {
5975       ira_destroy ();
5976       if (current_loops != NULL)
5977 	{
5978 	  loop_optimizer_finalize ();
5979 	  free_dominance_info (CDI_DOMINATORS);
5980 	}
5981       FOR_ALL_BB_FN (bb, cfun)
5982 	bb->loop_father = NULL;
5983       current_loops = NULL;
5984 
5985       regstat_free_ri ();
5986       regstat_free_n_sets_and_refs ();
5987     }
5988 
5989   if (optimize)
5990     cleanup_cfg (CLEANUP_EXPENSIVE);
5991 
5992   finish_reg_equiv ();
5993 
5994   bitmap_obstack_release (&ira_bitmap_obstack);
5995 #ifndef IRA_NO_OBSTACK
5996   obstack_free (&ira_obstack, NULL);
5997 #endif
5998 
5999   /* The code after the reload has changed so much that at this point
6000      we might as well just rescan everything.  Note that
6001      df_rescan_all_insns is not going to help here because it does not
6002      touch the artificial uses and defs.  */
6003   df_finish_pass (true);
6004   df_scan_alloc (NULL);
6005   df_scan_blocks ();
6006 
6007   if (optimize > 1)
6008     {
6009       df_live_add_problem ();
6010       df_live_set_all_dirty ();
6011     }
6012 
6013   if (optimize)
6014     df_analyze ();
6015 
6016   if (need_dce && optimize)
6017     run_fast_dce ();
6018 
6019   /* Diagnose uses of the hard frame pointer when it is used as a global
6020      register.  Often we can get away with letting the user appropriate
6021      the frame pointer, but we should let them know when code generation
6022      makes that impossible.  */
6023   if (global_regs[HARD_FRAME_POINTER_REGNUM] && frame_pointer_needed)
6024     {
6025       tree decl = global_regs_decl[HARD_FRAME_POINTER_REGNUM];
6026       error_at (DECL_SOURCE_LOCATION (current_function_decl),
6027                 "frame pointer required, but reserved");
6028       inform (DECL_SOURCE_LOCATION (decl), "for %qD", decl);
6029     }
6030 
6031   /* If we are doing generic stack checking, give a warning if this
6032      function's frame size is larger than we expect.  */
6033   if (flag_stack_check == GENERIC_STACK_CHECK)
6034     {
6035       poly_int64 size = get_frame_size () + STACK_CHECK_FIXED_FRAME_SIZE;
6036 
6037       for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6038 	if (df_regs_ever_live_p (i)
6039 	    && !fixed_regs[i]
6040 	    && !crtl->abi->clobbers_full_reg_p (i))
6041 	  size += UNITS_PER_WORD;
6042 
6043       if (constant_lower_bound (size) > STACK_CHECK_MAX_FRAME_SIZE)
6044 	warning (0, "frame size too large for reliable stack checking");
6045     }
6046 
6047   if (pic_offset_table_regno != INVALID_REGNUM)
6048     pic_offset_table_rtx = gen_rtx_REG (Pmode, pic_offset_table_regno);
6049 
6050   timevar_pop (TV_IRA);
6051 }
6052 
6053 /* Run the integrated register allocator.  */
6054 
6055 namespace {
6056 
6057 const pass_data pass_data_ira =
6058 {
6059   RTL_PASS, /* type */
6060   "ira", /* name */
6061   OPTGROUP_NONE, /* optinfo_flags */
6062   TV_IRA, /* tv_id */
6063   0, /* properties_required */
6064   0, /* properties_provided */
6065   0, /* properties_destroyed */
6066   0, /* todo_flags_start */
6067   TODO_do_not_ggc_collect, /* todo_flags_finish */
6068 };
6069 
6070 class pass_ira : public rtl_opt_pass
6071 {
6072 public:
6073   pass_ira (gcc::context *ctxt)
6074     : rtl_opt_pass (pass_data_ira, ctxt)
6075   {}
6076 
6077   /* opt_pass methods: */
6078   virtual bool gate (function *)
6079     {
6080       return !targetm.no_register_allocation;
6081     }
6082   virtual unsigned int execute (function *)
6083     {
6084       ira (dump_file);
6085       return 0;
6086     }
6087 
6088 }; // class pass_ira
6089 
6090 } // anon namespace
6091 
6092 rtl_opt_pass *
6093 make_pass_ira (gcc::context *ctxt)
6094 {
6095   return new pass_ira (ctxt);
6096 }
6097 
6098 namespace {
6099 
6100 const pass_data pass_data_reload =
6101 {
6102   RTL_PASS, /* type */
6103   "reload", /* name */
6104   OPTGROUP_NONE, /* optinfo_flags */
6105   TV_RELOAD, /* tv_id */
6106   0, /* properties_required */
6107   0, /* properties_provided */
6108   0, /* properties_destroyed */
6109   0, /* todo_flags_start */
6110   0, /* todo_flags_finish */
6111 };
6112 
6113 class pass_reload : public rtl_opt_pass
6114 {
6115 public:
6116   pass_reload (gcc::context *ctxt)
6117     : rtl_opt_pass (pass_data_reload, ctxt)
6118   {}
6119 
6120   /* opt_pass methods: */
6121   virtual bool gate (function *)
6122     {
6123       return !targetm.no_register_allocation;
6124     }
6125   virtual unsigned int execute (function *)
6126     {
6127       do_reload ();
6128       return 0;
6129     }
6130 
6131 }; // class pass_reload
6132 
6133 } // anon namespace
6134 
6135 rtl_opt_pass *
6136 make_pass_reload (gcc::context *ctxt)
6137 {
6138   return new pass_reload (ctxt);
6139 }
6140