1 /*
2  * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "compiler/oopMap.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "opto/addnode.hpp"
30 #include "opto/block.hpp"
31 #include "opto/callnode.hpp"
32 #include "opto/cfgnode.hpp"
33 #include "opto/chaitin.hpp"
34 #include "opto/coalesce.hpp"
35 #include "opto/indexSet.hpp"
36 #include "opto/machnode.hpp"
37 #include "opto/memnode.hpp"
38 #include "opto/opcodes.hpp"
39 
PhaseIFG(Arena * arena)40 PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
41 }
42 
init(uint maxlrg)43 void PhaseIFG::init( uint maxlrg ) {
44   _maxlrg = maxlrg;
45   _yanked = new (_arena) VectorSet(_arena);
46   _is_square = false;
47   // Make uninitialized adjacency lists
48   _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
49   // Also make empty live range structures
50   _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
51   memset((void*)_lrgs,0,sizeof(LRG)*maxlrg);
52   // Init all to empty
53   for( uint i = 0; i < maxlrg; i++ ) {
54     _adjs[i].initialize(maxlrg);
55     _lrgs[i].Set_All();
56   }
57 }
58 
59 // Add edge between vertices a & b.  These are sorted (triangular matrix),
60 // then the smaller number is inserted in the larger numbered array.
add_edge(uint a,uint b)61 int PhaseIFG::add_edge( uint a, uint b ) {
62   lrgs(a).invalid_degree();
63   lrgs(b).invalid_degree();
64   // Sort a and b, so that a is bigger
65   assert( !_is_square, "only on triangular" );
66   if( a < b ) { uint tmp = a; a = b; b = tmp; }
67   return _adjs[a].insert( b );
68 }
69 
70 // Is there an edge between a and b?
test_edge(uint a,uint b) const71 int PhaseIFG::test_edge( uint a, uint b ) const {
72   // Sort a and b, so that a is larger
73   assert( !_is_square, "only on triangular" );
74   if( a < b ) { uint tmp = a; a = b; b = tmp; }
75   return _adjs[a].member(b);
76 }
77 
78 // Convert triangular matrix to square matrix
SquareUp()79 void PhaseIFG::SquareUp() {
80   assert( !_is_square, "only on triangular" );
81 
82   // Simple transpose
83   for(uint i = 0; i < _maxlrg; i++ ) {
84     if (!_adjs[i].is_empty()) {
85       IndexSetIterator elements(&_adjs[i]);
86       uint datum;
87       while ((datum = elements.next()) != 0) {
88         _adjs[datum].insert(i);
89       }
90     }
91   }
92   _is_square = true;
93 }
94 
95 // Compute effective degree in bulk
Compute_Effective_Degree()96 void PhaseIFG::Compute_Effective_Degree() {
97   assert( _is_square, "only on square" );
98 
99   for( uint i = 0; i < _maxlrg; i++ )
100     lrgs(i).set_degree(effective_degree(i));
101 }
102 
test_edge_sq(uint a,uint b) const103 int PhaseIFG::test_edge_sq( uint a, uint b ) const {
104   assert( _is_square, "only on square" );
105   // Swap, so that 'a' has the lesser count.  Then binary search is on
106   // the smaller of a's list and b's list.
107   if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
108   //return _adjs[a].unordered_member(b);
109   return _adjs[a].member(b);
110 }
111 
112 // Union edges of B into A
Union(uint a,uint b)113 void PhaseIFG::Union(uint a, uint b) {
114   assert( _is_square, "only on square" );
115   IndexSet *A = &_adjs[a];
116   if (!_adjs[b].is_empty()) {
117     IndexSetIterator b_elements(&_adjs[b]);
118     uint datum;
119     while ((datum = b_elements.next()) != 0) {
120       if (A->insert(datum)) {
121         _adjs[datum].insert(a);
122         lrgs(a).invalid_degree();
123         lrgs(datum).invalid_degree();
124       }
125     }
126   }
127 }
128 
129 // Yank a Node and all connected edges from the IFG.  Return a
130 // list of neighbors (edges) yanked.
remove_node(uint a)131 IndexSet *PhaseIFG::remove_node( uint a ) {
132   assert( _is_square, "only on square" );
133   assert( !_yanked->test(a), "" );
134   _yanked->set(a);
135 
136   // I remove the LRG from all neighbors.
137   LRG &lrg_a = lrgs(a);
138 
139   if (!_adjs[a].is_empty()) {
140     IndexSetIterator elements(&_adjs[a]);
141     uint datum;
142     while ((datum = elements.next()) != 0) {
143       _adjs[datum].remove(a);
144       lrgs(datum).inc_degree(-lrg_a.compute_degree(lrgs(datum)));
145     }
146   }
147   return neighbors(a);
148 }
149 
150 // Re-insert a yanked Node.
re_insert(uint a)151 void PhaseIFG::re_insert(uint a) {
152   assert( _is_square, "only on square" );
153   assert( _yanked->test(a), "" );
154   _yanked->remove(a);
155 
156   if (_adjs[a].is_empty()) return;
157 
158   IndexSetIterator elements(&_adjs[a]);
159   uint datum;
160   while ((datum = elements.next()) != 0) {
161     _adjs[datum].insert(a);
162     lrgs(datum).invalid_degree();
163   }
164 }
165 
166 // Compute the degree between 2 live ranges.  If both live ranges are
167 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
168 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
169 // MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
170 // this is so.
compute_degree(LRG & l) const171 int LRG::compute_degree(LRG &l) const {
172   int tmp;
173   int num_regs = _num_regs;
174   int nregs = l.num_regs();
175   tmp =  (_fat_proj || l._fat_proj)     // either is a fat-proj?
176     ? (num_regs * nregs)                // then use product
177     : MAX2(num_regs,nregs);             // else use max
178   return tmp;
179 }
180 
181 // Compute effective degree for this live range.  If both live ranges are
182 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
183 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
184 // MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
185 // this is so.
effective_degree(uint lidx) const186 int PhaseIFG::effective_degree(uint lidx) const {
187   IndexSet *s = neighbors(lidx);
188   if (s->is_empty()) return 0;
189   int eff = 0;
190   int num_regs = lrgs(lidx).num_regs();
191   int fat_proj = lrgs(lidx)._fat_proj;
192   IndexSetIterator elements(s);
193   uint nidx;
194   while ((nidx = elements.next()) != 0) {
195     LRG &lrgn = lrgs(nidx);
196     int nregs = lrgn.num_regs();
197     eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
198       ? (num_regs * nregs)              // then use product
199       : MAX2(num_regs,nregs);           // else use max
200   }
201   return eff;
202 }
203 
204 
205 #ifndef PRODUCT
dump() const206 void PhaseIFG::dump() const {
207   tty->print_cr("-- Interference Graph --%s--",
208                 _is_square ? "square" : "triangular" );
209   if (_is_square) {
210     for (uint i = 0; i < _maxlrg; i++) {
211       tty->print(_yanked->test(i) ? "XX " : "  ");
212       tty->print("L%d: { ",i);
213       if (!_adjs[i].is_empty()) {
214         IndexSetIterator elements(&_adjs[i]);
215         uint datum;
216         while ((datum = elements.next()) != 0) {
217           tty->print("L%d ", datum);
218         }
219       }
220       tty->print_cr("}");
221 
222     }
223     return;
224   }
225 
226   // Triangular
227   for( uint i = 0; i < _maxlrg; i++ ) {
228     uint j;
229     tty->print(_yanked->test(i) ? "XX " : "  ");
230     tty->print("L%d: { ",i);
231     for( j = _maxlrg; j > i; j-- )
232       if( test_edge(j - 1,i) ) {
233         tty->print("L%d ",j - 1);
234       }
235     tty->print("| ");
236     if (!_adjs[i].is_empty()) {
237       IndexSetIterator elements(&_adjs[i]);
238       uint datum;
239       while ((datum = elements.next()) != 0) {
240         tty->print("L%d ", datum);
241       }
242     }
243     tty->print("}\n");
244   }
245   tty->print("\n");
246 }
247 
stats() const248 void PhaseIFG::stats() const {
249   ResourceMark rm;
250   int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
251   memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
252   uint i;
253   for( i = 0; i < _maxlrg; i++ ) {
254     h_cnt[neighbor_cnt(i)]++;
255   }
256   tty->print_cr("--Histogram of counts--");
257   for( i = 0; i < _maxlrg*2; i++ )
258     if( h_cnt[i] )
259       tty->print("%d/%d ",i,h_cnt[i]);
260   tty->cr();
261 }
262 
verify(const PhaseChaitin * pc) const263 void PhaseIFG::verify( const PhaseChaitin *pc ) const {
264   // IFG is square, sorted and no need for Find
265   for( uint i = 0; i < _maxlrg; i++ ) {
266     assert(!_yanked->test(i) || !neighbor_cnt(i), "Is removed completely" );
267     IndexSet *set = &_adjs[i];
268     if (!set->is_empty()) {
269       IndexSetIterator elements(set);
270       uint idx;
271       uint last = 0;
272       while ((idx = elements.next()) != 0) {
273         assert(idx != i, "Must have empty diagonal");
274         assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
275         assert(_adjs[idx].member(i), "IFG not square");
276         assert(!_yanked->test(idx), "No yanked neighbors");
277         assert(last < idx, "not sorted increasing");
278         last = idx;
279       }
280     }
281     assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
282   }
283 }
284 #endif
285 
286 /*
287  * Interfere this register with everything currently live.
288  * Check for interference by checking overlap of regmasks.
289  * Only interfere if acceptable register masks overlap.
290  */
interfere_with_live(uint lid,IndexSet * liveout)291 void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) {
292   if (!liveout->is_empty()) {
293     LRG& lrg = lrgs(lid);
294     const RegMask &rm = lrg.mask();
295     IndexSetIterator elements(liveout);
296     uint interfering_lid = elements.next();
297     while (interfering_lid != 0) {
298       LRG& interfering_lrg = lrgs(interfering_lid);
299       if (rm.overlap(interfering_lrg.mask())) {
300         _ifg->add_edge(lid, interfering_lid);
301       }
302       interfering_lid = elements.next();
303     }
304   }
305 }
306 
307 // Actually build the interference graph.  Uses virtual registers only, no
308 // physical register masks.  This allows me to be very aggressive when
309 // coalescing copies.  Some of this aggressiveness will have to be undone
310 // later, but I'd rather get all the copies I can now (since unremoved copies
311 // at this point can end up in bad places).  Copies I re-insert later I have
312 // more opportunity to insert them in low-frequency locations.
build_ifg_virtual()313 void PhaseChaitin::build_ifg_virtual( ) {
314   Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);
315 
316   // For all blocks (in any order) do...
317   for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
318     Block* block = _cfg.get_block(i);
319     IndexSet* liveout = _live->live(block);
320 
321     // The IFG is built by a single reverse pass over each basic block.
322     // Starting with the known live-out set, we remove things that get
323     // defined and add things that become live (essentially executing one
324     // pass of a standard LIVE analysis). Just before a Node defines a value
325     // (and removes it from the live-ness set) that value is certainly live.
326     // The defined value interferes with everything currently live.  The
327     // value is then removed from the live-ness set and it's inputs are
328     // added to the live-ness set.
329     for (uint j = block->end_idx() + 1; j > 1; j--) {
330       Node* n = block->get_node(j - 1);
331 
332       // Get value being defined
333       uint r = _lrg_map.live_range_id(n);
334 
335       // Some special values do not allocate
336       if (r) {
337 
338         // Remove from live-out set
339         liveout->remove(r);
340 
341         // Copies do not define a new value and so do not interfere.
342         // Remove the copies source from the liveout set before interfering.
343         uint idx = n->is_Copy();
344         if (idx != 0) {
345           liveout->remove(_lrg_map.live_range_id(n->in(idx)));
346         }
347 
348         // Interfere with everything live
349         interfere_with_live(r, liveout);
350       }
351 
352       // Make all inputs live
353       if (!n->is_Phi()) {      // Phi function uses come from prior block
354         for(uint k = 1; k < n->req(); k++) {
355           liveout->insert(_lrg_map.live_range_id(n->in(k)));
356         }
357       }
358 
359       // 2-address instructions always have the defined value live
360       // on entry to the instruction, even though it is being defined
361       // by the instruction.  We pretend a virtual copy sits just prior
362       // to the instruction and kills the src-def'd register.
363       // In other words, for 2-address instructions the defined value
364       // interferes with all inputs.
365       uint idx;
366       if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
367         const MachNode *mach = n->as_Mach();
368         // Sometimes my 2-address ADDs are commuted in a bad way.
369         // We generally want the USE-DEF register to refer to the
370         // loop-varying quantity, to avoid a copy.
371         uint op = mach->ideal_Opcode();
372         // Check that mach->num_opnds() == 3 to ensure instruction is
373         // not subsuming constants, effectively excludes addI_cin_imm
374         // Can NOT swap for instructions like addI_cin_imm since it
375         // is adding zero to yhi + carry and the second ideal-input
376         // points to the result of adding low-halves.
377         // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
378         if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
379             n->in(1)->bottom_type()->base() == Type::Int &&
380             // See if the ADD is involved in a tight data loop the wrong way
381             n->in(2)->is_Phi() &&
382             n->in(2)->in(2) == n ) {
383           Node *tmp = n->in(1);
384           n->set_req( 1, n->in(2) );
385           n->set_req( 2, tmp );
386         }
387         // Defined value interferes with all inputs
388         uint lidx = _lrg_map.live_range_id(n->in(idx));
389         for (uint k = 1; k < n->req(); k++) {
390           uint kidx = _lrg_map.live_range_id(n->in(k));
391           if (kidx != lidx) {
392             _ifg->add_edge(r, kidx);
393           }
394         }
395       }
396     } // End of forall instructions in block
397   } // End of forall blocks
398 }
399 
400 #ifdef ASSERT
count_int_pressure(IndexSet * liveout)401 uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
402   if (liveout->is_empty()) {
403     return 0;
404   }
405   IndexSetIterator elements(liveout);
406   uint lidx = elements.next();
407   uint cnt = 0;
408   while (lidx != 0) {
409     LRG& lrg = lrgs(lidx);
410     if (lrg.mask_is_nonempty_and_up() &&
411         !lrg.is_float_or_vector() &&
412         (lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
413          (Matcher::has_predicated_vectors() &&
414           lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegVectMask])))) {
415       cnt += lrg.reg_pressure();
416     }
417     lidx = elements.next();
418   }
419   return cnt;
420 }
421 
count_float_pressure(IndexSet * liveout)422 uint PhaseChaitin::count_float_pressure(IndexSet* liveout) {
423   if (liveout->is_empty()) {
424     return 0;
425   }
426   IndexSetIterator elements(liveout);
427   uint lidx = elements.next();
428   uint cnt = 0;
429   while (lidx != 0) {
430     LRG& lrg = lrgs(lidx);
431     if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) {
432       cnt += lrg.reg_pressure();
433     }
434     lidx = elements.next();
435   }
436   return cnt;
437 }
438 #endif
439 
440 /*
441  * Adjust register pressure down by 1.  Capture last hi-to-low transition,
442  */
lower_pressure(Block * b,uint location,LRG & lrg,IndexSet * liveout,Pressure & int_pressure,Pressure & float_pressure)443 void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) {
444   if (lrg.mask_is_nonempty_and_up()) {
445     if (lrg.is_float_or_vector()) {
446       float_pressure.lower(lrg, location);
447     } else {
448       // Do not count the SP and flag registers
449       const RegMask& r = lrg.mask();
450       if (r.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
451            (Matcher::has_predicated_vectors() &&
452             r.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
453         int_pressure.lower(lrg, location);
454       }
455     }
456   }
457   if (_scheduling_info_generated == false) {
458     assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
459     assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
460   }
461 }
462 
463 /* Go to the first non-phi index in a block */
first_nonphi_index(Block * b)464 static uint first_nonphi_index(Block* b) {
465   uint i;
466   uint end_idx = b->end_idx();
467   for (i = 1; i < end_idx; i++) {
468     Node* n = b->get_node(i);
469     if (!n->is_Phi()) {
470       break;
471     }
472   }
473   return i;
474 }
475 
476 /*
477  * Spills could be inserted before a CreateEx node which should be the first
478  * instruction in a block after Phi nodes. If so, move the CreateEx node up.
479  */
move_exception_node_up(Block * b,uint first_inst,uint last_inst)480 static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
481   for (uint i = first_inst; i < last_inst; i++) {
482     Node* ex = b->get_node(i);
483     if (ex->is_SpillCopy()) {
484       continue;
485     }
486 
487     if (i > first_inst &&
488         ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
489       b->remove_node(i);
490       b->insert_node(ex, first_inst);
491     }
492     // Stop once a CreateEx or any other node is found
493     break;
494   }
495 }
496 
497 /*
498  * When new live ranges are live, we raise the register pressure
499  */
raise_pressure(Block * b,LRG & lrg,Pressure & int_pressure,Pressure & float_pressure)500 void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) {
501   if (lrg.mask_is_nonempty_and_up()) {
502     if (lrg.is_float_or_vector()) {
503       float_pressure.raise(lrg);
504     } else {
505       // Do not count the SP and flag registers
506       const RegMask& rm = lrg.mask();
507       if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
508            (Matcher::has_predicated_vectors() &&
509             rm.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
510         int_pressure.raise(lrg);
511       }
512     }
513   }
514 }
515 
516 
517 /*
518  * Computes the initial register pressure of a block, looking at all live
519  * ranges in the liveout. The register pressure is computed for both float
520  * and int/pointer registers.
521  * Live ranges in the liveout are presumed live for the whole block.
522  * We add the cost for the whole block to the area of the live ranges initially.
523  * If a live range gets killed in the block, we'll subtract the unused part of
524  * the block from the area.
525  */
compute_initial_block_pressure(Block * b,IndexSet * liveout,Pressure & int_pressure,Pressure & float_pressure,double cost)526 void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) {
527   if (!liveout->is_empty()) {
528     IndexSetIterator elements(liveout);
529     uint lid = elements.next();
530     while (lid != 0) {
531       LRG &lrg = lrgs(lid);
532       lrg._area += cost;
533       raise_pressure(b, lrg, int_pressure, float_pressure);
534       lid = elements.next();
535     }
536   }
537   assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
538   assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
539 }
540 
541 /*
542 * Computes the entry register pressure of a block, looking at all live
543 * ranges in the livein. The register pressure is computed for both float
544 * and int/pointer registers.
545 */
compute_entry_block_pressure(Block * b)546 void PhaseChaitin::compute_entry_block_pressure(Block* b) {
547   IndexSet *livein = _live->livein(b);
548   if (!livein->is_empty()) {
549     IndexSetIterator elements(livein);
550     uint lid = elements.next();
551     while (lid != 0) {
552       LRG &lrg = lrgs(lid);
553       raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
554       lid = elements.next();
555     }
556   }
557   // Now check phis for locally defined inputs
558   for (uint j = 0; j < b->number_of_nodes(); j++) {
559     Node* n = b->get_node(j);
560     if (n->is_Phi()) {
561       for (uint k = 1; k < n->req(); k++) {
562         Node* phi_in = n->in(k);
563         // Because we are talking about phis, raise register pressure once for each
564         // instance of a phi to account for a single value
565         if (_cfg.get_block_for_node(phi_in) == b) {
566           LRG& lrg = lrgs(phi_in->_idx);
567           raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
568           break;
569         }
570       }
571     }
572   }
573   _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure());
574   _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure());
575 }
576 
577 /*
578 * Computes the exit register pressure of a block, looking at all live
579 * ranges in the liveout. The register pressure is computed for both float
580 * and int/pointer registers.
581 */
compute_exit_block_pressure(Block * b)582 void PhaseChaitin::compute_exit_block_pressure(Block* b) {
583 
584   IndexSet* livein = _live->live(b);
585   _sched_int_pressure.set_current_pressure(0);
586   _sched_float_pressure.set_current_pressure(0);
587   if (!livein->is_empty()) {
588     IndexSetIterator elements(livein);
589     uint lid = elements.next();
590     while (lid != 0) {
591       LRG &lrg = lrgs(lid);
592       raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
593       lid = elements.next();
594     }
595   }
596 }
597 
598 /*
599  * Remove dead node if it's not used.
600  * We only remove projection nodes if the node "defining" the projection is
601  * dead, for example on x86, if we have a dead Add node we remove its
602  * RFLAGS node.
603  */
remove_node_if_not_used(Block * b,uint location,Node * n,uint lid,IndexSet * liveout)604 bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
605   Node* def = n->in(0);
606   if (!n->is_Proj() ||
607       (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
608     if (n->is_MachProj()) {
609       // Don't remove KILL projections if their "defining" nodes have
610       // memory effects (have SCMemProj projection node) -
611       // they are not dead even when their result is not used.
612       // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
613       // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
614       // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
615       // in block in such order that KILL MachProj nodes are processed first.
616       if (def->has_out_with(Op_SCMemProj)) {
617         return false;
618       }
619     }
620     b->remove_node(location);
621     LRG& lrg = lrgs(lid);
622     if (lrg._def == n) {
623       lrg._def = 0;
624     }
625     n->disconnect_inputs(C);
626     _cfg.unmap_node_from_block(n);
627     n->replace_by(C->top());
628     return true;
629   }
630   return false;
631 }
632 
633 /*
634  * When encountering a fat projection, we might go from a low to high to low
635  * (since the fat proj only lives at this instruction) going backwards in the
636  * block. If we find a low to high transition, we record it.
637  */
check_for_high_pressure_transition_at_fatproj(uint & block_reg_pressure,uint location,LRG & lrg,Pressure & pressure,const int op_regtype)638 void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) {
639   RegMask mask_tmp = lrg.mask();
640   mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]);
641   pressure.check_pressure_at_fatproj(location, mask_tmp);
642 }
643 
644 /*
645  * Insure high score for immediate-use spill copies so they get a color.
646  * All single-use MachSpillCopy(s) that immediately precede their
647  * use must color early.  If a longer live range steals their
648  * color, the spill copy will split and may push another spill copy
649  * further away resulting in an infinite spill-split-retry cycle.
650  * Assigning a zero area results in a high score() and a good
651  * location in the simplify list.
652  */
assign_high_score_to_immediate_copies(Block * b,Node * n,LRG & lrg,uint next_inst,uint last_inst)653 void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
654   if (n->is_SpillCopy() &&
655       lrg.is_singledef() && // A multi defined live range can still split
656       n->outcnt() == 1 &&   // and use must be in this block
657       _cfg.get_block_for_node(n->unique_out()) == b) {
658 
659     Node* single_use = n->unique_out();
660     assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
661     // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
662 
663     // Find first non SpillCopy 'm' that follows the current instruction
664     // (current_inst - 1) is index for current instruction 'n'
665     Node* m = n;
666     for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
667       m = b->get_node(i);
668     }
669     if (m == single_use) {
670       lrg._area = 0.0;
671     }
672   }
673 }
674 
675 /*
676  * Copies do not define a new value and so do not interfere.
677  * Remove the copies source from the liveout set before interfering.
678  */
remove_interference_from_copy(Block * b,uint location,uint lid_copy,IndexSet * liveout,double cost,Pressure & int_pressure,Pressure & float_pressure)679 void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
680   if (liveout->remove(lid_copy)) {
681     LRG& lrg_copy = lrgs(lid_copy);
682     lrg_copy._area -= cost;
683 
684     // Lower register pressure since copy and definition can share the same register
685     lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure);
686   }
687 }
688 
689 /*
690  * The defined value must go in a particular register. Remove that register from
691  * all conflicting parties and avoid the interference.
692  */
remove_bound_register_from_interfering_live_ranges(LRG & lrg,IndexSet * liveout,uint & must_spill)693 void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
694   if (liveout->is_empty()) return;
695   // Check for common case
696   const RegMask& rm = lrg.mask();
697   int r_size = lrg.num_regs();
698   // Smear odd bits
699   IndexSetIterator elements(liveout);
700   uint l = elements.next();
701   while (l != 0) {
702     LRG& interfering_lrg = lrgs(l);
703     // If 'l' must spill already, do not further hack his bits.
704     // He'll get some interferences and be forced to spill later.
705     if (interfering_lrg._must_spill) {
706       l = elements.next();
707       continue;
708     }
709 
710     // Remove bound register(s) from 'l's choices
711     RegMask old = interfering_lrg.mask();
712     uint old_size = interfering_lrg.mask_size();
713 
714     // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
715     // longer interferes with 'rm'.  If 'l' requires aligned
716     // adjacent pairs, subtract out bit pairs.
717     assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");
718 
719     if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
720       RegMask r2mask = rm;
721       // Leave only aligned set of bits.
722       r2mask.smear_to_sets(interfering_lrg.num_regs());
723       // It includes vector case.
724       interfering_lrg.SUBTRACT(r2mask);
725       interfering_lrg.compute_set_mask_size();
726     } else if (r_size != 1) {
727       // fat proj
728       interfering_lrg.SUBTRACT(rm);
729       interfering_lrg.compute_set_mask_size();
730     } else {
731       // Common case: size 1 bound removal
732       OptoReg::Name r_reg = rm.find_first_elem();
733       if (interfering_lrg.mask().Member(r_reg)) {
734         interfering_lrg.Remove(r_reg);
735         interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
736       }
737     }
738 
739     // If 'l' goes completely dry, it must spill.
740     if (interfering_lrg.not_free()) {
741       // Give 'l' some kind of reasonable mask, so it picks up
742       // interferences (and will spill later).
743       interfering_lrg.set_mask(old);
744       interfering_lrg.set_mask_size(old_size);
745       must_spill++;
746       interfering_lrg._must_spill = 1;
747       interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
748     }
749     l = elements.next();
750   }
751 }
752 
753 /*
754  * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
755  * sole use of a StoreLConditional. While StoreLConditionals set memory (the
756  * SCMemProj use) they also def flags; if that flag def is unused the allocator
757  * sees a flag-setting instruction with no use of the flags and assumes it's
758  * dead.  This keeps the (useless) flag-setting behavior alive while also
759  * keeping the (useful) memory update effect.
760  */
add_input_to_liveout(Block * b,Node * n,IndexSet * liveout,double cost,Pressure & int_pressure,Pressure & float_pressure)761 void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
762   JVMState* jvms = n->jvms();
763   uint debug_start = jvms ? jvms->debug_start() : 999999;
764 
765   for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
766     Node* def = n->in(k);
767     uint lid = _lrg_map.live_range_id(def);
768     if (!lid) {
769       continue;
770     }
771     LRG& lrg = lrgs(lid);
772 
773     // No use-side cost for spilling debug info
774     if (k < debug_start) {
775       // A USE costs twice block frequency (once for the Load, once
776       // for a Load-delay).  Rematerialized uses only cost once.
777       lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
778     }
779 
780     if (liveout->insert(lid)) {
781       // Newly live things assumed live from here to top of block
782       lrg._area += cost;
783       raise_pressure(b, lrg, int_pressure, float_pressure);
784       assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
785       assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
786     }
787     assert(lrg._area >= 0.0, "negative spill area" );
788   }
789 }
790 
791 /*
792  * If we run off the top of the block with high pressure just record that the
793  * whole block is high pressure. (Even though we might have a transition
794  * later down in the block)
795  */
check_for_high_pressure_block(Pressure & pressure)796 void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) {
797   // current pressure now means the pressure before the first instruction in the block
798   // (since we have stepped through all instructions backwards)
799   if (pressure.current_pressure() > pressure.high_pressure_limit()) {
800     pressure.set_high_pressure_index_to_block_start();
801   }
802 }
803 
/*
 * Compute the high pressure index: avoid landing in the middle of projnodes,
 * and set the high pressure index for the block.
 */
adjust_high_pressure_index(Block * b,uint & block_hrp_index,Pressure & pressure)808 void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) {
809   uint i = pressure.high_pressure_index();
810   if (i < b->number_of_nodes() && i < b->end_idx() + 1) {
811     Node* cur = b->get_node(i);
812     while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
813       cur = b->get_node(--i);
814     }
815   }
816   block_hrp_index = i;
817 }
818 
print_pressure_info(Pressure & pressure,const char * str)819 void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
820   if (str != NULL) {
821     tty->print_cr("#  *** %s ***", str);
822   }
823   tty->print_cr("#     start pressure is = %d", pressure.start_pressure());
824   tty->print_cr("#     max pressure is = %d", pressure.final_pressure());
825   tty->print_cr("#     end pressure is = %d", pressure.current_pressure());
826   tty->print_cr("#");
827 }
828 
829 /* Build an interference graph:
830  *   That is, if 2 live ranges are simultaneously alive but in their acceptable
831  *   register sets do not overlap, then they do not interfere. The IFG is built
832  *   by a single reverse pass over each basic block. Starting with the known
833  *   live-out set, we remove things that get defined and add things that become
834  *   live (essentially executing one pass of a standard LIVE analysis). Just
835  *   before a Node defines a value (and removes it from the live-ness set) that
836  *   value is certainly live. The defined value interferes with everything
 *   currently live. The value is then removed from the live-ness set and its
 *   inputs are added to the live-ness set.
839  * Compute register pressure for each block:
840  *   We store the biggest register pressure for each block and also the first
841  *   low to high register pressure transition within the block (if any).
842  */
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  Compile::TracePhase tp("buildIFG", &timers[_t_buildIFGphysical]);

  // Count of live ranges forced to spill; returned to the caller.
  uint must_spill = 0;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // Clone (rather than smash in place) the liveout info, so it is alive
    // for the "collect_gc_info" phase later.
    IndexSet liveout(_live->live(block));

    uint first_inst = first_nonphi_index(block);
    uint last_inst = block->end_idx();

    move_exception_node_up(block, first_inst, last_inst);

    // Pressure trackers per register class, sized so the high-pressure index
    // can initially sit just past the last instruction.
    Pressure int_pressure(last_inst + 1, INTPRESSURE);
    Pressure float_pressure(last_inst + 1, FLOATPRESSURE);
    block->_reg_pressure = 0;
    block->_freg_pressure = 0;

    // 'cost' approximates the spill cost of staying live over the remainder
    // of the block: block frequency times remaining instruction count.
    int inst_count = last_inst - first_inst;
    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
    assert(cost >= 0.0, "negative spill cost" );

    compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);

    // Walk the block backwards: a def kills liveness, uses create it.
    for (uint location = last_inst; location > 0; location--) {
      Node* n = block->get_node(location);
      uint lid = _lrg_map.live_range_id(n);

      if (lid) {
        LRG& lrg = lrgs(lid);

        // A DEF normally costs block frequency; rematerialized values are
        // removed from the DEF sight, so LOWER costs here.
        lrg._cost += n->rematerialize() ? 0 : block->_freq;

        // Def is not live-out here: it is dead in this block, except
        // SafePoints, which must be kept.
        if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
          if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
            // Node was deleted; the recorded high-pressure indices shift
            // down by one to stay aligned with the block contents.
            float_pressure.lower_high_pressure_index();
            int_pressure.lower_high_pressure_index();
            continue;
          }
          if (lrg._fat_proj) {
            // A fat projection defines many registers at once and can push
            // pressure over the limit all by itself.
            check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI);
            check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD);
          }
        } else {
          // A live range ends at its definition, remove the remaining area.
          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
          // and +Inf - +Inf = NaN. So let's not do that subtraction.
          if (g_isfinite(cost)) {
            lrg._area -= cost;
          }
          assert(lrg._area >= 0.0, "negative spill area" );

          assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);

          // The def leaves the live set; pressure drops accordingly.
          if (liveout.remove(lid)) {
            lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure);
          }
          uint copy_idx = n->is_Copy();
          if (copy_idx) {
            // A Copy's source and destination need not interfere.
            uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx));
            remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure);
          }
        }

        // Since rematerializable DEFs are not bound but the live range is,
        // some uses must be bound. If we spill live range 'r', it can
        // rematerialize at each use site according to its bindings.
        if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) {
          remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill);
        }
        // The defined value interferes with everything currently live.
        interfere_with_live(lid, &liveout);
      }

      // Area remaining in the block
      inst_count--;
      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // Inputs of the current node become live (Phi inputs are live only on
      // the matching predecessor edge, so they are skipped here).
      if (!n->is_Phi()) {
        add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure);
      }
    }

    check_for_high_pressure_block(int_pressure);
    check_for_high_pressure_block(float_pressure);
    adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
    adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
    // set the final_pressure as the register pressure for the block
    block->_reg_pressure = int_pressure.final_pressure();
    block->_freg_pressure = float_pressure.final_pressure();

#ifndef PRODUCT
    // Gather Register Pressure Statistics
    if (PrintOptoStatistics) {
      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  }

  return must_spill;
}
952