1 /*
2  * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "compiler/oopMap.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "opto/addnode.hpp"
30 #include "opto/block.hpp"
31 #include "opto/callnode.hpp"
32 #include "opto/cfgnode.hpp"
33 #include "opto/chaitin.hpp"
34 #include "opto/coalesce.hpp"
35 #include "opto/indexSet.hpp"
36 #include "opto/machnode.hpp"
37 #include "opto/memnode.hpp"
38 #include "opto/opcodes.hpp"
39 
// Construct the interference-graph phase. Only records the arena; the
// adjacency lists and LRG array are allocated later by init().
PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
}
42 
// Allocate and initialize per-live-range state for 'maxlrg' live ranges:
// one adjacency IndexSet and one LRG record per index, all arena-allocated.
void PhaseIFG::init( uint maxlrg ) {
  _maxlrg = maxlrg;
  _yanked = new (_arena) VectorSet(_arena);
  _is_square = false;   // graph starts out triangular; SquareUp() converts it
  // Make uninitialized adjacency lists
  _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
  // Also make empty live range structures
  _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
  // Zero the raw LRG storage before use (LRGs are not constructed here)
  memset((void*)_lrgs,0,sizeof(LRG)*maxlrg);
  // Init all to empty
  for( uint i = 0; i < maxlrg; i++ ) {
    _adjs[i].initialize(maxlrg);
    _lrgs[i].Set_All();
  }
}
58 
59 // Add edge between vertices a & b.  These are sorted (triangular matrix),
60 // then the smaller number is inserted in the larger numbered array.
add_edge(uint a,uint b)61 int PhaseIFG::add_edge( uint a, uint b ) {
62   lrgs(a).invalid_degree();
63   lrgs(b).invalid_degree();
64   // Sort a and b, so that a is bigger
65   assert( !_is_square, "only on triangular" );
66   if( a < b ) { uint tmp = a; a = b; b = tmp; }
67   return _adjs[a].insert( b );
68 }
69 
70 // Is there an edge between a and b?
test_edge(uint a,uint b) const71 int PhaseIFG::test_edge( uint a, uint b ) const {
72   // Sort a and b, so that a is larger
73   assert( !_is_square, "only on triangular" );
74   if( a < b ) { uint tmp = a; a = b; b = tmp; }
75   return _adjs[a].member(b);
76 }
77 
78 // Convert triangular matrix to square matrix
SquareUp()79 void PhaseIFG::SquareUp() {
80   assert( !_is_square, "only on triangular" );
81 
82   // Simple transpose
83   for(uint i = 0; i < _maxlrg; i++ ) {
84     if (!_adjs[i].is_empty()) {
85       IndexSetIterator elements(&_adjs[i]);
86       uint datum;
87       while ((datum = elements.next()) != 0) {
88         _adjs[datum].insert(i);
89       }
90     }
91   }
92   _is_square = true;
93 }
94 
95 // Compute effective degree in bulk
Compute_Effective_Degree()96 void PhaseIFG::Compute_Effective_Degree() {
97   assert( _is_square, "only on square" );
98 
99   for( uint i = 0; i < _maxlrg; i++ )
100     lrgs(i).set_degree(effective_degree(i));
101 }
102 
test_edge_sq(uint a,uint b) const103 int PhaseIFG::test_edge_sq( uint a, uint b ) const {
104   assert( _is_square, "only on square" );
105   // Swap, so that 'a' has the lesser count.  Then binary search is on
106   // the smaller of a's list and b's list.
107   if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
108   //return _adjs[a].unordered_member(b);
109   return _adjs[a].member(b);
110 }
111 
112 // Union edges of B into A
// Union edges of B into A.  Leaves B's own adjacency list untouched; only
// A and the shared neighbors are updated.
void PhaseIFG::Union(uint a, uint b) {
  assert( _is_square, "only on square" );
  IndexSet *A = &_adjs[a];
  if (!_adjs[b].is_empty()) {
    IndexSetIterator b_elements(&_adjs[b]);
    uint datum;
    while ((datum = b_elements.next()) != 0) {
      // insert() returns non-zero only for a newly added member, so the
      // mirror edge and degree invalidation happen once per new edge.
      if (A->insert(datum)) {
        _adjs[datum].insert(a);   // keep the matrix square
        lrgs(a).invalid_degree();
        lrgs(datum).invalid_degree();
      }
    }
  }
}
128 
129 // Yank a Node and all connected edges from the IFG.  Return a
130 // list of neighbors (edges) yanked.
// Yank a Node and all connected edges from the IFG.  Return a
// list of neighbors (edges) yanked.  Note that a's own adjacency list is
// preserved (and returned) so re_insert() can restore the edges later.
IndexSet *PhaseIFG::remove_node( uint a ) {
  assert( _is_square, "only on square" );
  assert( !_yanked->test(a), "" );
  _yanked->set(a);

  // I remove the LRG from all neighbors.
  LRG &lrg_a = lrgs(a);

  if (!_adjs[a].is_empty()) {
    IndexSetIterator elements(&_adjs[a]);
    uint datum;
    while ((datum = elements.next()) != 0) {
      // Drop the mirror edge and subtract a's contribution from the
      // neighbor's cached effective degree.
      _adjs[datum].remove(a);
      lrgs(datum).inc_degree(-lrg_a.compute_degree(lrgs(datum)));
    }
  }
  return neighbors(a);
}
149 
150 // Re-insert a yanked Node.
re_insert(uint a)151 void PhaseIFG::re_insert(uint a) {
152   assert( _is_square, "only on square" );
153   assert( _yanked->test(a), "" );
154   _yanked->remove(a);
155 
156   if (_adjs[a].is_empty()) return;
157 
158   IndexSetIterator elements(&_adjs[a]);
159   uint datum;
160   while ((datum = elements.next()) != 0) {
161     _adjs[datum].insert(a);
162     lrgs(datum).invalid_degree();
163   }
164 }
165 
166 // Compute the degree between 2 live ranges.  If both live ranges are
167 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
168 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
169 // MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
170 // this is so.
compute_degree(LRG & l) const171 int LRG::compute_degree(LRG &l) const {
172   int tmp;
173   int num_regs = _num_regs;
174   int nregs = l.num_regs();
175   tmp =  (_fat_proj || l._fat_proj)     // either is a fat-proj?
176     ? (num_regs * nregs)                // then use product
177     : MAX2(num_regs,nregs);             // else use max
178   return tmp;
179 }
180 
181 // Compute effective degree for this live range.  If both live ranges are
182 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
183 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
184 // MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
185 // this is so.
effective_degree(uint lidx) const186 int PhaseIFG::effective_degree(uint lidx) const {
187   IndexSet *s = neighbors(lidx);
188   if (s->is_empty()) return 0;
189   int eff = 0;
190   int num_regs = lrgs(lidx).num_regs();
191   int fat_proj = lrgs(lidx)._fat_proj;
192   IndexSetIterator elements(s);
193   uint nidx;
194   while ((nidx = elements.next()) != 0) {
195     LRG &lrgn = lrgs(nidx);
196     int nregs = lrgn.num_regs();
197     eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
198       ? (num_regs * nregs)              // then use product
199       : MAX2(num_regs,nregs);           // else use max
200   }
201   return eff;
202 }
203 
204 
205 #ifndef PRODUCT
// Debug print of the whole interference graph.  Square graphs print each
// vertex's full adjacency list; triangular graphs print the implied upper
// edges (scanned from larger rows) before '|' and the stored list after.
void PhaseIFG::dump() const {
  tty->print_cr("-- Interference Graph --%s--",
                _is_square ? "square" : "triangular" );
  if (_is_square) {
    for (uint i = 0; i < _maxlrg; i++) {
      tty->print(_yanked->test(i) ? "XX " : "  ");   // mark yanked nodes
      tty->print("L%d: { ",i);
      if (!_adjs[i].is_empty()) {
        IndexSetIterator elements(&_adjs[i]);
        uint datum;
        while ((datum = elements.next()) != 0) {
          tty->print("L%d ", datum);
        }
      }
      tty->print_cr("}");

    }
    return;
  }

  // Triangular
  for( uint i = 0; i < _maxlrg; i++ ) {
    uint j;
    tty->print(_yanked->test(i) ? "XX " : "  ");
    tty->print("L%d: { ",i);
    // Edges stored under larger-numbered rows that mention i
    for( j = _maxlrg; j > i; j-- )
      if( test_edge(j - 1,i) ) {
        tty->print("L%d ",j - 1);
      }
    tty->print("| ");
    // Edges stored in i's own row
    if (!_adjs[i].is_empty()) {
      IndexSetIterator elements(&_adjs[i]);
      uint datum;
      while ((datum = elements.next()) != 0) {
        tty->print("L%d ", datum);
      }
    }
    tty->print("}\n");
  }
  tty->print("\n");
}
247 
stats() const248 void PhaseIFG::stats() const {
249   ResourceMark rm;
250   int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
251   memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
252   uint i;
253   for( i = 0; i < _maxlrg; i++ ) {
254     h_cnt[neighbor_cnt(i)]++;
255   }
256   tty->print_cr("--Histogram of counts--");
257   for( i = 0; i < _maxlrg*2; i++ )
258     if( h_cnt[i] )
259       tty->print("%d/%d ",i,h_cnt[i]);
260   tty->cr();
261 }
262 
// Consistency check of the IFG: it must be square, sorted, free of Find
// indirection, and its cached degrees must match recomputation.
void PhaseIFG::verify( const PhaseChaitin *pc ) const {
  // IFG is square, sorted and no need for Find
  for( uint i = 0; i < _maxlrg; i++ ) {
    // A yanked node must have had all its edges removed.
    assert(!_yanked->test(i) || !neighbor_cnt(i), "Is removed completely" );
    IndexSet *set = &_adjs[i];
    if (!set->is_empty()) {
      IndexSetIterator elements(set);
      uint idx;
      uint last = 0;
      while ((idx = elements.next()) != 0) {
        assert(idx != i, "Must have empty diagonal");
        assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
        assert(_adjs[idx].member(i), "IFG not square");
        assert(!_yanked->test(idx), "No yanked neighbors");
        assert(last < idx, "not sorted increasing");
        last = idx;
      }
    }
    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
  }
}
284 #endif
285 
286 /*
287  * Interfere this register with everything currently live.
288  * Check for interference by checking overlap of regmasks.
289  * Only interfere if acceptable register masks overlap.
290  */
interfere_with_live(uint lid,IndexSet * liveout)291 void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) {
292   if (!liveout->is_empty()) {
293     LRG& lrg = lrgs(lid);
294     const RegMask &rm = lrg.mask();
295     IndexSetIterator elements(liveout);
296     uint interfering_lid = elements.next();
297     while (interfering_lid != 0) {
298       LRG& interfering_lrg = lrgs(interfering_lid);
299       if (rm.overlap(interfering_lrg.mask())) {
300         _ifg->add_edge(lid, interfering_lid);
301       }
302       interfering_lid = elements.next();
303     }
304   }
305 }
306 
307 // Actually build the interference graph.  Uses virtual registers only, no
308 // physical register masks.  This allows me to be very aggressive when
309 // coalescing copies.  Some of this aggressiveness will have to be undone
310 // later, but I'd rather get all the copies I can now (since unremoved copies
311 // at this point can end up in bad places).  Copies I re-insert later I have
312 // more opportunity to insert them in low-frequency locations.
// Actually build the interference graph.  Uses virtual registers only, no
// physical register masks.  This allows me to be very aggressive when
// coalescing copies.  Some of this aggressiveness will have to be undone
// later, but I'd rather get all the copies I can now (since unremoved copies
// at this point can end up in bad places).  Copies I re-insert later I have
// more opportunity to insert them in low-frequency locations.
void PhaseChaitin::build_ifg_virtual( ) {
  Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);

  // For all blocks (in any order) do...
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    IndexSet* liveout = _live->live(block);

    // The IFG is built by a single reverse pass over each basic block.
    // Starting with the known live-out set, we remove things that get
    // defined and add things that become live (essentially executing one
    // pass of a standard LIVE analysis). Just before a Node defines a value
    // (and removes it from the live-ness set) that value is certainly live.
    // The defined value interferes with everything currently live.  The
    // value is then removed from the live-ness set and it's inputs are
    // added to the live-ness set.
    for (uint j = block->end_idx() + 1; j > 1; j--) {
      Node* n = block->get_node(j - 1);

      // Get value being defined
      uint r = _lrg_map.live_range_id(n);

      // Some special values do not allocate
      if (r) {

        // Remove from live-out set
        liveout->remove(r);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if (idx != 0) {
          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
        }

        // Interfere with everything live
        interfere_with_live(r, liveout);
      }

      // Make all inputs live
      if (!n->is_Phi()) {      // Phi function uses come from prior block
        for(uint k = 1; k < n->req(); k++) {
          liveout->insert(_lrg_map.live_range_id(n->in(k)));
        }
      }

      // 2-address instructions always have the defined value live
      // on entry to the instruction, even though it is being defined
      // by the instruction.  We pretend a virtual copy sits just prior
      // to the instruction and kills the src-def'd register.
      // In other words, for 2-address instructions the defined value
      // interferes with all inputs.
      uint idx;
      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
        const MachNode *mach = n->as_Mach();
        // Sometimes my 2-address ADDs are commuted in a bad way.
        // We generally want the USE-DEF register to refer to the
        // loop-varying quantity, to avoid a copy.
        uint op = mach->ideal_Opcode();
        // Check that mach->num_opnds() == 3 to ensure instruction is
        // not subsuming constants, effectively excludes addI_cin_imm
        // Can NOT swap for instructions like addI_cin_imm since it
        // is adding zero to yhi + carry and the second ideal-input
        // points to the result of adding low-halves.
        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
            n->in(1)->bottom_type()->base() == Type::Int &&
            // See if the ADD is involved in a tight data loop the wrong way
            n->in(2)->is_Phi() &&
            n->in(2)->in(2) == n ) {
          // Commute the ADD's inputs so the USE-DEF operand is the
          // loop-varying Phi input.
          Node *tmp = n->in(1);
          n->set_req( 1, n->in(2) );
          n->set_req( 2, tmp );
        }
        // Defined value interferes with all inputs except the one it
        // shares a register with (the two-address USE-DEF input).
        uint lidx = _lrg_map.live_range_id(n->in(idx));
        for (uint k = 1; k < n->req(); k++) {
          uint kidx = _lrg_map.live_range_id(n->in(k));
          if (kidx != lidx) {
            _ifg->add_edge(r, kidx);
          }
        }
      }
    } // End of forall instructions in block
  } // End of forall blocks
}
399 
400 #ifdef ASSERT
count_int_pressure(IndexSet * liveout)401 uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
402   if (liveout->is_empty()) {
403     return 0;
404   }
405   IndexSetIterator elements(liveout);
406   uint lidx = elements.next();
407   uint cnt = 0;
408   while (lidx != 0) {
409     LRG& lrg = lrgs(lidx);
410     if (lrg.mask_is_nonempty_and_up() &&
411         !lrg.is_float_or_vector() &&
412         lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
413       cnt += lrg.reg_pressure();
414     }
415     lidx = elements.next();
416   }
417   return cnt;
418 }
419 
count_float_pressure(IndexSet * liveout)420 uint PhaseChaitin::count_float_pressure(IndexSet* liveout) {
421   if (liveout->is_empty()) {
422     return 0;
423   }
424   IndexSetIterator elements(liveout);
425   uint lidx = elements.next();
426   uint cnt = 0;
427   while (lidx != 0) {
428     LRG& lrg = lrgs(lidx);
429     if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) {
430       cnt += lrg.reg_pressure();
431     }
432     lidx = elements.next();
433   }
434   return cnt;
435 }
436 #endif
437 
438 /*
439  * Adjust register pressure down by 1.  Capture last hi-to-low transition,
440  */
/*
 * Adjust register pressure down by 1.  Capture last hi-to-low transition,
 */
void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) {
  if (lrg.mask_is_nonempty_and_up()) {
    if (lrg.is_float_or_vector()) {
      float_pressure.lower(lrg, location);
    } else {
      // Do not count the SP and flag registers
      const RegMask& r = lrg.mask();
      if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
        int_pressure.lower(lrg, location);
      }
    }
  }
  // The recount cross-check only holds when not generating scheduling info;
  // with scheduling info the incremental pressure intentionally diverges.
  if (_scheduling_info_generated == false) {
    assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
    assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
  }
}
458 
459 /* Go to the first non-phi index in a block */
first_nonphi_index(Block * b)460 static uint first_nonphi_index(Block* b) {
461   uint i;
462   uint end_idx = b->end_idx();
463   for (i = 1; i < end_idx; i++) {
464     Node* n = b->get_node(i);
465     if (!n->is_Phi()) {
466       break;
467     }
468   }
469   return i;
470 }
471 
472 /*
473  * Spills could be inserted before a CreateEx node which should be the first
474  * instruction in a block after Phi nodes. If so, move the CreateEx node up.
475  */
/*
 * Spills could be inserted before a CreateEx node which should be the first
 * instruction in a block after Phi nodes. If so, move the CreateEx node up.
 */
static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
  for (uint i = first_inst; i < last_inst; i++) {
    Node* ex = b->get_node(i);
    if (ex->is_SpillCopy()) {
      // Skip over spill copies that were inserted ahead of the CreateEx.
      continue;
    }

    if (i > first_inst &&
        ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
      // CreateEx was displaced by spills; hoist it back to the first slot.
      b->remove_node(i);
      b->insert_node(ex, first_inst);
    }
    // Stop once a CreateEx or any other node is found
    break;
  }
}
492 
493 /*
494  * When new live ranges are live, we raise the register pressure
495  */
raise_pressure(Block * b,LRG & lrg,Pressure & int_pressure,Pressure & float_pressure)496 void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) {
497   if (lrg.mask_is_nonempty_and_up()) {
498     if (lrg.is_float_or_vector()) {
499       float_pressure.raise(lrg);
500     } else {
501       // Do not count the SP and flag registers
502       const RegMask& rm = lrg.mask();
503       if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
504         int_pressure.raise(lrg);
505       }
506     }
507   }
508 }
509 
510 
511 /*
512  * Computes the initial register pressure of a block, looking at all live
513  * ranges in the liveout. The register pressure is computed for both float
514  * and int/pointer registers.
515  * Live ranges in the liveout are presumed live for the whole block.
516  * We add the cost for the whole block to the area of the live ranges initially.
517  * If a live range gets killed in the block, we'll subtract the unused part of
518  * the block from the area.
519  */
compute_initial_block_pressure(Block * b,IndexSet * liveout,Pressure & int_pressure,Pressure & float_pressure,double cost)520 void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) {
521   if (!liveout->is_empty()) {
522     IndexSetIterator elements(liveout);
523     uint lid = elements.next();
524     while (lid != 0) {
525       LRG &lrg = lrgs(lid);
526       lrg._area += cost;
527       raise_pressure(b, lrg, int_pressure, float_pressure);
528       lid = elements.next();
529     }
530   }
531   assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
532   assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
533 }
534 
535 /*
536 * Computes the entry register pressure of a block, looking at all live
537 * ranges in the livein. The register pressure is computed for both float
538 * and int/pointer registers.
539 */
/*
* Computes the entry register pressure of a block, looking at all live
* ranges in the livein. The register pressure is computed for both float
* and int/pointer registers.
*/
void PhaseChaitin::compute_entry_block_pressure(Block* b) {
  IndexSet *livein = _live->livein(b);
  if (!livein->is_empty()) {
    IndexSetIterator elements(livein);
    uint lid = elements.next();
    while (lid != 0) {
      LRG &lrg = lrgs(lid);
      raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
      lid = elements.next();
    }
  }
  // Now check phis for locally defined inputs
  for (uint j = 0; j < b->number_of_nodes(); j++) {
    Node* n = b->get_node(j);
    if (n->is_Phi()) {
      for (uint k = 1; k < n->req(); k++) {
        Node* phi_in = n->in(k);
        // Because we are talking about phis, raise register pressure once for each
        // instance of a phi to account for a single value
        if (_cfg.get_block_for_node(phi_in) == b) {
          // NOTE(review): indexes lrgs() with the node's _idx rather than
          // _lrg_map.live_range_id(phi_in) as done elsewhere — confirm this
          // is intentional for phi inputs defined in the same block.
          LRG& lrg = lrgs(phi_in->_idx);
          raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
          break;   // one raise per phi, no matter how many local inputs
        }
      }
    }
  }
  // Record the entry pressure as the block's starting pressure.
  _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure());
  _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure());
}
570 
571 /*
572 * Computes the exit register pressure of a block, looking at all live
573 * ranges in the liveout. The register pressure is computed for both float
574 * and int/pointer registers.
575 */
compute_exit_block_pressure(Block * b)576 void PhaseChaitin::compute_exit_block_pressure(Block* b) {
577 
578   IndexSet* livein = _live->live(b);
579   _sched_int_pressure.set_current_pressure(0);
580   _sched_float_pressure.set_current_pressure(0);
581   if (!livein->is_empty()) {
582     IndexSetIterator elements(livein);
583     uint lid = elements.next();
584     while (lid != 0) {
585       LRG &lrg = lrgs(lid);
586       raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
587       lid = elements.next();
588     }
589   }
590 }
591 
592 /*
593  * Remove dead node if it's not used.
594  * We only remove projection nodes if the node "defining" the projection is
595  * dead, for example on x86, if we have a dead Add node we remove its
596  * RFLAGS node.
597  */
/*
 * Remove dead node if it's not used.
 * We only remove projection nodes if the node "defining" the projection is
 * dead, for example on x86, if we have a dead Add node we remove its
 * RFLAGS node.
 * Returns true if the node was removed from the block.
 */
bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
  Node* def = n->in(0);
  // Non-projections are removable outright; projections only when their
  // defining node's live range is dead (not in liveout).
  if (!n->is_Proj() ||
      (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
    if (n->is_MachProj()) {
      // Don't remove KILL projections if their "defining" nodes have
      // memory effects (have SCMemProj projection node) -
      // they are not dead even when their result is not used.
      // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
      // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
      // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
      // in block in such order that KILL MachProj nodes are processed first.
      if (def->has_out_with(Op_SCMemProj)) {
        return false;
      }
    }
    b->remove_node(location);
    // Clear the live range's def if this node was it.
    LRG& lrg = lrgs(lid);
    if (lrg._def == n) {
      lrg._def = 0;
    }
    // Fully unlink the node from the graph and the CFG.
    n->disconnect_inputs(C);
    _cfg.unmap_node_from_block(n);
    n->replace_by(C->top());
    return true;
  }
  return false;
}
626 
627 /*
628  * When encountering a fat projection, we might go from a low to high to low
629  * (since the fat proj only lives at this instruction) going backwards in the
630  * block. If we find a low to high transition, we record it.
631  */
check_for_high_pressure_transition_at_fatproj(uint & block_reg_pressure,uint location,LRG & lrg,Pressure & pressure,const int op_regtype)632 void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) {
633   RegMask mask_tmp = lrg.mask();
634   mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]);
635   pressure.check_pressure_at_fatproj(location, mask_tmp);
636 }
637 
638 /*
639  * Insure high score for immediate-use spill copies so they get a color.
640  * All single-use MachSpillCopy(s) that immediately precede their
641  * use must color early.  If a longer live range steals their
642  * color, the spill copy will split and may push another spill copy
643  * further away resulting in an infinite spill-split-retry cycle.
644  * Assigning a zero area results in a high score() and a good
645  * location in the simplify list.
646  */
/*
 * Insure high score for immediate-use spill copies so they get a color.
 * All single-use MachSpillCopy(s) that immediately precede their
 * use must color early.  If a longer live range steals their
 * color, the spill copy will split and may push another spill copy
 * further away resulting in an infinite spill-split-retry cycle.
 * Assigning a zero area results in a high score() and a good
 * location in the simplify list.
 */
void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
  if (n->is_SpillCopy() &&
      lrg.is_singledef() && // A multi defined live range can still split
      n->outcnt() == 1 &&   // and use must be in this block
      _cfg.get_block_for_node(n->unique_out()) == b) {

    Node* single_use = n->unique_out();
    assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
    // Use can be earlier in block if it is a Phi, but then I should be a MultiDef

    // Find first non SpillCopy 'm' that follows the current instruction
    // (current_inst - 1) is index for current instruction 'n'
    Node* m = n;
    for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
      m = b->get_node(i);
    }
    // Only the copy whose use immediately follows it (possibly through a
    // run of other spill copies) gets the zero-area "must color" score.
    if (m == single_use) {
      lrg._area = 0.0;
    }
  }
}
668 
669 /*
670  * Copies do not define a new value and so do not interfere.
671  * Remove the copies source from the liveout set before interfering.
672  */
remove_interference_from_copy(Block * b,uint location,uint lid_copy,IndexSet * liveout,double cost,Pressure & int_pressure,Pressure & float_pressure)673 void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
674   if (liveout->remove(lid_copy)) {
675     LRG& lrg_copy = lrgs(lid_copy);
676     lrg_copy._area -= cost;
677 
678     // Lower register pressure since copy and definition can share the same register
679     lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure);
680   }
681 }
682 
683 /*
684  * The defined value must go in a particular register. Remove that register from
685  * all conflicting parties and avoid the interference.
686  */
/*
 * The defined value must go in a particular register. Remove that register from
 * all conflicting parties and avoid the interference.
 */
void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
  if (liveout->is_empty()) return;
  // Check for common case
  const RegMask& rm = lrg.mask();
  int r_size = lrg.num_regs();
  // Smear odd bits
  IndexSetIterator elements(liveout);
  uint l = elements.next();
  while (l != 0) {
    LRG& interfering_lrg = lrgs(l);
    // If 'l' must spill already, do not further hack his bits.
    // He'll get some interferences and be forced to spill later.
    if (interfering_lrg._must_spill) {
      l = elements.next();
      continue;
    }

    // Remove bound register(s) from 'l's choices
    // Save the old mask/size so the change can be undone if 'l' goes dry.
    RegMask old = interfering_lrg.mask();
    uint old_size = interfering_lrg.mask_size();

    // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
    // longer interferes with 'rm'.  If 'l' requires aligned
    // adjacent pairs, subtract out bit pairs.
    assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");

    if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
      RegMask r2mask = rm;
      // Leave only aligned set of bits.
      r2mask.smear_to_sets(interfering_lrg.num_regs());
      // It includes vector case.
      interfering_lrg.SUBTRACT(r2mask);
      interfering_lrg.compute_set_mask_size();
    } else if (r_size != 1) {
      // fat proj
      interfering_lrg.SUBTRACT(rm);
      interfering_lrg.compute_set_mask_size();
    } else {
      // Common case: size 1 bound removal
      OptoReg::Name r_reg = rm.find_first_elem();
      if (interfering_lrg.mask().Member(r_reg)) {
        interfering_lrg.Remove(r_reg);
        // Keep the cached size consistent without a full recount.
        interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
      }
    }

    // If 'l' goes completely dry, it must spill.
    if (interfering_lrg.not_free()) {
      // Give 'l' some kind of reasonable mask, so it picks up
      // interferences (and will spill later).
      interfering_lrg.set_mask(old);
      interfering_lrg.set_mask_size(old_size);
      must_spill++;
      interfering_lrg._must_spill = 1;
      interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
    }
    l = elements.next();
  }
}
746 
747 /*
748  * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
749  * sole use of a StoreLConditional. While StoreLConditionals set memory (the
750  * SCMemProj use) they also def flags; if that flag def is unused the allocator
751  * sees a flag-setting instruction with no use of the flags and assumes it's
752  * dead.  This keeps the (useless) flag-setting behavior alive while also
753  * keeping the (useful) memory update effect.
754  */
/*
 * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
 * sole use of a StoreLConditional. While StoreLConditionals set memory (the
 * SCMemProj use) they also def flags; if that flag def is unused the allocator
 * sees a flag-setting instruction with no use of the flags and assumes it's
 * dead.  This keeps the (useless) flag-setting behavior alive while also
 * keeping the (useful) memory update effect.
 */
void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
  JVMState* jvms = n->jvms();
  // Inputs at or beyond debug_start are debug info; spilling them has no
  // use-side cost.  999999 means "no JVM state: everything is a real use".
  uint debug_start = jvms ? jvms->debug_start() : 999999;

  // SCMemProj: include input 0 so the defining node stays alive (see above).
  for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
    Node* def = n->in(k);
    uint lid = _lrg_map.live_range_id(def);
    if (!lid) {
      continue;
    }
    LRG& lrg = lrgs(lid);

    // No use-side cost for spilling debug info
    if (k < debug_start) {
      // A USE costs twice block frequency (once for the Load, once
      // for a Load-delay).  Rematerialized uses only cost once.
      lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
    }

    if (liveout->insert(lid)) {
      // Newly live things assumed live from here to top of block
      lrg._area += cost;
      raise_pressure(b, lrg, int_pressure, float_pressure);
      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
    }
    assert(lrg._area >= 0.0, "negative spill area" );
  }
}
784 
785 /*
786  * If we run off the top of the block with high pressure just record that the
787  * whole block is high pressure. (Even though we might have a transition
788  * later down in the block)
789  */
check_for_high_pressure_block(Pressure & pressure)790 void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) {
791   // current pressure now means the pressure before the first instruction in the block
792   // (since we have stepped through all instructions backwards)
793   if (pressure.current_pressure() > pressure.high_pressure_limit()) {
794     pressure.set_high_pressure_index_to_block_start();
795   }
796 }
797 
798 /*
 * Compute the high pressure index; avoid landing in the middle of projnodes
800  * and set the high pressure index for the block
801  */
adjust_high_pressure_index(Block * b,uint & block_hrp_index,Pressure & pressure)802 void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) {
803   uint i = pressure.high_pressure_index();
804   if (i < b->number_of_nodes() && i < b->end_idx() + 1) {
805     Node* cur = b->get_node(i);
806     while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
807       cur = b->get_node(--i);
808     }
809   }
810   block_hrp_index = i;
811 }
812 
print_pressure_info(Pressure & pressure,const char * str)813 void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
814   if (str != NULL) {
815     tty->print_cr("#  *** %s ***", str);
816   }
817   tty->print_cr("#     start pressure is = %d", pressure.start_pressure());
818   tty->print_cr("#     max pressure is = %d", pressure.final_pressure());
819   tty->print_cr("#     end pressure is = %d", pressure.current_pressure());
820   tty->print_cr("#");
821 }
822 
823 /* Build an interference graph:
824  *   That is, if 2 live ranges are simultaneously alive but in their acceptable
825  *   register sets do not overlap, then they do not interfere. The IFG is built
826  *   by a single reverse pass over each basic block. Starting with the known
827  *   live-out set, we remove things that get defined and add things that become
828  *   live (essentially executing one pass of a standard LIVE analysis). Just
829  *   before a Node defines a value (and removes it from the live-ness set) that
830  *   value is certainly live. The defined value interferes with everything
 *   currently live. The value is then removed from the live-ness set and its
832  *   inputs are added to the live-ness set.
833  * Compute register pressure for each block:
834  *   We store the biggest register pressure for each block and also the first
835  *   low to high register pressure transition within the block (if any).
836  */
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  Compile::TracePhase tp("buildIFG", &timers[_t_buildIFGphysical]);

  // Count of live ranges forced to spill; returned to the caller.
  uint must_spill = 0;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // Clone (rather than smash in place) the liveout info, so it is alive
    // for the "collect_gc_info" phase later.
    IndexSet liveout(_live->live(block));

    uint first_inst = first_nonphi_index(block);
    uint last_inst = block->end_idx();

    move_exception_node_up(block, first_inst, last_inst);

    // Per-block trackers for integer and float register pressure.
    Pressure int_pressure(last_inst + 1, INTPRESSURE);
    Pressure float_pressure(last_inst + 1, FLOATPRESSURE);
    block->_reg_pressure = 0;
    block->_freg_pressure = 0;

    // 'cost' = block frequency times instructions remaining; used as the
    // spill-area contribution for values live at this point.
    int inst_count = last_inst - first_inst;
    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
    assert(cost >= 0.0, "negative spill cost" );

    compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);

    // Single reverse pass: remove defs from liveout, add uses, and record
    // interferences and pressure transitions along the way.
    for (uint location = last_inst; location > 0; location--) {
      Node* n = block->get_node(location);
      uint lid = _lrg_map.live_range_id(n);

      if (lid) {
        LRG& lrg = lrgs(lid);

        // A DEF normally costs block frequency; rematerialized values are
        // removed from the DEF sight, so LOWER costs here.
        lrg._cost += n->rematerialize() ? 0 : block->_freq;

        // DEF whose value is not in the liveout set (and is not a safepoint):
        // the node may be dead and removable; if it stays, a fat-proj DEF can
        // still trigger a high-pressure transition.
        if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
          if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
            float_pressure.lower_high_pressure_index();
            int_pressure.lower_high_pressure_index();
            continue;
          }
          if (lrg._fat_proj) {
            check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI);
            check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD);
          }
        } else {
          // A live range ends at its definition, remove the remaining area.
          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
          // and +Inf - +Inf = NaN. So let's not do that subtraction.
          if (g_isfinite(cost)) {
            lrg._area -= cost;
          }
          assert(lrg._area >= 0.0, "negative spill area" );

          assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);

          // The DEF kills this live range: drop it from liveout and lower
          // the register pressure accordingly.
          if (liveout.remove(lid)) {
            lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure);
          }
          // For copies, the source does not interfere with the destination.
          uint copy_idx = n->is_Copy();
          if (copy_idx) {
            uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx));
            remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure);
          }
        }

        // Since rematerializable DEFs are not bound but the live range is,
        // some uses must be bound. If we spill live range 'r', it can
        // rematerialize at each use site according to its bindings.
        if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) {
          remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill);
        }
        // This DEF interferes with everything still live here.
        interfere_with_live(lid, &liveout);
      }

      // Area remaining in the block
      inst_count--;
      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // Add n's inputs to the liveout set (Phi inputs are skipped here).
      if (!n->is_Phi()) {
        add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure);
      }
    }

    // Finalize the block's high-pressure indices and record its pressures.
    check_for_high_pressure_block(int_pressure);
    check_for_high_pressure_block(float_pressure);
    adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
    adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
    // set the final_pressure as the register pressure for the block
    block->_reg_pressure = int_pressure.final_pressure();
    block->_freg_pressure = float_pressure.final_pressure();

#ifndef PRODUCT
    // Gather Register Pressure Statistics
    if (PrintOptoStatistics) {
      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  }

  return must_spill;
}
946