1 /*
2 * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "compiler/oopMap.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "opto/addnode.hpp"
30 #include "opto/block.hpp"
31 #include "opto/callnode.hpp"
32 #include "opto/cfgnode.hpp"
33 #include "opto/chaitin.hpp"
34 #include "opto/coalesce.hpp"
35 #include "opto/indexSet.hpp"
36 #include "opto/machnode.hpp"
37 #include "opto/memnode.hpp"
38 #include "opto/opcodes.hpp"
39
// Interference-graph phase; every allocation it makes lives in 'arena'.
PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
}
42
init(uint maxlrg)43 void PhaseIFG::init( uint maxlrg ) {
44 _maxlrg = maxlrg;
45 _yanked = new (_arena) VectorSet(_arena);
46 _is_square = false;
47 // Make uninitialized adjacency lists
48 _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
49 // Also make empty live range structures
50 _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
51 memset((void*)_lrgs,0,sizeof(LRG)*maxlrg);
52 // Init all to empty
53 for( uint i = 0; i < maxlrg; i++ ) {
54 _adjs[i].initialize(maxlrg);
55 _lrgs[i].Set_All();
56 }
57 }
58
59 // Add edge between vertices a & b. These are sorted (triangular matrix),
60 // then the smaller number is inserted in the larger numbered array.
add_edge(uint a,uint b)61 int PhaseIFG::add_edge( uint a, uint b ) {
62 lrgs(a).invalid_degree();
63 lrgs(b).invalid_degree();
64 // Sort a and b, so that a is bigger
65 assert( !_is_square, "only on triangular" );
66 if( a < b ) { uint tmp = a; a = b; b = tmp; }
67 return _adjs[a].insert( b );
68 }
69
70 // Is there an edge between a and b?
test_edge(uint a,uint b) const71 int PhaseIFG::test_edge( uint a, uint b ) const {
72 // Sort a and b, so that a is larger
73 assert( !_is_square, "only on triangular" );
74 if( a < b ) { uint tmp = a; a = b; b = tmp; }
75 return _adjs[a].member(b);
76 }
77
78 // Convert triangular matrix to square matrix
SquareUp()79 void PhaseIFG::SquareUp() {
80 assert( !_is_square, "only on triangular" );
81
82 // Simple transpose
83 for(uint i = 0; i < _maxlrg; i++ ) {
84 if (!_adjs[i].is_empty()) {
85 IndexSetIterator elements(&_adjs[i]);
86 uint datum;
87 while ((datum = elements.next()) != 0) {
88 _adjs[datum].insert(i);
89 }
90 }
91 }
92 _is_square = true;
93 }
94
95 // Compute effective degree in bulk
Compute_Effective_Degree()96 void PhaseIFG::Compute_Effective_Degree() {
97 assert( _is_square, "only on square" );
98
99 for( uint i = 0; i < _maxlrg; i++ )
100 lrgs(i).set_degree(effective_degree(i));
101 }
102
test_edge_sq(uint a,uint b) const103 int PhaseIFG::test_edge_sq( uint a, uint b ) const {
104 assert( _is_square, "only on square" );
105 // Swap, so that 'a' has the lesser count. Then binary search is on
106 // the smaller of a's list and b's list.
107 if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
108 //return _adjs[a].unordered_member(b);
109 return _adjs[a].member(b);
110 }
111
112 // Union edges of B into A
Union(uint a,uint b)113 void PhaseIFG::Union(uint a, uint b) {
114 assert( _is_square, "only on square" );
115 IndexSet *A = &_adjs[a];
116 if (!_adjs[b].is_empty()) {
117 IndexSetIterator b_elements(&_adjs[b]);
118 uint datum;
119 while ((datum = b_elements.next()) != 0) {
120 if (A->insert(datum)) {
121 _adjs[datum].insert(a);
122 lrgs(a).invalid_degree();
123 lrgs(datum).invalid_degree();
124 }
125 }
126 }
127 }
128
// Yank a Node and all connected edges from the IFG. Return a
// list of neighbors (edges) yanked.
IndexSet *PhaseIFG::remove_node( uint a ) {
  assert( _is_square, "only on square" );
  assert( !_yanked->test(a), "" );
  _yanked->set(a);

  // I remove the LRG from all neighbors.
  LRG &lrg_a = lrgs(a);

  if (!_adjs[a].is_empty()) {
    IndexSetIterator elements(&_adjs[a]);
    uint datum;
    while ((datum = elements.next()) != 0) {
      // Drop the back-edge and shrink the neighbor's effective degree by
      // the pairwise contribution of 'a' (see LRG::compute_degree).
      _adjs[datum].remove(a);
      lrgs(datum).inc_degree(-lrg_a.compute_degree(lrgs(datum)));
    }
  }
  // 'a's own adjacency list is left intact so re_insert() can restore it.
  return neighbors(a);
}
149
150 // Re-insert a yanked Node.
re_insert(uint a)151 void PhaseIFG::re_insert(uint a) {
152 assert( _is_square, "only on square" );
153 assert( _yanked->test(a), "" );
154 _yanked->remove(a);
155
156 if (_adjs[a].is_empty()) return;
157
158 IndexSetIterator elements(&_adjs[a]);
159 uint datum;
160 while ((datum = elements.next()) != 0) {
161 _adjs[datum].insert(a);
162 lrgs(datum).invalid_degree();
163 }
164 }
165
166 // Compute the degree between 2 live ranges. If both live ranges are
167 // aligned-adjacent powers-of-2 then we use the MAX size. If either is
168 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
169 // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why
170 // this is so.
compute_degree(LRG & l) const171 int LRG::compute_degree(LRG &l) const {
172 int tmp;
173 int num_regs = _num_regs;
174 int nregs = l.num_regs();
175 tmp = (_fat_proj || l._fat_proj) // either is a fat-proj?
176 ? (num_regs * nregs) // then use product
177 : MAX2(num_regs,nregs); // else use max
178 return tmp;
179 }
180
181 // Compute effective degree for this live range. If both live ranges are
182 // aligned-adjacent powers-of-2 then we use the MAX size. If either is
183 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
184 // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why
185 // this is so.
effective_degree(uint lidx) const186 int PhaseIFG::effective_degree(uint lidx) const {
187 IndexSet *s = neighbors(lidx);
188 if (s->is_empty()) return 0;
189 int eff = 0;
190 int num_regs = lrgs(lidx).num_regs();
191 int fat_proj = lrgs(lidx)._fat_proj;
192 IndexSetIterator elements(s);
193 uint nidx;
194 while ((nidx = elements.next()) != 0) {
195 LRG &lrgn = lrgs(nidx);
196 int nregs = lrgn.num_regs();
197 eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
198 ? (num_regs * nregs) // then use product
199 : MAX2(num_regs,nregs); // else use max
200 }
201 return eff;
202 }
203
204
205 #ifndef PRODUCT
// Debug dump of the interference graph.  For a square graph, print each
// live range's full adjacency list; for a triangular graph, print the
// implicit (transposed) edges before the '|' and the stored edges after it.
// Yanked nodes are flagged with "XX".
void PhaseIFG::dump() const {
  tty->print_cr("-- Interference Graph --%s--",
                _is_square ? "square" : "triangular" );
  if (_is_square) {
    for (uint i = 0; i < _maxlrg; i++) {
      tty->print(_yanked->test(i) ? "XX " : " ");
      tty->print("L%d: { ",i);
      if (!_adjs[i].is_empty()) {
        IndexSetIterator elements(&_adjs[i]);
        uint datum;
        while ((datum = elements.next()) != 0) {
          tty->print("L%d ", datum);
        }
      }
      tty->print_cr("}");

    }
    return;
  }

  // Triangular: edges where i is the smaller index live in the other
  // row's list, so scan all larger rows with test_edge().
  for( uint i = 0; i < _maxlrg; i++ ) {
    uint j;
    tty->print(_yanked->test(i) ? "XX " : " ");
    tty->print("L%d: { ",i);
    for( j = _maxlrg; j > i; j-- )
      if( test_edge(j - 1,i) ) {
        tty->print("L%d ",j - 1);
      }
    tty->print("| ");
    if (!_adjs[i].is_empty()) {
      IndexSetIterator elements(&_adjs[i]);
      uint datum;
      while ((datum = elements.next()) != 0) {
        tty->print("L%d ", datum);
      }
    }
    tty->print("}\n");
  }
  tty->print("\n");
}
247
stats() const248 void PhaseIFG::stats() const {
249 ResourceMark rm;
250 int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
251 memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
252 uint i;
253 for( i = 0; i < _maxlrg; i++ ) {
254 h_cnt[neighbor_cnt(i)]++;
255 }
256 tty->print_cr("--Histogram of counts--");
257 for( i = 0; i < _maxlrg*2; i++ )
258 if( h_cnt[i] )
259 tty->print("%d/%d ",i,h_cnt[i]);
260 tty->cr();
261 }
262
// Sanity-check the IFG invariants: the graph is square and symmetric, each
// adjacency list is sorted with no self-edges, no yanked node still has
// neighbors, live range indices need no union-find lookup, and any cached
// degree matches a fresh effective_degree() computation.
void PhaseIFG::verify( const PhaseChaitin *pc ) const {
  // IFG is square, sorted and no need for Find
  for( uint i = 0; i < _maxlrg; i++ ) {
    assert(!_yanked->test(i) || !neighbor_cnt(i), "Is removed completely" );
    IndexSet *set = &_adjs[i];
    if (!set->is_empty()) {
      IndexSetIterator elements(set);
      uint idx;
      uint last = 0;
      while ((idx = elements.next()) != 0) {
        assert(idx != i, "Must have empty diagonal");
        assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
        assert(_adjs[idx].member(i), "IFG not square");
        assert(!_yanked->test(idx), "No yanked neighbors");
        assert(last < idx, "not sorted increasing");
        last = idx;
      }
    }
    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
  }
}
284 #endif
285
286 /*
287 * Interfere this register with everything currently live.
288 * Check for interference by checking overlap of regmasks.
289 * Only interfere if acceptable register masks overlap.
290 */
interfere_with_live(uint lid,IndexSet * liveout)291 void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) {
292 if (!liveout->is_empty()) {
293 LRG& lrg = lrgs(lid);
294 const RegMask &rm = lrg.mask();
295 IndexSetIterator elements(liveout);
296 uint interfering_lid = elements.next();
297 while (interfering_lid != 0) {
298 LRG& interfering_lrg = lrgs(interfering_lid);
299 if (rm.overlap(interfering_lrg.mask())) {
300 _ifg->add_edge(lid, interfering_lid);
301 }
302 interfering_lid = elements.next();
303 }
304 }
305 }
306
// Actually build the interference graph.  Uses virtual registers only, no
// physical register masks.  This allows me to be very aggressive when
// coalescing copies.  Some of this aggressiveness will have to be undone
// later, but I'd rather get all the copies I can now (since unremoved copies
// at this point can end up in bad places).  Copies I re-insert later I have
// more opportunity to insert them in low-frequency locations.
void PhaseChaitin::build_ifg_virtual( ) {
  Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);

  // For all blocks (in any order) do...
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    IndexSet* liveout = _live->live(block);

    // The IFG is built by a single reverse pass over each basic block.
    // Starting with the known live-out set, we remove things that get
    // defined and add things that become live (essentially executing one
    // pass of a standard LIVE analysis). Just before a Node defines a value
    // (and removes it from the live-ness set) that value is certainly live.
    // The defined value interferes with everything currently live. The
    // value is then removed from the live-ness set and it's inputs are
    // added to the live-ness set.
    for (uint j = block->end_idx() + 1; j > 1; j--) {
      Node* n = block->get_node(j - 1);

      // Get value being defined
      uint r = _lrg_map.live_range_id(n);

      // Some special values do not allocate
      if (r) {

        // Remove from live-out set
        liveout->remove(r);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        // is_Copy() returns the input index of the copied value, or 0.
        uint idx = n->is_Copy();
        if (idx != 0) {
          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
        }

        // Interfere with everything live
        interfere_with_live(r, liveout);
      }

      // Make all inputs live
      if (!n->is_Phi()) {      // Phi function uses come from prior block
        for(uint k = 1; k < n->req(); k++) {
          liveout->insert(_lrg_map.live_range_id(n->in(k)));
        }
      }

      // 2-address instructions always have the defined value live
      // on entry to the instruction, even though it is being defined
      // by the instruction.  We pretend a virtual copy sits just prior
      // to the instruction and kills the src-def'd register.
      // In other words, for 2-address instructions the defined value
      // interferes with all inputs.
      uint idx;
      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
        const MachNode *mach = n->as_Mach();
        // Sometimes my 2-address ADDs are commuted in a bad way.
        // We generally want the USE-DEF register to refer to the
        // loop-varying quantity, to avoid a copy.
        uint op = mach->ideal_Opcode();
        // Check that mach->num_opnds() == 3 to ensure instruction is
        // not subsuming constants, effectively excludes addI_cin_imm
        // Can NOT swap for instructions like addI_cin_imm since it
        // is adding zero to yhi + carry and the second ideal-input
        // points to the result of adding low-halves.
        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
            n->in(1)->bottom_type()->base() == Type::Int &&
            // See if the ADD is involved in a tight data loop the wrong way
            n->in(2)->is_Phi() &&
            n->in(2)->in(2) == n ) {
          // Commute the inputs so the USE-DEF slot holds the loop-varying value
          Node *tmp = n->in(1);
          n->set_req( 1, n->in(2) );
          n->set_req( 2, tmp );
        }
        // Defined value interferes with all inputs (except the
        // USE-DEF input itself, which shares the register)
        uint lidx = _lrg_map.live_range_id(n->in(idx));
        for (uint k = 1; k < n->req(); k++) {
          uint kidx = _lrg_map.live_range_id(n->in(k));
          if (kidx != lidx) {
            _ifg->add_edge(r, kidx);
          }
        }
      }
    } // End of forall instructions in block
  } // End of forall blocks
}
399
400 #ifdef ASSERT
count_int_pressure(IndexSet * liveout)401 uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
402 if (liveout->is_empty()) {
403 return 0;
404 }
405 IndexSetIterator elements(liveout);
406 uint lidx = elements.next();
407 uint cnt = 0;
408 while (lidx != 0) {
409 LRG& lrg = lrgs(lidx);
410 if (lrg.mask_is_nonempty_and_up() &&
411 !lrg.is_float_or_vector() &&
412 lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
413 cnt += lrg.reg_pressure();
414 }
415 lidx = elements.next();
416 }
417 return cnt;
418 }
419
count_float_pressure(IndexSet * liveout)420 uint PhaseChaitin::count_float_pressure(IndexSet* liveout) {
421 if (liveout->is_empty()) {
422 return 0;
423 }
424 IndexSetIterator elements(liveout);
425 uint lidx = elements.next();
426 uint cnt = 0;
427 while (lidx != 0) {
428 LRG& lrg = lrgs(lidx);
429 if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) {
430 cnt += lrg.reg_pressure();
431 }
432 lidx = elements.next();
433 }
434 return cnt;
435 }
436 #endif
437
438 /*
439 * Adjust register pressure down by 1. Capture last hi-to-low transition,
440 */
lower_pressure(Block * b,uint location,LRG & lrg,IndexSet * liveout,Pressure & int_pressure,Pressure & float_pressure)441 void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) {
442 if (lrg.mask_is_nonempty_and_up()) {
443 if (lrg.is_float_or_vector()) {
444 float_pressure.lower(lrg, location);
445 } else {
446 // Do not count the SP and flag registers
447 const RegMask& r = lrg.mask();
448 if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
449 int_pressure.lower(lrg, location);
450 }
451 }
452 }
453 if (_scheduling_info_generated == false) {
454 assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
455 assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
456 }
457 }
458
459 /* Go to the first non-phi index in a block */
first_nonphi_index(Block * b)460 static uint first_nonphi_index(Block* b) {
461 uint i;
462 uint end_idx = b->end_idx();
463 for (i = 1; i < end_idx; i++) {
464 Node* n = b->get_node(i);
465 if (!n->is_Phi()) {
466 break;
467 }
468 }
469 return i;
470 }
471
/*
 * Spills could be inserted before a CreateEx node which should be the first
 * instruction in a block after Phi nodes. If so, move the CreateEx node up.
 */
static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
  // Scan past any leading SpillCopy nodes; the unconditional break below
  // means only the FIRST non-SpillCopy node is ever examined.
  for (uint i = first_inst; i < last_inst; i++) {
    Node* ex = b->get_node(i);
    if (ex->is_SpillCopy()) {
      continue;
    }

    // Only move if spill copies actually got ahead of the CreateEx
    // (i > first_inst); otherwise it is already in place.
    if (i > first_inst &&
        ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
      b->remove_node(i);
      b->insert_node(ex, first_inst);
    }
    // Stop once a CreateEx or any other node is found
    break;
  }
}
492
493 /*
494 * When new live ranges are live, we raise the register pressure
495 */
raise_pressure(Block * b,LRG & lrg,Pressure & int_pressure,Pressure & float_pressure)496 void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) {
497 if (lrg.mask_is_nonempty_and_up()) {
498 if (lrg.is_float_or_vector()) {
499 float_pressure.raise(lrg);
500 } else {
501 // Do not count the SP and flag registers
502 const RegMask& rm = lrg.mask();
503 if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
504 int_pressure.raise(lrg);
505 }
506 }
507 }
508 }
509
510
511 /*
512 * Computes the initial register pressure of a block, looking at all live
513 * ranges in the liveout. The register pressure is computed for both float
514 * and int/pointer registers.
515 * Live ranges in the liveout are presumed live for the whole block.
516 * We add the cost for the whole block to the area of the live ranges initially.
517 * If a live range gets killed in the block, we'll subtract the unused part of
518 * the block from the area.
519 */
compute_initial_block_pressure(Block * b,IndexSet * liveout,Pressure & int_pressure,Pressure & float_pressure,double cost)520 void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) {
521 if (!liveout->is_empty()) {
522 IndexSetIterator elements(liveout);
523 uint lid = elements.next();
524 while (lid != 0) {
525 LRG &lrg = lrgs(lid);
526 lrg._area += cost;
527 raise_pressure(b, lrg, int_pressure, float_pressure);
528 lid = elements.next();
529 }
530 }
531 assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
532 assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
533 }
534
/*
 * Computes the entry register pressure of a block, looking at all live
 * ranges in the livein. The register pressure is computed for both float
 * and int/pointer registers.
 */
void PhaseChaitin::compute_entry_block_pressure(Block* b) {
  IndexSet *livein = _live->livein(b);
  if (!livein->is_empty()) {
    IndexSetIterator elements(livein);
    uint lid = elements.next();
    while (lid != 0) {
      LRG &lrg = lrgs(lid);
      raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
      lid = elements.next();
    }
  }
  // Now check phis for locally defined inputs
  for (uint j = 0; j < b->number_of_nodes(); j++) {
    Node* n = b->get_node(j);
    if (n->is_Phi()) {
      for (uint k = 1; k < n->req(); k++) {
        Node* phi_in = n->in(k);
        // Because we are talking about phis, raise register pressure once for each
        // instance of a phi to account for a single value
        if (_cfg.get_block_for_node(phi_in) == b) {
          // NOTE(review): indexes lrgs() with phi_in->_idx rather than
          // _lrg_map.live_range_id(phi_in) as other loops here do --
          // presumably equivalent at this point in allocation; confirm.
          LRG& lrg = lrgs(phi_in->_idx);
          raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
          break;
        }
      }
    }
  }
  // Record the pressure at block entry as the scheduler's start pressure
  _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure());
  _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure());
}
570
571 /*
572 * Computes the exit register pressure of a block, looking at all live
573 * ranges in the liveout. The register pressure is computed for both float
574 * and int/pointer registers.
575 */
compute_exit_block_pressure(Block * b)576 void PhaseChaitin::compute_exit_block_pressure(Block* b) {
577
578 IndexSet* livein = _live->live(b);
579 _sched_int_pressure.set_current_pressure(0);
580 _sched_float_pressure.set_current_pressure(0);
581 if (!livein->is_empty()) {
582 IndexSetIterator elements(livein);
583 uint lid = elements.next();
584 while (lid != 0) {
585 LRG &lrg = lrgs(lid);
586 raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
587 lid = elements.next();
588 }
589 }
590 }
591
/*
 * Remove dead node if it's not used.
 * We only remove projection nodes if the node "defining" the projection is
 * dead, for example on x86, if we have a dead Add node we remove its
 * RFLAGS node.
 * Returns true if the node was removed from the block.
 */
bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
  Node* def = n->in(0);
  // Remove either a non-projection, or a projection whose defining node
  // has a live range that is itself not live-out.
  if (!n->is_Proj() ||
      (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
    if (n->is_MachProj()) {
      // Don't remove KILL projections if their "defining" nodes have
      // memory effects (have SCMemProj projection node) -
      // they are not dead even when their result is not used.
      // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
      // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
      // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
      // in block in such order that KILL MachProj nodes are processed first.
      if (def->has_out_with(Op_SCMemProj)) {
        return false;
      }
    }
    // Unlink the node from the block, the CFG and the graph, and clear
    // the live range's def if this node was it.
    b->remove_node(location);
    LRG& lrg = lrgs(lid);
    if (lrg._def == n) {
      lrg._def = 0;
    }
    n->disconnect_inputs(C);
    _cfg.unmap_node_from_block(n);
    n->replace_by(C->top());
    return true;
  }
  return false;
}
626
627 /*
628 * When encountering a fat projection, we might go from a low to high to low
629 * (since the fat proj only lives at this instruction) going backwards in the
630 * block. If we find a low to high transition, we record it.
631 */
check_for_high_pressure_transition_at_fatproj(uint & block_reg_pressure,uint location,LRG & lrg,Pressure & pressure,const int op_regtype)632 void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) {
633 RegMask mask_tmp = lrg.mask();
634 mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]);
635 pressure.check_pressure_at_fatproj(location, mask_tmp);
636 }
637
/*
 * Insure high score for immediate-use spill copies so they get a color.
 * All single-use MachSpillCopy(s) that immediately precede their
 * use must color early. If a longer live range steals their
 * color, the spill copy will split and may push another spill copy
 * further away resulting in an infinite spill-split-retry cycle.
 * Assigning a zero area results in a high score() and a good
 * location in the simplify list.
 */
void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
  if (n->is_SpillCopy() &&
      lrg.is_singledef() && // A multi defined live range can still split
      n->outcnt() == 1 &&   // and use must be in this block
      _cfg.get_block_for_node(n->unique_out()) == b) {

    Node* single_use = n->unique_out();
    assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
    // Use can be earlier in block if it is a Phi, but then I should be a MultiDef

    // Find first non SpillCopy 'm' that follows the current instruction
    // (current_inst - 1) is index for current instruction 'n'
    Node* m = n;
    for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
      m = b->get_node(i);
    }
    // Only when the use is the very next non-copy node does the copy
    // "immediately precede" its use; zero area => high score.
    if (m == single_use) {
      lrg._area = 0.0;
    }
  }
}
668
669 /*
670 * Copies do not define a new value and so do not interfere.
671 * Remove the copies source from the liveout set before interfering.
672 */
remove_interference_from_copy(Block * b,uint location,uint lid_copy,IndexSet * liveout,double cost,Pressure & int_pressure,Pressure & float_pressure)673 void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
674 if (liveout->remove(lid_copy)) {
675 LRG& lrg_copy = lrgs(lid_copy);
676 lrg_copy._area -= cost;
677
678 // Lower register pressure since copy and definition can share the same register
679 lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure);
680 }
681 }
682
/*
 * The defined value must go in a particular register. Remove that register from
 * all conflicting parties and avoid the interference.
 * 'must_spill' is incremented for every neighbor driven to an empty mask.
 */
void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
  if (liveout->is_empty()) return;
  // Check for common case
  const RegMask& rm = lrg.mask();
  int r_size = lrg.num_regs();
  // Smear odd bits
  IndexSetIterator elements(liveout);
  uint l = elements.next();
  while (l != 0) {
    LRG& interfering_lrg = lrgs(l);
    // If 'l' must spill already, do not further hack his bits.
    // He'll get some interferences and be forced to spill later.
    if (interfering_lrg._must_spill) {
      l = elements.next();
      continue;
    }

    // Remove bound register(s) from 'l's choices
    RegMask old = interfering_lrg.mask();
    uint old_size = interfering_lrg.mask_size();

    // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
    // longer interferes with 'rm'.  If 'l' requires aligned
    // adjacent pairs, subtract out bit pairs.
    assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");

    if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
      // Multi-register, aligned live range: subtract whole aligned sets
      RegMask r2mask = rm;
      // Leave only aligned set of bits.
      r2mask.smear_to_sets(interfering_lrg.num_regs());
      // It includes vector case.
      interfering_lrg.SUBTRACT(r2mask);
      interfering_lrg.compute_set_mask_size();
    } else if (r_size != 1) {
      // fat proj
      interfering_lrg.SUBTRACT(rm);
      interfering_lrg.compute_set_mask_size();
    } else {
      // Common case: size 1 bound removal
      OptoReg::Name r_reg = rm.find_first_elem();
      if (interfering_lrg.mask().Member(r_reg)) {
        interfering_lrg.Remove(r_reg);
        // Keep the cached mask size consistent without a full recount
        interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
      }
    }

    // If 'l' goes completely dry, it must spill.
    if (interfering_lrg.not_free()) {
      // Give 'l' some kind of reasonable mask, so it picks up
      // interferences (and will spill later).
      interfering_lrg.set_mask(old);
      interfering_lrg.set_mask_size(old_size);
      must_spill++;
      interfering_lrg._must_spill = 1;
      interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
    }
    l = elements.next();
  }
}
746
/*
 * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
 * sole use of a StoreLConditional. While StoreLConditionals set memory (the
 * SCMemProj use) they also def flags; if that flag def is unused the allocator
 * sees a flag-setting instruction with no use of the flags and assumes it's
 * dead. This keeps the (useless) flag-setting behavior alive while also
 * keeping the (useful) memory update effect.
 */
void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
  JVMState* jvms = n->jvms();
  // Inputs at or past debug_start are debug info; sentinel when no JVMState
  uint debug_start = jvms ? jvms->debug_start() : 999999;

  // SCMemProj is the one case where the control edge (input 0) is walked too
  for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
    Node* def = n->in(k);
    uint lid = _lrg_map.live_range_id(def);
    if (!lid) {
      continue;
    }
    LRG& lrg = lrgs(lid);

    // No use-side cost for spilling debug info
    if (k < debug_start) {
      // A USE costs twice block frequency (once for the Load, once
      // for a Load-delay).  Rematerialized uses only cost once.
      lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
    }

    if (liveout->insert(lid)) {
      // Newly live things assumed live from here to top of block
      lrg._area += cost;
      raise_pressure(b, lrg, int_pressure, float_pressure);
      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
    }
    assert(lrg._area >= 0.0, "negative spill area" );
  }
}
784
785 /*
786 * If we run off the top of the block with high pressure just record that the
787 * whole block is high pressure. (Even though we might have a transition
788 * later down in the block)
789 */
check_for_high_pressure_block(Pressure & pressure)790 void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) {
791 // current pressure now means the pressure before the first instruction in the block
792 // (since we have stepped through all instructions backwards)
793 if (pressure.current_pressure() > pressure.high_pressure_limit()) {
794 pressure.set_high_pressure_index_to_block_start();
795 }
796 }
797
/*
 * Compute high pressure indice; avoid landing in the middle of projnodes
 * and set the high pressure index for the block
 */
void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) {
  uint i = pressure.high_pressure_index();
  if (i < b->number_of_nodes() && i < b->end_idx() + 1) {
    Node* cur = b->get_node(i);
    // Walk backwards off any Proj / MachNullCheck / Catch cluster so the
    // recorded index does not split an instruction from its projections.
    // NOTE(review): assumes the walk always reaches a non-Proj node before
    // i underflows past the block start -- confirm against callers.
    while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
      cur = b->get_node(--i);
    }
  }
  block_hrp_index = i;
}
812
// Debug print of a Pressure record: optional header, then the start,
// maximum and end pressure of the block just processed.
void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
  if (str != NULL) {
    tty->print_cr("#  *** %s ***", str);
  }
  tty->print_cr("#  start pressure is = %d", pressure.start_pressure());
  tty->print_cr("#  max pressure is = %d", pressure.final_pressure());
  tty->print_cr("#  end pressure is = %d", pressure.current_pressure());
  tty->print_cr("#");
}
822
/* Build an interference graph:
 *   That is, if 2 live ranges are simultaneously alive but in their acceptable
 *   register sets do not overlap, then they do not interfere. The IFG is built
 *   by a single reverse pass over each basic block. Starting with the known
 *   live-out set, we remove things that get defined and add things that become
 *   live (essentially executing one pass of a standard LIVE analysis). Just
 *   before a Node defines a value (and removes it from the live-ness set) that
 *   value is certainly live. The defined value interferes with everything
 *   currently live. The value is then removed from the live-ness set and it's
 *   inputs are added to the live-ness set.
 * Compute register pressure for each block:
 *   We store the biggest register pressure for each block and also the first
 *   low to high register pressure transition within the block (if any).
 *
 * Returns the number of live ranges that must spill (accumulated by
 * remove_bound_register_from_interfering_live_ranges).
 */
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  Compile::TracePhase tp("buildIFG", &timers[_t_buildIFGphysical]);

  uint must_spill = 0;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // Clone (rather than smash in place) the liveout info, so it is alive
    // for the "collect_gc_info" phase later.
    IndexSet liveout(_live->live(block));

    // Skip leading Phi/Region-style nodes; the reverse walk below stops
    // before them (Phis are handled by the !n->is_Phi() guard).
    uint first_inst = first_nonphi_index(block);
    uint last_inst = block->end_idx();

    move_exception_node_up(block, first_inst, last_inst);

    // Separate pressure trackers for the integer and float register classes,
    // each seeded with its class's high-pressure limit.
    Pressure int_pressure(last_inst + 1, INTPRESSURE);
    Pressure float_pressure(last_inst + 1, FLOATPRESSURE);
    block->_reg_pressure = 0;
    block->_freg_pressure = 0;

    // 'cost' approximates the execution-weighted area still ahead of the
    // current position: block frequency times remaining instruction count.
    int inst_count = last_inst - first_inst;
    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
    assert(cost >= 0.0, "negative spill cost" );

    compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);

    // Reverse pass over the block body (node 0 is excluded by 'location > 0').
    for (uint location = last_inst; location > 0; location--) {
      Node* n = block->get_node(location);
      uint lid = _lrg_map.live_range_id(n);

      // lid == 0 means this node defines no tracked live range.
      if (lid) {
        LRG& lrg = lrgs(lid);

        // A DEF normally costs block frequency; rematerialized values are
        // removed from the DEF sight, so LOWER costs here.
        lrg._cost += n->rematerialize() ? 0 : block->_freq;

        if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
          // DEF is dead (not in liveout, and not kept alive by a safepoint):
          // try to delete it outright; on success drop the pressure
          // high-water bookkeeping one step and move on.
          if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
            float_pressure.lower_high_pressure_index();
            int_pressure.lower_high_pressure_index();
            continue;
          }
          // A fat-proj kill (e.g. a call clobbering many registers) can
          // itself trigger a high-pressure transition even though the DEF
          // is dead; check both register classes.
          if (lrg._fat_proj) {
            check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI);
            check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD);
          }
        } else {
          // A live range ends at its definition, remove the remaining area.
          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
          // and +Inf - +Inf = NaN. So let's not do that subtraction.
          if (g_isfinite(cost)) {
            lrg._area -= cost;
          }
          assert(lrg._area >= 0.0, "negative spill area" );

          // Bias coalescing: copies immediately following this DEF get a
          // high score so they are preferentially merged away.
          assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);

          // The DEF kills the live range going upwards; if it really was
          // live-out here, the pressure drops at this point.
          if (liveout.remove(lid)) {
            lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure);
          }
          // For a copy, the source should not interfere with the dest —
          // remove that interference so the copy can coalesce.
          uint copy_idx = n->is_Copy();
          if (copy_idx) {
            uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx));
            remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure);
          }
        }

        // Since rematerializable DEFs are not bound but the live range is,
        // some uses must be bound. If we spill live range 'r', it can
        // rematerialize at each use site according to its bindings.
        if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) {
          remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill);
        }
        // The DEF interferes with everything still live at this point.
        interfere_with_live(lid, &liveout);
      }

      // Area remaining in the block
      inst_count--;
      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // Make this node's inputs live going upwards (Phis take their inputs
      // on the predecessor edges, so they are excluded here).
      if (!n->is_Phi()) {
        add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure);
      }
    }

    // Record whether the block entered the high-pressure region, and nudge
    // the transition index off any projection/null-check/catch cluster.
    check_for_high_pressure_block(int_pressure);
    check_for_high_pressure_block(float_pressure);
    adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
    adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
    // set the final_pressure as the register pressure for the block
    block->_reg_pressure = int_pressure.final_pressure();
    block->_freg_pressure = float_pressure.final_pressure();

#ifndef PRODUCT
    // Gather Register Pressure Statistics
    if (PrintOptoStatistics) {
      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  }

  return must_spill;
}
946