/*
 * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
24
25 #include "precompiled.hpp"
26 #include "compiler/oopMap.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "opto/addnode.hpp"
30 #include "opto/block.hpp"
31 #include "opto/callnode.hpp"
32 #include "opto/cfgnode.hpp"
33 #include "opto/chaitin.hpp"
34 #include "opto/coalesce.hpp"
35 #include "opto/indexSet.hpp"
36 #include "opto/machnode.hpp"
37 #include "opto/memnode.hpp"
38 #include "opto/opcodes.hpp"
39
// Construct an interference-graph phase over the given arena. The adjacency
// lists and live-range array are allocated later by init(), once the number
// of live ranges is known.
PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
}
42
// Allocate and initialize the graph for 'maxlrg' live ranges: one IndexSet
// adjacency list per live range plus a zeroed array of LRG structures.
// The graph starts in triangular (non-square) form; SquareUp() converts it.
void PhaseIFG::init( uint maxlrg ) {
  _maxlrg = maxlrg;
  _yanked = new (_arena) VectorSet(_arena);
  _is_square = false;
  // Make uninitialized adjacency lists
  _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
  // Also make empty live range structures
  _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
  memset((void*)_lrgs,0,sizeof(LRG)*maxlrg);
  // Init all to empty
  for( uint i = 0; i < maxlrg; i++ ) {
    _adjs[i].initialize(maxlrg);  // in-place init of the raw Amalloc'd memory
    _lrgs[i].Set_All();           // every LRG starts with all registers allowed
  }
}
58
59 // Add edge between vertices a & b. These are sorted (triangular matrix),
60 // then the smaller number is inserted in the larger numbered array.
add_edge(uint a,uint b)61 int PhaseIFG::add_edge( uint a, uint b ) {
62 lrgs(a).invalid_degree();
63 lrgs(b).invalid_degree();
64 // Sort a and b, so that a is bigger
65 assert( !_is_square, "only on triangular" );
66 if( a < b ) { uint tmp = a; a = b; b = tmp; }
67 return _adjs[a].insert( b );
68 }
69
70 // Is there an edge between a and b?
test_edge(uint a,uint b) const71 int PhaseIFG::test_edge( uint a, uint b ) const {
72 // Sort a and b, so that a is larger
73 assert( !_is_square, "only on triangular" );
74 if( a < b ) { uint tmp = a; a = b; b = tmp; }
75 return _adjs[a].member(b);
76 }
77
// Convert triangular matrix to square matrix
void PhaseIFG::SquareUp() {
  assert( !_is_square, "only on triangular" );

  // Simple transpose: in triangular form _adjs[i] only holds neighbors with
  // index < i, so mirroring each edge (i, datum) into _adjs[datum] makes
  // every adjacency list symmetric. Inserting into _adjs[datum] is safe
  // while iterating _adjs[i] because datum < i (a different set).
  for(uint i = 0; i < _maxlrg; i++ ) {
    if (!_adjs[i].is_empty()) {
      IndexSetIterator elements(&_adjs[i]);
      uint datum;
      while ((datum = elements.next()) != 0) {
        _adjs[datum].insert(i);
      }
    }
  }
  _is_square = true;
}
94
95 // Compute effective degree in bulk
Compute_Effective_Degree()96 void PhaseIFG::Compute_Effective_Degree() {
97 assert( _is_square, "only on square" );
98
99 for( uint i = 0; i < _maxlrg; i++ )
100 lrgs(i).set_degree(effective_degree(i));
101 }
102
test_edge_sq(uint a,uint b) const103 int PhaseIFG::test_edge_sq( uint a, uint b ) const {
104 assert( _is_square, "only on square" );
105 // Swap, so that 'a' has the lesser count. Then binary search is on
106 // the smaller of a's list and b's list.
107 if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
108 //return _adjs[a].unordered_member(b);
109 return _adjs[a].member(b);
110 }
111
// Union edges of B into A
void PhaseIFG::Union(uint a, uint b) {
  assert( _is_square, "only on square" );
  IndexSet *A = &_adjs[a];
  if (!_adjs[b].is_empty()) {
    IndexSetIterator b_elements(&_adjs[b]);
    uint datum;
    while ((datum = b_elements.next()) != 0) {
      // Only when the edge is new to A: mirror it into the neighbor's list
      // (to keep the matrix square/symmetric) and invalidate the cached
      // degrees of both endpoints.
      if (A->insert(datum)) {
        _adjs[datum].insert(a);
        lrgs(a).invalid_degree();
        lrgs(datum).invalid_degree();
      }
    }
  }
}
128
// Yank a Node and all connected edges from the IFG. Return a
// list of neighbors (edges) yanked.
IndexSet *PhaseIFG::remove_node( uint a ) {
  assert( _is_square, "only on square" );
  assert( !_yanked->test(a), "" );
  _yanked->set(a);

  // I remove the LRG from all neighbors.
  LRG &lrg_a = lrgs(a);

  // Remove 'a' from each neighbor's adjacency list and lower that
  // neighbor's effective degree by the pairwise degree contribution.
  // Note: _adjs[a] itself is left intact so re_insert() can undo this.
  if (!_adjs[a].is_empty()) {
    IndexSetIterator elements(&_adjs[a]);
    uint datum;
    while ((datum = elements.next()) != 0) {
      _adjs[datum].remove(a);
      lrgs(datum).inc_degree(-lrg_a.compute_degree(lrgs(datum)));
    }
  }
  return neighbors(a);
}
149
150 // Re-insert a yanked Node.
re_insert(uint a)151 void PhaseIFG::re_insert(uint a) {
152 assert( _is_square, "only on square" );
153 assert( _yanked->test(a), "" );
154 _yanked->remove(a);
155
156 if (_adjs[a].is_empty()) return;
157
158 IndexSetIterator elements(&_adjs[a]);
159 uint datum;
160 while ((datum = elements.next()) != 0) {
161 _adjs[datum].insert(a);
162 lrgs(datum).invalid_degree();
163 }
164 }
165
166 // Compute the degree between 2 live ranges. If both live ranges are
167 // aligned-adjacent powers-of-2 then we use the MAX size. If either is
168 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
169 // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why
170 // this is so.
compute_degree(LRG & l) const171 int LRG::compute_degree(LRG &l) const {
172 int tmp;
173 int num_regs = _num_regs;
174 int nregs = l.num_regs();
175 tmp = (_fat_proj || l._fat_proj) // either is a fat-proj?
176 ? (num_regs * nregs) // then use product
177 : MAX2(num_regs,nregs); // else use max
178 return tmp;
179 }
180
181 // Compute effective degree for this live range. If both live ranges are
182 // aligned-adjacent powers-of-2 then we use the MAX size. If either is
183 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
184 // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why
185 // this is so.
effective_degree(uint lidx) const186 int PhaseIFG::effective_degree(uint lidx) const {
187 IndexSet *s = neighbors(lidx);
188 if (s->is_empty()) return 0;
189 int eff = 0;
190 int num_regs = lrgs(lidx).num_regs();
191 int fat_proj = lrgs(lidx)._fat_proj;
192 IndexSetIterator elements(s);
193 uint nidx;
194 while ((nidx = elements.next()) != 0) {
195 LRG &lrgn = lrgs(nidx);
196 int nregs = lrgn.num_regs();
197 eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
198 ? (num_regs * nregs) // then use product
199 : MAX2(num_regs,nregs); // else use max
200 }
201 return eff;
202 }
203
204
205 #ifndef PRODUCT
// Debug print of the whole interference graph. Yanked nodes are flagged
// with "XX". In square form each list is complete; in triangular form the
// implied upper-triangle edges (found via test_edge) are printed before the
// '|' and the stored lower-triangle list after it.
void PhaseIFG::dump() const {
  tty->print_cr("-- Interference Graph --%s--",
                _is_square ? "square" : "triangular" );
  if (_is_square) {
    for (uint i = 0; i < _maxlrg; i++) {
      tty->print(_yanked->test(i) ? "XX " : "   ");
      tty->print("L%d: { ",i);
      if (!_adjs[i].is_empty()) {
        IndexSetIterator elements(&_adjs[i]);
        uint datum;
        while ((datum = elements.next()) != 0) {
          tty->print("L%d ", datum);
        }
      }
      tty->print_cr("}");

    }
    return;
  }

  // Triangular
  for( uint i = 0; i < _maxlrg; i++ ) {
    uint j;
    tty->print(_yanked->test(i) ? "XX " : "   ");
    tty->print("L%d: { ",i);
    // Edges where 'i' is the smaller index live in the larger nodes' lists;
    // recover them by probing every j > i.
    for( j = _maxlrg; j > i; j-- )
      if( test_edge(j - 1,i) ) {
        tty->print("L%d ",j - 1);
      }
    tty->print("| ");
    if (!_adjs[i].is_empty()) {
      IndexSetIterator elements(&_adjs[i]);
      uint datum;
      while ((datum = elements.next()) != 0) {
        tty->print("L%d ", datum);
      }
    }
    tty->print("}\n");
  }
  tty->print("\n");
}
247
stats() const248 void PhaseIFG::stats() const {
249 ResourceMark rm;
250 int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
251 memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
252 uint i;
253 for( i = 0; i < _maxlrg; i++ ) {
254 h_cnt[neighbor_cnt(i)]++;
255 }
256 tty->print_cr("--Histogram of counts--");
257 for( i = 0; i < _maxlrg*2; i++ )
258 if( h_cnt[i] )
259 tty->print("%d/%d ",i,h_cnt[i]);
260 tty->cr();
261 }
262
// Self-check the IFG invariants: square/symmetric, no self-edges, sorted
// adjacency lists, no stale Union-Find indices, yanked nodes fully detached,
// and cached degrees (when valid) matching a recomputation.
void PhaseIFG::verify( const PhaseChaitin *pc ) const {
  // IFG is square, sorted and no need for Find
  for( uint i = 0; i < _maxlrg; i++ ) {
    assert(!_yanked->test(i) || !neighbor_cnt(i), "Is removed completely" );
    IndexSet *set = &_adjs[i];
    if (!set->is_empty()) {
      IndexSetIterator elements(set);
      uint idx;
      uint last = 0;
      while ((idx = elements.next()) != 0) {
        assert(idx != i, "Must have empty diagonal");
        assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
        assert(_adjs[idx].member(i), "IFG not square");
        assert(!_yanked->test(idx), "No yanked neighbors");
        assert(last < idx, "not sorted increasing");
        last = idx;
      }
    }
    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
  }
}
284 #endif
285
286 /*
287 * Interfere this register with everything currently live.
288 * Check for interference by checking overlap of regmasks.
289 * Only interfere if acceptable register masks overlap.
290 */
interfere_with_live(uint lid,IndexSet * liveout)291 void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) {
292 if (!liveout->is_empty()) {
293 LRG& lrg = lrgs(lid);
294 const RegMask &rm = lrg.mask();
295 IndexSetIterator elements(liveout);
296 uint interfering_lid = elements.next();
297 while (interfering_lid != 0) {
298 LRG& interfering_lrg = lrgs(interfering_lid);
299 if (rm.overlap(interfering_lrg.mask())) {
300 _ifg->add_edge(lid, interfering_lid);
301 }
302 interfering_lid = elements.next();
303 }
304 }
305 }
306
// Actually build the interference graph.  Uses virtual registers only, no
// physical register masks.  This allows me to be very aggressive when
// coalescing copies.  Some of this aggressiveness will have to be undone
// later, but I'd rather get all the copies I can now (since unremoved copies
// at this point can end up in bad places).  Copies I re-insert later I have
// more opportunity to insert them in low-frequency locations.
void PhaseChaitin::build_ifg_virtual( ) {
  Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);

  // For all blocks (in any order) do...
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    IndexSet* liveout = _live->live(block);

    // The IFG is built by a single reverse pass over each basic block.
    // Starting with the known live-out set, we remove things that get
    // defined and add things that become live (essentially executing one
    // pass of a standard LIVE analysis). Just before a Node defines a value
    // (and removes it from the live-ness set) that value is certainly live.
    // The defined value interferes with everything currently live. The
    // value is then removed from the live-ness set and it's inputs are
    // added to the live-ness set.
    for (uint j = block->end_idx() + 1; j > 1; j--) {
      Node* n = block->get_node(j - 1);

      // Get value being defined
      uint r = _lrg_map.live_range_id(n);

      // Some special values do not allocate
      if (r) {

        // Remove from live-out set
        liveout->remove(r);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if (idx != 0) {
          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
        }

        // Interfere with everything live
        interfere_with_live(r, liveout);
      }

      // Make all inputs live
      if (!n->is_Phi()) {      // Phi function uses come from prior block
        for(uint k = 1; k < n->req(); k++) {
          liveout->insert(_lrg_map.live_range_id(n->in(k)));
        }
      }

      // 2-address instructions always have the defined value live
      // on entry to the instruction, even though it is being defined
      // by the instruction.  We pretend a virtual copy sits just prior
      // to the instruction and kills the src-def'd register.
      // In other words, for 2-address instructions the defined value
      // interferes with all inputs.
      uint idx;
      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
        const MachNode *mach = n->as_Mach();
        // Sometimes my 2-address ADDs are commuted in a bad way.
        // We generally want the USE-DEF register to refer to the
        // loop-varying quantity, to avoid a copy.
        uint op = mach->ideal_Opcode();
        // Check that mach->num_opnds() == 3 to ensure instruction is
        // not subsuming constants, effectively excludes addI_cin_imm
        // Can NOT swap for instructions like addI_cin_imm since it
        // is adding zero to yhi + carry and the second ideal-input
        // points to the result of adding low-halves.
        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
            n->in(1)->bottom_type()->base() == Type::Int &&
            // See if the ADD is involved in a tight data loop the wrong way
            n->in(2)->is_Phi() &&
            n->in(2)->in(2) == n ) {
          // Swap inputs 1 and 2 so the use-def input is the loop-carried Phi.
          Node *tmp = n->in(1);
          n->set_req( 1, n->in(2) );
          n->set_req( 2, tmp );
        }
        // Defined value interferes with all inputs
        uint lidx = _lrg_map.live_range_id(n->in(idx));
        for (uint k = 1; k < n->req(); k++) {
          uint kidx = _lrg_map.live_range_id(n->in(k));
          // Skip the use-def input itself -- it is allowed to share with the def.
          if (kidx != lidx) {
            _ifg->add_edge(r, kidx);
          }
        }
      }
    } // End of forall instructions in block
  } // End of forall blocks
}
399
400 #ifdef ASSERT
count_int_pressure(IndexSet * liveout)401 uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
402 if (liveout->is_empty()) {
403 return 0;
404 }
405 IndexSetIterator elements(liveout);
406 uint lidx = elements.next();
407 uint cnt = 0;
408 while (lidx != 0) {
409 LRG& lrg = lrgs(lidx);
410 if (lrg.mask_is_nonempty_and_up() &&
411 !lrg.is_float_or_vector() &&
412 (lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
413 (Matcher::has_predicated_vectors() &&
414 lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegVectMask])))) {
415 cnt += lrg.reg_pressure();
416 }
417 lidx = elements.next();
418 }
419 return cnt;
420 }
421
count_float_pressure(IndexSet * liveout)422 uint PhaseChaitin::count_float_pressure(IndexSet* liveout) {
423 if (liveout->is_empty()) {
424 return 0;
425 }
426 IndexSetIterator elements(liveout);
427 uint lidx = elements.next();
428 uint cnt = 0;
429 while (lidx != 0) {
430 LRG& lrg = lrgs(lidx);
431 if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) {
432 cnt += lrg.reg_pressure();
433 }
434 lidx = elements.next();
435 }
436 return cnt;
437 }
438 #endif
439
/*
 * Adjust register pressure down by 1. Capture last hi-to-low transition,
 */
void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) {
  // Only allocatable ("up", non-empty mask) live ranges contribute pressure.
  if (lrg.mask_is_nonempty_and_up()) {
    if (lrg.is_float_or_vector()) {
      float_pressure.lower(lrg, location);
    } else {
      // Do not count the SP and flag registers
      const RegMask& r = lrg.mask();
      if (r.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
          (Matcher::has_predicated_vectors() &&
           r.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
        int_pressure.lower(lrg, location);
      }
    }
  }
  if (_scheduling_info_generated == false) {
    // NOTE(review): the recount asserts are skipped when scheduling info is
    // generated -- presumably liveout does not track pressure exactly in
    // that mode; confirm against the scheduling callers.
    assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
    assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
  }
}
462
463 /* Go to the first non-phi index in a block */
first_nonphi_index(Block * b)464 static uint first_nonphi_index(Block* b) {
465 uint i;
466 uint end_idx = b->end_idx();
467 for (i = 1; i < end_idx; i++) {
468 Node* n = b->get_node(i);
469 if (!n->is_Phi()) {
470 break;
471 }
472 }
473 return i;
474 }
475
/*
 * Spills could be inserted before a CreateEx node which should be the first
 * instruction in a block after Phi nodes. If so, move the CreateEx node up.
 */
static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
  for (uint i = first_inst; i < last_inst; i++) {
    Node* ex = b->get_node(i);
    // Skip over spill copies inserted ahead of a possible CreateEx.
    if (ex->is_SpillCopy()) {
      continue;
    }

    // If the first non-spill node is a CreateEx that is no longer first,
    // hoist it back to the first non-phi position.
    if (i > first_inst &&
        ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
      b->remove_node(i);
      b->insert_node(ex, first_inst);
    }
    // Stop once a CreateEx or any other node is found
    // (the break is unconditional: only leading SpillCopys are skipped).
    break;
  }
}
496
497 /*
498 * When new live ranges are live, we raise the register pressure
499 */
raise_pressure(Block * b,LRG & lrg,Pressure & int_pressure,Pressure & float_pressure)500 void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) {
501 if (lrg.mask_is_nonempty_and_up()) {
502 if (lrg.is_float_or_vector()) {
503 float_pressure.raise(lrg);
504 } else {
505 // Do not count the SP and flag registers
506 const RegMask& rm = lrg.mask();
507 if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
508 (Matcher::has_predicated_vectors() &&
509 rm.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
510 int_pressure.raise(lrg);
511 }
512 }
513 }
514 }
515
516
517 /*
518 * Computes the initial register pressure of a block, looking at all live
519 * ranges in the liveout. The register pressure is computed for both float
520 * and int/pointer registers.
521 * Live ranges in the liveout are presumed live for the whole block.
522 * We add the cost for the whole block to the area of the live ranges initially.
523 * If a live range gets killed in the block, we'll subtract the unused part of
524 * the block from the area.
525 */
compute_initial_block_pressure(Block * b,IndexSet * liveout,Pressure & int_pressure,Pressure & float_pressure,double cost)526 void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) {
527 if (!liveout->is_empty()) {
528 IndexSetIterator elements(liveout);
529 uint lid = elements.next();
530 while (lid != 0) {
531 LRG &lrg = lrgs(lid);
532 lrg._area += cost;
533 raise_pressure(b, lrg, int_pressure, float_pressure);
534 lid = elements.next();
535 }
536 }
537 assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
538 assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
539 }
540
/*
 * Computes the entry register pressure of a block, looking at all live
 * ranges in the livein. The register pressure is computed for both float
 * and int/pointer registers.
 */
void PhaseChaitin::compute_entry_block_pressure(Block* b) {
  IndexSet *livein = _live->livein(b);
  if (!livein->is_empty()) {
    IndexSetIterator elements(livein);
    uint lid = elements.next();
    while (lid != 0) {
      LRG &lrg = lrgs(lid);
      raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
      lid = elements.next();
    }
  }
  // Now check phis for locally defined inputs
  for (uint j = 0; j < b->number_of_nodes(); j++) {
    Node* n = b->get_node(j);
    if (n->is_Phi()) {
      for (uint k = 1; k < n->req(); k++) {
        Node* phi_in = n->in(k);
        // Because we are talking about phis, raise register pressure once for each
        // instance of a phi to account for a single value
        if (_cfg.get_block_for_node(phi_in) == b) {
          // NOTE(review): uses the node's _idx directly rather than
          // _lrg_map.live_range_id(phi_in) -- confirm this is intentional.
          LRG& lrg = lrgs(phi_in->_idx);
          raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
          break;   // only one locally-defined input counts per phi
        }
      }
    }
  }
  // Record the computed entry pressure as the start pressure for scheduling.
  _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure());
  _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure());
}
576
577 /*
578 * Computes the exit register pressure of a block, looking at all live
579 * ranges in the liveout. The register pressure is computed for both float
580 * and int/pointer registers.
581 */
compute_exit_block_pressure(Block * b)582 void PhaseChaitin::compute_exit_block_pressure(Block* b) {
583
584 IndexSet* livein = _live->live(b);
585 _sched_int_pressure.set_current_pressure(0);
586 _sched_float_pressure.set_current_pressure(0);
587 if (!livein->is_empty()) {
588 IndexSetIterator elements(livein);
589 uint lid = elements.next();
590 while (lid != 0) {
591 LRG &lrg = lrgs(lid);
592 raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
593 lid = elements.next();
594 }
595 }
596 }
597
/*
 * Remove dead node if it's not used.
 * We only remove projection nodes if the node "defining" the projection is
 * dead, for example on x86, if we have a dead Add node we remove its
 * RFLAGS node.
 * Returns true when the node was removed from the block.
 */
bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
  Node* def = n->in(0);
  // Non-projections are removable outright; projections only when their
  // defining node's live range is dead (not in liveout).
  if (!n->is_Proj() ||
      (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
    if (n->is_MachProj()) {
      // Don't remove KILL projections if their "defining" nodes have
      // memory effects (have SCMemProj projection node) -
      // they are not dead even when their result is not used.
      // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
      // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
      // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
      // in block in such order that KILL MachProj nodes are processed first.
      if (def->has_out_with(Op_SCMemProj)) {
        return false;
      }
    }
    // Detach the node from the block, its live range, and the graph.
    b->remove_node(location);
    LRG& lrg = lrgs(lid);
    if (lrg._def == n) {
      lrg._def = 0;
    }
    n->disconnect_inputs(C);
    _cfg.unmap_node_from_block(n);
    n->replace_by(C->top());
    return true;
  }
  return false;
}
632
633 /*
634 * When encountering a fat projection, we might go from a low to high to low
635 * (since the fat proj only lives at this instruction) going backwards in the
636 * block. If we find a low to high transition, we record it.
637 */
check_for_high_pressure_transition_at_fatproj(uint & block_reg_pressure,uint location,LRG & lrg,Pressure & pressure,const int op_regtype)638 void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) {
639 RegMask mask_tmp = lrg.mask();
640 mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]);
641 pressure.check_pressure_at_fatproj(location, mask_tmp);
642 }
643
/*
 * Insure high score for immediate-use spill copies so they get a color.
 * All single-use MachSpillCopy(s) that immediately precede their
 * use must color early. If a longer live range steals their
 * color, the spill copy will split and may push another spill copy
 * further away resulting in an infinite spill-split-retry cycle.
 * Assigning a zero area results in a high score() and a good
 * location in the simplify list.
 */
void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
  if (n->is_SpillCopy() &&
      lrg.is_singledef() &&        // A multi defined live range can still split
      n->outcnt() == 1 &&          // and use must be in this block
      _cfg.get_block_for_node(n->unique_out()) == b) {

    Node* single_use = n->unique_out();
    assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
    // Use can be earlier in block if it is a Phi, but then I should be a MultiDef

    // Find first non SpillCopy 'm' that follows the current instruction
    // (current_inst - 1) is index for current instruction 'n'
    Node* m = n;
    for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
      m = b->get_node(i);
    }
    // Only if the use immediately follows (through at most a run of
    // SpillCopys) does this copy get the zero-area / high-score treatment.
    if (m == single_use) {
      lrg._area = 0.0;
    }
  }
}
674
675 /*
676 * Copies do not define a new value and so do not interfere.
677 * Remove the copies source from the liveout set before interfering.
678 */
remove_interference_from_copy(Block * b,uint location,uint lid_copy,IndexSet * liveout,double cost,Pressure & int_pressure,Pressure & float_pressure)679 void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
680 if (liveout->remove(lid_copy)) {
681 LRG& lrg_copy = lrgs(lid_copy);
682 lrg_copy._area -= cost;
683
684 // Lower register pressure since copy and definition can share the same register
685 lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure);
686 }
687 }
688
/*
 * The defined value must go in a particular register. Remove that register from
 * all conflicting parties and avoid the interference.
 */
void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
  if (liveout->is_empty()) return;
  // Check for common case
  const RegMask& rm = lrg.mask();
  int r_size = lrg.num_regs();
  // Smear odd bits
  IndexSetIterator elements(liveout);
  uint l = elements.next();
  while (l != 0) {
    LRG& interfering_lrg = lrgs(l);
    // If 'l' must spill already, do not further hack his bits.
    // He'll get some interferences and be forced to spill later.
    if (interfering_lrg._must_spill) {
      l = elements.next();
      continue;
    }

    // Remove bound register(s) from 'l's choices
    // (saved so we can restore them if 'l' goes dry below).
    RegMask old = interfering_lrg.mask();
    uint old_size = interfering_lrg.mask_size();

    // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
    // longer interferes with 'rm'.  If 'l' requires aligned
    // adjacent pairs, subtract out bit pairs.
    assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");

    if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
      // Multi-register (incl. vector) case: subtract whole aligned sets.
      RegMask r2mask = rm;
      // Leave only aligned set of bits.
      r2mask.smear_to_sets(interfering_lrg.num_regs());
      // It includes vector case.
      interfering_lrg.SUBTRACT(r2mask);
      interfering_lrg.compute_set_mask_size();
    } else if (r_size != 1) {
      // fat proj
      interfering_lrg.SUBTRACT(rm);
      interfering_lrg.compute_set_mask_size();
    } else {
      // Common case: size 1 bound removal
      OptoReg::Name r_reg = rm.find_first_elem();
      if (interfering_lrg.mask().Member(r_reg)) {
        interfering_lrg.Remove(r_reg);
        interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
      }
    }

    // If 'l' goes completely dry, it must spill.
    if (interfering_lrg.not_free()) {
      // Give 'l' some kind of reasonable mask, so it picks up
      // interferences (and will spill later).
      interfering_lrg.set_mask(old);
      interfering_lrg.set_mask_size(old_size);
      must_spill++;
      interfering_lrg._must_spill = 1;
      interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
    }
    l = elements.next();
  }
}
752
/*
 * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
 * sole use of a StoreLConditional. While StoreLConditionals set memory (the
 * SCMemProj use) they also def flags; if that flag def is unused the allocator
 * sees a flag-setting instruction with no use of the flags and assumes it's
 * dead. This keeps the (useless) flag-setting behavior alive while also
 * keeping the (useful) memory update effect.
 */
void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
  JVMState* jvms = n->jvms();
  // Inputs at or beyond debug_start are debug info only; sentinel 999999
  // means "no JVM state" so every input is treated as a real use.
  uint debug_start = jvms ? jvms->debug_start() : 999999;

  // SCMemProj starts at 0 so its in(0) (the conditional store) stays live.
  for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
    Node* def = n->in(k);
    uint lid = _lrg_map.live_range_id(def);
    if (!lid) {
      continue;
    }
    LRG& lrg = lrgs(lid);

    // No use-side cost for spilling debug info
    if (k < debug_start) {
      // A USE costs twice block frequency (once for the Load, once
      // for a Load-delay).  Rematerialized uses only cost once.
      lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
    }

    if (liveout->insert(lid)) {
      // Newly live things assumed live from here to top of block
      lrg._area += cost;
      raise_pressure(b, lrg, int_pressure, float_pressure);
      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
    }
    assert(lrg._area >= 0.0, "negative spill area" );
  }
}
790
791 /*
792 * If we run off the top of the block with high pressure just record that the
793 * whole block is high pressure. (Even though we might have a transition
794 * later down in the block)
795 */
check_for_high_pressure_block(Pressure & pressure)796 void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) {
797 // current pressure now means the pressure before the first instruction in the block
798 // (since we have stepped through all instructions backwards)
799 if (pressure.current_pressure() > pressure.high_pressure_limit()) {
800 pressure.set_high_pressure_index_to_block_start();
801 }
802 }
803
/*
 * Compute high pressure indice; avoid landing in the middle of projnodes
 * and set the high pressure index for the block
 */
void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) {
  uint i = pressure.high_pressure_index();
  if (i < b->number_of_nodes() && i < b->end_idx() + 1) {
    Node* cur = b->get_node(i);
    // Walk backwards past projections / null-checks / catches so the HRP
    // index does not split a multi-node instruction cluster.
    // NOTE(review): relies on node 0 (the block head) never matching these
    // predicates so --i cannot underflow -- confirm.
    while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
      cur = b->get_node(--i);
    }
  }
  block_hrp_index = i;
}
818
// Print a short summary of one Pressure tracker: optional header, then
// start / max / end pressure values. After the backwards walk, "end"
// (current_pressure) is the pressure at block entry.
void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
  if (str != NULL) {
    tty->print_cr("#  *** %s ***", str);
  }
  tty->print_cr("#  start pressure is = %d", pressure.start_pressure());
  tty->print_cr("#  max pressure is = %d", pressure.final_pressure());
  tty->print_cr("#  end pressure is = %d", pressure.current_pressure());
  tty->print_cr("#");
}
828
829 /* Build an interference graph:
830 * That is, if 2 live ranges are simultaneously alive but in their acceptable
831 * register sets do not overlap, then they do not interfere. The IFG is built
832 * by a single reverse pass over each basic block. Starting with the known
833 * live-out set, we remove things that get defined and add things that become
834 * live (essentially executing one pass of a standard LIVE analysis). Just
835 * before a Node defines a value (and removes it from the live-ness set) that
836 * value is certainly live. The defined value interferes with everything
 * currently live. The value is then removed from the live-ness set and its
 * inputs are added to the live-ness set.
839 * Compute register pressure for each block:
840 * We store the biggest register pressure for each block and also the first
841 * low to high register pressure transition within the block (if any).
842 */
// Build the physical interference graph and per-block register pressure by
// a single backward pass over each basic block (see the header comment
// above). Returns 'must_spill', accumulated by
// remove_bound_register_from_interfering_live_ranges — presumably a count
// of live ranges forced to spill; confirm against that helper's definition.
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  Compile::TracePhase tp("buildIFG", &timers[_t_buildIFGphysical]);

  uint must_spill = 0;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // Clone (rather than smash in place) the liveout info, so it is alive
    // for the "collect_gc_info" phase later.
    IndexSet liveout(_live->live(block));

    // Phis sit at the block top and do not contribute to pressure the same
    // way; the walk below covers [1, last_inst] and skips phi inputs.
    uint first_inst = first_nonphi_index(block);
    uint last_inst = block->end_idx();

    move_exception_node_up(block, first_inst, last_inst);

    // One pressure tracker per register class (int vs. float), each seeded
    // with the block length and its class-specific high-pressure limit.
    Pressure int_pressure(last_inst + 1, INTPRESSURE);
    Pressure float_pressure(last_inst + 1, FLOATPRESSURE);
    block->_reg_pressure = 0;
    block->_freg_pressure = 0;

    // 'cost' models the frequency-weighted area still ahead of the walk:
    // remaining instruction count times block frequency. It shrinks as the
    // backward walk proceeds (see the decrement near the loop bottom).
    int inst_count = last_inst - first_inst;
    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
    assert(cost >= 0.0, "negative spill cost" );

    compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);

    // Backward walk: location runs from the last instruction down to (but
    // not including) index 0, so the block head node itself is not visited.
    for (uint location = last_inst; location > 0; location--) {
      Node* n = block->get_node(location);
      uint lid = _lrg_map.live_range_id(n);

      if (lid) {
        LRG& lrg = lrgs(lid);

        // A DEF normally costs block frequency; rematerialized values are
        // removed from the DEF sight, so LOWER costs here.
        lrg._cost += n->rematerialize() ? 0 : block->_freq;

        if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
          // DEF of a value that is not live past this point: possibly dead.
          // SafePoints are exempt — their defs must be kept for debug info.
          if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
            // The node was deleted; pull the recorded high-pressure indices
            // back in step with the shortened block.
            float_pressure.lower_high_pressure_index();
            int_pressure.lower_high_pressure_index();
            continue;
          }
          // A fat projection (e.g. a call killing many registers) can push
          // pressure over the limit all by itself even though its value is
          // dead here — check both register classes for a transition.
          if (lrg._fat_proj) {
            check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI);
            check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD);
          }
        } else {
          // A live range ends at its definition, remove the remaining area.
          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
          // and +Inf - +Inf = NaN. So let's not do that subtraction.
          if (g_isfinite(cost)) {
            lrg._area -= cost;
          }
          assert(lrg._area >= 0.0, "negative spill area" );

          assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);

          // The DEF kills the value: it leaves the live set here (walking
          // backwards), so pressure drops at this location.
          if (liveout.remove(lid)) {
            lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure);
          }
          // For copies, the source and destination do not truly interfere;
          // remove that edge so the coalescer can merge them later.
          uint copy_idx = n->is_Copy();
          if (copy_idx) {
            uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx));
            remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure);
          }
        }

        // Since rematerializable DEFs are not bound but the live range is,
        // some uses must be bound. If we spill live range 'r', it can
        // rematerialize at each use site according to its bindings.
        if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) {
          remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill);
        }
        // The defined value interferes with everything currently live.
        interfere_with_live(lid, &liveout);
      }

      // Area remaining in the block
      inst_count--;
      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // USEs become live above this point; phi inputs are handled at the
      // predecessor ends, not here.
      if (!n->is_Phi()) {
        add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure);
      }
    }

    check_for_high_pressure_block(int_pressure);
    check_for_high_pressure_block(float_pressure);
    adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
    adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
    // set the final_pressure as the register pressure for the block
    block->_reg_pressure = int_pressure.final_pressure();
    block->_freg_pressure = float_pressure.final_pressure();

#ifndef PRODUCT
    // Gather Register Pressure Statistics
    if (PrintOptoStatistics) {
      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  }

  return must_spill;
}
952