1 /*
2 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "gc/shared/barrierSet.hpp"
27 #include "gc/shared/c2/barrierSetC2.hpp"
28 #include "memory/allocation.inline.hpp"
29 #include "memory/resourceArea.hpp"
30 #include "opto/addnode.hpp"
31 #include "opto/callnode.hpp"
32 #include "opto/castnode.hpp"
33 #include "opto/connode.hpp"
34 #include "opto/castnode.hpp"
35 #include "opto/divnode.hpp"
36 #include "opto/loopnode.hpp"
37 #include "opto/matcher.hpp"
38 #include "opto/mulnode.hpp"
39 #include "opto/movenode.hpp"
40 #include "opto/opaquenode.hpp"
41 #include "opto/rootnode.hpp"
42 #include "opto/subnode.hpp"
43 #include "utilities/macros.hpp"
44
45 //=============================================================================
46 //------------------------------split_thru_phi---------------------------------
47 // Split Node 'n' through merge point if there is enough win.
Node* PhaseIdealLoop::split_thru_phi(Node* n, Node* region, int policy) {
  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
    // ConvI2L may have type information on it which is unsafe to push up
    // so disable this for now
    return NULL;
  }

  // Splitting range check CastIIs through a loop induction Phi can
  // cause new Phis to be created that are left unrelated to the loop
  // induction Phi and prevent optimizations (vectorization)
  if (n->Opcode() == Op_CastII && n->as_CastII()->has_range_check() &&
      region->is_CountedLoop() && n->in(1) == region->as_CountedLoop()->phi()) {
    return NULL;
  }

  // Bail out if 'n' is a Div or Mod node whose zero check was removed earlier (i.e. control is NULL) and its divisor is an induction variable
  // phi p of a trip-counted (integer) loop whose inputs could be zero (include zero in their type range). p could have a more precise type
  // range that does not necessarily include all values of its inputs. Since each of these inputs will be a divisor of the newly cloned nodes
  // of 'n', we need to bail out if one of these divisors could be zero (zero in its type range).
  if ((n->Opcode() == Op_DivI || n->Opcode() == Op_ModI) && n->in(0) == NULL
      && region->is_CountedLoop() && n->in(2) == region->as_CountedLoop()->phi()) {
    Node* phi = region->as_CountedLoop()->phi();
    for (uint i = 1; i < phi->req(); i++) {
      if (_igvn.type(phi->in(i))->filter_speculative(TypeInt::ZERO) != Type::TOP) {
        // Zero could be a possible value but we already removed the zero check. Bail out to avoid a possible division by zero at a later point.
        return NULL;
      }
    }
  }

  // 'wins' counts the per-path simplifications (constant folding, identity,
  // commoning) found below; splitting only pays off if wins > policy.
  int wins = 0;
  assert(!n->is_CFG(), "");
  assert(region->is_Region(), "");

  const Type* type = n->bottom_type();
  const TypeOopPtr* t_oop = _igvn.type(n)->isa_oopptr();
  Node* phi;
  if (t_oop != NULL && t_oop->is_known_instance_field()) {
    // Preserve the known-instance identity and alias index on the new Phi so
    // the memory graph stays precise for this instance field.
    int iid = t_oop->instance_id();
    int index = C->get_alias_index(t_oop);
    int offset = t_oop->offset();
    phi = new PhiNode(region, type, NULL, iid, index, offset);
  } else {
    phi = PhiNode::make_blank(region, n);
  }
  // Remember the node-index watermark: any node with _idx >= old_unique was
  // created by this split and has no prior control/loop placement.
  uint old_unique = C->unique();
  for (uint i = 1; i < region->req(); i++) {
    Node* x;
    Node* the_clone = NULL;
    if (region->in(i) == C->top()) {
      x = C->top();             // Dead path?  Use a dead data op
    } else {
      x = n->clone();           // Else clone up the data op
      the_clone = x;            // Remember for possible deletion.
      // Alter data node to use pre-phi inputs
      if (n->in(0) == region)
        x->set_req( 0, region->in(i) );
      for (uint j = 1; j < n->req(); j++) {
        Node* in = n->in(j);
        if (in->is_Phi() && in->in(0) == region)
          x->set_req(j, in->in(i)); // Use pre-Phi input for the clone
      }
    }
    // Check for a 'win' on some paths
    const Type* t = x->Value(&_igvn);

    bool singleton = t->singleton();

    // A TOP singleton indicates that there are no possible values incoming
    // along a particular edge. In most cases, this is OK, and the Phi will
    // be eliminated later in an Ideal call. However, we can't allow this to
    // happen if the singleton occurs on loop entry, as the elimination of
    // the PhiNode may cause the resulting node to migrate back to a previous
    // loop iteration.
    if (singleton && t == Type::TOP) {
      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
      // irreducible loop may not be indicated by an affirmative is_Loop());
      // therefore, the only top we can split thru a phi is on a backedge of
      // a loop.
      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
    }

    if (singleton) {
      // The clone folded to a constant on this path: count a win and use the
      // constant directly instead of the clone.
      wins++;
      x = ((PhaseGVN&)_igvn).makecon(t);
    } else {
      // We now call Identity to try to simplify the cloned node.
      // Note that some Identity methods call phase->type(this).
      // Make sure that the type array is big enough for
      // our new node, even though we may throw the node away.
      // (Note: This tweaking with igvn only works because x is a new node.)
      _igvn.set_type(x, t);
      // If x is a TypeNode, capture any more-precise type permanently into Node
      // otherwise it will be not updated during igvn->transform since
      // igvn->type(x) is set to x->Value() already.
      x->raise_bottom_type(t);
      Node *y = _igvn.apply_identity(x);
      if (y != x) {
        // Identity simplified the clone away: another win.
        wins++;
        x = y;
      } else {
        // Try to common the clone with an existing, identical node.
        y = _igvn.hash_find(x);
        if (y) {
          wins++;
          x = y;
        } else {
          // Else x is a new node we are keeping
          // We do not need register_new_node_with_optimizer
          // because set_type has already been called.
          _igvn._worklist.push(x);
        }
      }
    }
    // If the clone was replaced (by a constant, identity, or commoning),
    // discard it before wiring the replacement into the Phi.
    if (x != the_clone && the_clone != NULL)
      _igvn.remove_dead_node(the_clone);
    phi->set_req( i, x );
  }
  // Too few wins?
  if (wins <= policy) {
    _igvn.remove_dead_node(phi);
    return NULL;
  }

  // Record Phi
  register_new_node( phi, region );

  // Now place every Phi input at a control point that dominates its new use.
  for (uint i2 = 1; i2 < phi->req(); i2++) {
    Node *x = phi->in(i2);
    // If we commoned up the cloned 'x' with another existing Node,
    // the existing Node picks up a new use.  We need to make the
    // existing Node occur higher up so it dominates its uses.
    Node *old_ctrl;
    IdealLoopTree *old_loop;

    if (x->is_Con()) {
      // Constant's control is always root.
      set_ctrl(x, C->root());
      continue;
    }
    // The occasional new node
    if (x->_idx >= old_unique) {    // Found a new, unplaced node?
      old_ctrl = NULL;
      old_loop = NULL;              // Not in any prior loop
    } else {
      old_ctrl = get_ctrl(x);
      old_loop = get_loop(old_ctrl); // Get prior loop
    }
    // New late point must dominate new use
    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
    if (new_ctrl == old_ctrl) // Nothing is changed
      continue;

    IdealLoopTree *new_loop = get_loop(new_ctrl);

    // Don't move x into a loop if its uses are
    // outside of loop. Otherwise x will be cloned
    // for each use outside of this loop.
    IdealLoopTree *use_loop = get_loop(region);
    if (!new_loop->is_member(use_loop) &&
        (old_loop == NULL || !new_loop->is_member(old_loop))) {
      // Take early control, later control will be recalculated
      // during next iteration of loop optimizations.
      new_ctrl = get_early_ctrl(x);
      new_loop = get_loop(new_ctrl);
    }
    // Set new location
    set_ctrl(x, new_ctrl);
    // If changing loop bodies, see if we need to collect into new body
    if (old_loop != new_loop) {
      if (old_loop && !old_loop->_child)
        old_loop->_body.yank(x);
      if (!new_loop->_child)
        new_loop->_body.push(x); // Collect body info
    }
  }

  return phi;
}
226
227 //------------------------------dominated_by------------------------------------
228 // Replace the dominated test with an obvious true or false. Place it on the
229 // IGVN worklist for later cleanup. Move control-dependent data Nodes on the
230 // live path up to the dominating control.
void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
  if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }

  // prevdom is the dominating projection of the dominating test.
  assert( iff->is_If(), "" );
  assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
  int pop = prevdom->Opcode();
  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
  // 'flip' means the dominated test has the opposite sense of the dominating
  // one, so the projection that survives is the opposite of prevdom's.
  if (flip) {
    if (pop == Op_IfTrue)
      pop = Op_IfFalse;
    else
      pop = Op_IfTrue;
  }
  // 'con' is set to true or false to kill the dominated test.
  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
  set_ctrl(con, C->root()); // Constant gets a new use
  // Hack the dominated test
  _igvn.replace_input_of(iff, 1, con);

  // If I dont have a reachable TRUE and FALSE path following the IfNode then
  // I can assume this path reaches an infinite loop.  In this case it's not
  // important to optimize the data Nodes - either the whole compilation will
  // be tossed or this path (and all data Nodes) will go dead.
  if (iff->outcnt() != 2) return;

  // Make control-dependent data Nodes on the live path (path that will remain
  // once the dominated IF is removed) become control-dependent on the
  // dominating projection.
  Node* dp = iff->as_If()->proj_out_or_null(pop == Op_IfTrue);

  // Loop predicates may have depending checks which should not
  // be skipped. For example, range check predicate has two checks
  // for lower and upper bounds.
  if (dp == NULL)
    return;

  ProjNode* dp_proj  = dp->as_Proj();
  // unc_proj is the dying projection (the one leading to the uncommon trap).
  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
  if (exclude_loop_predicate &&
      (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_profile_predicate) != NULL ||
       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
    // If this is a range check (IfNode::is_range_check), do not
    // reorder because Compile::allow_range_check_smearing might have
    // changed the check.
    return; // Let IGVN transformation change control dependence.
  }

  IdealLoopTree *old_loop = get_loop(dp);

  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
    Node* cd = dp->fast_out(i); // Control-dependent node
    if (cd->depends_only_on_test()) {
      assert(cd->in(0) == dp, "");
      // Re-hang cd on the dominating projection and recompute its placement.
      _igvn.replace_input_of(cd, 0, prevdom);
      set_early_ctrl(cd);
      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
      if (old_loop != new_loop) {
        if (!old_loop->_child) old_loop->_body.yank(cd);
        if (!new_loop->_child) new_loop->_body.push(cd);
      }
      // replace_input_of removed cd from dp's out-list: back up the fast
      // iterator and its cached max so no out-edge is skipped.
      --i;
      --imax;
    }
  }
}
298
299 //------------------------------has_local_phi_input----------------------------
300 // Return TRUE if 'n' has Phi inputs from its local block and no other
301 // block-local inputs (all non-local-phi inputs come from earlier blocks)
Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
  Node *n_ctrl = get_ctrl(n);
  // See if some inputs come from a Phi in this block, or from before
  // this block.
  uint i;
  for( i = 1; i < n->req(); i++ ) {
    Node *phi = n->in(i);
    if( phi->is_Phi() && phi->in(0) == n_ctrl )
      break;
  }
  if( i >= n->req() )
    return NULL;                // No Phi inputs; nowhere to clone thru

  // Check for inputs created between 'n' and the Phi input.  These
  // must split as well; they have already been given the chance
  // (courtesy of a post-order visit) and since they did not we must
  // recover the 'cost' of splitting them by being very profitable
  // when splitting 'n'.  Since this is unlikely we simply give up.
  for( i = 1; i < n->req(); i++ ) {
    Node *m = n->in(i);
    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
      // We allow the special case of AddP's with no local inputs.
      // This allows us to split-up address expressions.
      if (m->is_AddP() &&
          get_ctrl(m->in(2)) != n_ctrl &&
          get_ctrl(m->in(3)) != n_ctrl) {
        // Move the AddP up to dominating point
        Node* c = find_non_split_ctrl(idom(n_ctrl));
        if (c->is_OuterStripMinedLoop()) {
          // Don't hoist into the strip-mined outer loop header; use its
          // entry control instead.
          c->as_Loop()->verify_strip_mined(1);
          c = c->in(LoopNode::EntryControl);
        }
        set_ctrl_and_loop(m, c);
        continue;
      }
      return NULL;
    }
    assert(n->is_Phi() || m->is_Phi() || is_dominator(get_ctrl(m), n_ctrl), "m has strange control");
  }

  // Return the block's control so the caller can split 'n' through the Phis.
  return n_ctrl;
}
344
345 //------------------------------remix_address_expressions----------------------
346 // Rework addressing expressions to get the most loop-invariant stuff
347 // moved out. We'd like to do all associative operators, but it's especially
348 // important (common) to do address expressions.
remix_address_expressions(Node * n)349 Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
350 if (!has_ctrl(n)) return NULL;
351 Node *n_ctrl = get_ctrl(n);
352 IdealLoopTree *n_loop = get_loop(n_ctrl);
353
354 // See if 'n' mixes loop-varying and loop-invariant inputs and
355 // itself is loop-varying.
356
357 // Only interested in binary ops (and AddP)
358 if( n->req() < 3 || n->req() > 4 ) return NULL;
359
360 Node *n1_ctrl = get_ctrl(n->in( 1));
361 Node *n2_ctrl = get_ctrl(n->in( 2));
362 Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
363 IdealLoopTree *n1_loop = get_loop( n1_ctrl );
364 IdealLoopTree *n2_loop = get_loop( n2_ctrl );
365 IdealLoopTree *n3_loop = get_loop( n3_ctrl );
366
367 // Does one of my inputs spin in a tighter loop than self?
368 if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
369 (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
370 (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
371 return NULL; // Leave well enough alone
372
373 // Is at least one of my inputs loop-invariant?
374 if( n1_loop == n_loop &&
375 n2_loop == n_loop &&
376 n3_loop == n_loop )
377 return NULL; // No loop-invariant inputs
378
379
380 int n_op = n->Opcode();
381
382 // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
383 if( n_op == Op_LShiftI ) {
384 // Scale is loop invariant
385 Node *scale = n->in(2);
386 Node *scale_ctrl = get_ctrl(scale);
387 IdealLoopTree *scale_loop = get_loop(scale_ctrl );
388 if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
389 return NULL;
390 const TypeInt *scale_t = scale->bottom_type()->isa_int();
391 if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
392 return NULL; // Dont bother with byte/short masking
393 // Add must vary with loop (else shift would be loop-invariant)
394 Node *add = n->in(1);
395 Node *add_ctrl = get_ctrl(add);
396 IdealLoopTree *add_loop = get_loop(add_ctrl);
397 //assert( n_loop == add_loop, "" );
398 if( n_loop != add_loop ) return NULL; // happens w/ evil ZKM loops
399
400 // Convert I-V into I+ (0-V); same for V-I
401 if( add->Opcode() == Op_SubI &&
402 _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
403 Node *zero = _igvn.intcon(0);
404 set_ctrl(zero, C->root());
405 Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
406 register_new_node( neg, get_ctrl(add->in(2) ) );
407 add = new AddINode( add->in(1), neg );
408 register_new_node( add, add_ctrl );
409 }
410 if( add->Opcode() != Op_AddI ) return NULL;
411 // See if one add input is loop invariant
412 Node *add_var = add->in(1);
413 Node *add_var_ctrl = get_ctrl(add_var);
414 IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
415 Node *add_invar = add->in(2);
416 Node *add_invar_ctrl = get_ctrl(add_invar);
417 IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
418 if( add_var_loop == n_loop ) {
419 } else if( add_invar_loop == n_loop ) {
420 // Swap to find the invariant part
421 add_invar = add_var;
422 add_invar_ctrl = add_var_ctrl;
423 add_invar_loop = add_var_loop;
424 add_var = add->in(2);
425 Node *add_var_ctrl = get_ctrl(add_var);
426 IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
427 } else // Else neither input is loop invariant
428 return NULL;
429 if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
430 return NULL; // No invariant part of the add?
431
432 // Yes! Reshape address expression!
433 Node *inv_scale = new LShiftINode( add_invar, scale );
434 Node *inv_scale_ctrl =
435 dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
436 add_invar_ctrl : scale_ctrl;
437 register_new_node( inv_scale, inv_scale_ctrl );
438 Node *var_scale = new LShiftINode( add_var, scale );
439 register_new_node( var_scale, n_ctrl );
440 Node *var_add = new AddINode( var_scale, inv_scale );
441 register_new_node( var_add, n_ctrl );
442 _igvn.replace_node( n, var_add );
443 return var_add;
444 }
445
446 // Replace (I+V) with (V+I)
447 if( n_op == Op_AddI ||
448 n_op == Op_AddL ||
449 n_op == Op_AddF ||
450 n_op == Op_AddD ||
451 n_op == Op_MulI ||
452 n_op == Op_MulL ||
453 n_op == Op_MulF ||
454 n_op == Op_MulD ) {
455 if( n2_loop == n_loop ) {
456 assert( n1_loop != n_loop, "" );
457 n->swap_edges(1, 2);
458 }
459 }
460
461 // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
462 // but not if I2 is a constant.
463 if( n_op == Op_AddP ) {
464 if( n2_loop == n_loop && n3_loop != n_loop ) {
465 if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
466 Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
467 Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
468 IdealLoopTree *n22loop = get_loop( n22_ctrl );
469 IdealLoopTree *n23_loop = get_loop( n23_ctrl );
470 if( n22loop != n_loop && n22loop->is_member(n_loop) &&
471 n23_loop == n_loop ) {
472 Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
473 // Stuff new AddP in the loop preheader
474 register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
475 Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
476 register_new_node( add2, n_ctrl );
477 _igvn.replace_node( n, add2 );
478 return add2;
479 }
480 }
481 }
482
483 // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
484 if (n2_loop != n_loop && n3_loop == n_loop) {
485 if (n->in(3)->Opcode() == Op_AddX) {
486 Node *V = n->in(3)->in(1);
487 Node *I = n->in(3)->in(2);
488 if (is_member(n_loop,get_ctrl(V))) {
489 } else {
490 Node *tmp = V; V = I; I = tmp;
491 }
492 if (!is_member(n_loop,get_ctrl(I))) {
493 Node *add1 = new AddPNode(n->in(1), n->in(2), I);
494 // Stuff new AddP in the loop preheader
495 register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
496 Node *add2 = new AddPNode(n->in(1), add1, V);
497 register_new_node(add2, n_ctrl);
498 _igvn.replace_node(n, add2);
499 return add2;
500 }
501 }
502 }
503 }
504
505 return NULL;
506 }
507
508 // Optimize ((in1[2*i] * in2[2*i]) + (in1[2*i+1] * in2[2*i+1]))
convert_add_to_muladd(Node * n)509 Node *PhaseIdealLoop::convert_add_to_muladd(Node* n) {
510 assert(n->Opcode() == Op_AddI, "sanity");
511 Node * nn = NULL;
512 Node * in1 = n->in(1);
513 Node * in2 = n->in(2);
514 if (in1->Opcode() == Op_MulI && in2->Opcode() == Op_MulI) {
515 IdealLoopTree* loop_n = get_loop(get_ctrl(n));
516 if (loop_n->_head->as_Loop()->is_valid_counted_loop() &&
517 Matcher::match_rule_supported(Op_MulAddS2I) &&
518 Matcher::match_rule_supported(Op_MulAddVS2VI)) {
519 Node* mul_in1 = in1->in(1);
520 Node* mul_in2 = in1->in(2);
521 Node* mul_in3 = in2->in(1);
522 Node* mul_in4 = in2->in(2);
523 if (mul_in1->Opcode() == Op_LoadS &&
524 mul_in2->Opcode() == Op_LoadS &&
525 mul_in3->Opcode() == Op_LoadS &&
526 mul_in4->Opcode() == Op_LoadS) {
527 IdealLoopTree* loop1 = get_loop(get_ctrl(mul_in1));
528 IdealLoopTree* loop2 = get_loop(get_ctrl(mul_in2));
529 IdealLoopTree* loop3 = get_loop(get_ctrl(mul_in3));
530 IdealLoopTree* loop4 = get_loop(get_ctrl(mul_in4));
531 IdealLoopTree* loop5 = get_loop(get_ctrl(in1));
532 IdealLoopTree* loop6 = get_loop(get_ctrl(in2));
533 // All nodes should be in the same counted loop.
534 if (loop_n == loop1 && loop_n == loop2 && loop_n == loop3 &&
535 loop_n == loop4 && loop_n == loop5 && loop_n == loop6) {
536 Node* adr1 = mul_in1->in(MemNode::Address);
537 Node* adr2 = mul_in2->in(MemNode::Address);
538 Node* adr3 = mul_in3->in(MemNode::Address);
539 Node* adr4 = mul_in4->in(MemNode::Address);
540 if (adr1->is_AddP() && adr2->is_AddP() && adr3->is_AddP() && adr4->is_AddP()) {
541 if ((adr1->in(AddPNode::Base) == adr3->in(AddPNode::Base)) &&
542 (adr2->in(AddPNode::Base) == adr4->in(AddPNode::Base))) {
543 nn = new MulAddS2INode(mul_in1, mul_in2, mul_in3, mul_in4);
544 register_new_node(nn, get_ctrl(n));
545 _igvn.replace_node(n, nn);
546 return nn;
547 } else if ((adr1->in(AddPNode::Base) == adr4->in(AddPNode::Base)) &&
548 (adr2->in(AddPNode::Base) == adr3->in(AddPNode::Base))) {
549 nn = new MulAddS2INode(mul_in1, mul_in2, mul_in4, mul_in3);
550 register_new_node(nn, get_ctrl(n));
551 _igvn.replace_node(n, nn);
552 return nn;
553 }
554 }
555 }
556 }
557 }
558 }
559 return nn;
560 }
561
562 //------------------------------conditional_move-------------------------------
563 // Attempt to replace a Phi with a conditional move. We have some pretty
564 // strict profitability requirements. All Phis at the merge point must
565 // be converted, so we can remove the control flow. We need to limit the
566 // number of c-moves to a small handful. All code that was in the side-arms
567 // of the CFG diamond is now speculatively executed. This code has to be
568 // "cheap enough". We are pretty much limited to CFG diamonds that merge
569 // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
Node *PhaseIdealLoop::conditional_move( Node *region ) {

  assert(region->is_Region(), "sanity check");
  if (region->req() != 3) return NULL;

  // Check for CFG diamond
  Node *lp = region->in(1);
  Node *rp = region->in(2);
  if (!lp || !rp) return NULL;
  Node *lp_c = lp->in(0);
  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
  IfNode *iff = lp_c->as_If();

  // Check for ops pinned in an arm of the diamond.
  // Can't remove the control flow in this case
  if (lp->outcnt() > 1) return NULL;
  if (rp->outcnt() > 1) return NULL;

  IdealLoopTree* r_loop = get_loop(region);
  assert(r_loop == get_loop(iff), "sanity");
  // Always convert to CMOVE if all results are used only outside this loop.
  bool used_inside_loop = (r_loop == _ltree_root);

  // Check profitability
  int cost = 0;
  int phis = 0;
  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
    Node *out = region->fast_out(i);
    if (!out->is_Phi()) continue; // Ignore other control edges, etc
    phis++;
    PhiNode* phi = out->as_Phi();
    BasicType bt = phi->type()->basic_type();
    switch (bt) {
    case T_DOUBLE:
    case T_FLOAT:
      if (C->use_cmove()) {
        continue; //TODO: maybe we want to add some cost
      }
      cost += Matcher::float_cmove_cost(); // Could be very expensive
      break;
    case T_LONG: {
      cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
    }
      // Deliberate fall through: a long CMOV also pays the base cost below.
    case T_INT:                 // These all CMOV fine
    case T_ADDRESS: {           // (RawPtr)
      cost++;
      break;
    }
    case T_NARROWOOP: // Fall through
    case T_OBJECT: {            // Base oops are OK, but not derived oops
      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
      // have a Phi for the base here that we convert to a CMOVE all is well
      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
      // the allocator will have to produce a base by creating a CMOVE of the
      // relevant bases.  This puts the allocator in the business of
      // manufacturing expensive instructions, generally a bad plan.
      // Just Say No to Conditionally-Moved Derived Pointers.
      if (tp && tp->offset() != 0)
        return NULL;
      cost++;
      break;
    }
    default:
      return NULL;              // In particular, can't do memory or I/O
    }
    // Add in cost any speculative ops
    for (uint j = 1; j < region->req(); j++) {
      Node *proj = region->in(j);
      Node *inp = phi->in(j);
      if (get_ctrl(inp) == proj) { // Found local op
        cost++;
        // Check for a chain of dependent ops; these will all become
        // speculative in a CMOV.
        for (uint k = 1; k < inp->req(); k++)
          if (get_ctrl(inp->in(k)) == proj)
            cost += ConditionalMoveLimit; // Too much speculative goo
      }
    }
    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
    // This will likely Split-If, a higher-payoff operation.
    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
      Node* use = phi->fast_out(k);
      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
        cost += ConditionalMoveLimit;
      // Is there a use inside the loop?
      // Note: check only basic types since CMoveP is pinned.
      if (!used_inside_loop && is_java_primitive(bt)) {
        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
          used_inside_loop = true;
        }
      }
    }
  }//for
  Node* bol = iff->in(1);
  if (bol->Opcode() == Op_Opaque4) {
    return NULL; // Ignore loop predicate checks (the Opaque4 ensures they will go away)
  }
  assert(bol->Opcode() == Op_Bool, "Unexpected node");
  int cmp_op = bol->in(1)->Opcode();
  // It is expensive to generate flags from a float compare.
  // Avoid duplicated float compare.
  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;

  float infrequent_prob = PROB_UNLIKELY_MAG(3);
  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
  if (used_inside_loop) {
    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo

    // BlockLayoutByFrequency optimization moves infrequent branch
    // from hot path. No point in CMOV'ing in such case (110 is used
    // instead of 100 to take into account not exactness of float value).
    if (BlockLayoutByFrequency) {
      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
    }
  }
  // Check for highly predictable branch.  No point in CMOV'ing if
  // we are going to predict accurately all the time.
  if (C->use_cmove() && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) {
    //keep going
  } else if (iff->_prob < infrequent_prob ||
      iff->_prob > (1.0f - infrequent_prob))
    return NULL;

  // --------------
  // Now replace all Phis with CMOV's
  Node *cmov_ctrl = iff->in(0);
  // 'flip' selects which Phi input corresponds to the true projection.
  uint flip = (lp->Opcode() == Op_IfTrue);
  Node_List wq;
  while (1) {
    // Re-scan the region's outputs each iteration because replace_node
    // below mutates the out-list; stop when no Phi remains.
    PhiNode* phi = NULL;
    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
      Node *out = region->fast_out(i);
      if (out->is_Phi()) {
        phi = out->as_Phi();
        break;
      }
    }
    if (phi == NULL)  break;
    if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
    // Move speculative ops: walk the Phi's input chains and hoist every node
    // not already dominating the CMOV point up to cmov_ctrl.
    wq.push(phi);
    while (wq.size() > 0) {
      Node *n = wq.pop();
      for (uint j = 1; j < n->req(); j++) {
        Node* m = n->in(j);
        if (m != NULL && !is_dominator(get_ctrl(m), cmov_ctrl)) {
#ifndef PRODUCT
          if (PrintOpto && VerifyLoopOptimizations) {
            tty->print("  speculate: ");
            m->dump();
          }
#endif
          set_ctrl(m, cmov_ctrl);
          wq.push(m);
        }
      }
    }
    Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
    register_new_node( cmov, cmov_ctrl );
    _igvn.replace_node( phi, cmov );
#ifndef PRODUCT
    if (TraceLoopOpts) {
      tty->print("CMOV  ");
      r_loop->dump_head();
      if (Verbose) {
        bol->in(1)->dump(1);
        cmov->dump(1);
      }
    }
    if (VerifyLoopOptimizations) verify();
#endif
  }

  // The useless CFG diamond will fold up later; see the optimization in
  // RegionNode::Ideal.
  _igvn._worklist.push(region);

  // Return the test's Bool as a non-NULL success marker.
  return iff->in(1);
}
753
enqueue_cfg_uses(Node * m,Unique_Node_List & wq)754 static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
755 for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
756 Node* u = m->fast_out(i);
757 if (u->is_CFG()) {
758 if (u->Opcode() == Op_NeverBranch) {
759 u = ((NeverBranchNode*)u)->proj_out(0);
760 enqueue_cfg_uses(u, wq);
761 } else {
762 wq.push(u);
763 }
764 }
765 }
766 }
767
768 // Try moving a store out of a loop, right before the loop
Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
  // Store has to be first in the loop body
  IdealLoopTree *n_loop = get_loop(n_ctrl);
  if (n->is_Store() && n_loop != _ltree_root &&
      n_loop->is_loop() && n_loop->_head->is_Loop() &&
      n->in(0) != NULL) {
    Node* address = n->in(MemNode::Address);
    Node* value = n->in(MemNode::ValueIn);
    Node* mem = n->in(MemNode::Memory);
    IdealLoopTree* address_loop = get_loop(get_ctrl(address));
    IdealLoopTree* value_loop = get_loop(get_ctrl(value));

    // - address and value must be loop invariant
    // - memory must be a memory Phi for the loop
    // - Store must be the only store on this memory slice in the
    // loop: if there's another store following this one then value
    // written at iteration i by the second store could be overwritten
    // at iteration i+n by the first store: it's not safe to move the
    // first store out of the loop
    // - nothing must observe the memory Phi: it guarantees no read
    // before the store, we are also guaranteed the store post
    // dominates the loop head (ignoring a possible early
    // exit). Otherwise there would be extra Phi involved between the
    // loop's Phi and the store.
    // - there must be no early exit from the loop before the Store
    // (such an exit most of the time would be an extra use of the
    // memory Phi but sometimes is a bottom memory Phi that takes the
    // store as input).

    if (!n_loop->is_member(address_loop) &&
        !n_loop->is_member(value_loop) &&
        mem->is_Phi() && mem->in(0) == n_loop->_head &&
        mem->outcnt() == 1 &&
        mem->in(LoopNode::LoopBackControl) == n) {

      assert(n_loop->_tail != NULL, "need a tail");
      assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");

      // Verify that there's no early exit of the loop before the store.
      bool ctrl_ok = false;
      {
        // Follow control from loop head until n, we exit the loop or
        // we reach the tail
        ResourceMark rm;
        Unique_Node_List wq;
        wq.push(n_loop->_head);

        for (uint next = 0; next < wq.size(); ++next) {
          Node *m = wq.at(next);
          if (m == n->in(0)) {
            // Reached the store's control: this path is fine; don't walk
            // past the store.
            ctrl_ok = true;
            continue;
          }
          assert(!has_ctrl(m), "should be CFG");
          if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
            // Left the loop (early exit) or hit the backedge before
            // reaching the store: moving it would be unsafe.
            ctrl_ok = false;
            break;
          }
          enqueue_cfg_uses(m, wq);
          if (wq.size() > 10) {
            // Bound the CFG walk; give up on large/complex loop bodies.
            ctrl_ok = false;
            break;
          }
        }
      }
      if (ctrl_ok) {
        // move the Store
        // Make the memory Phi's backedge self-referential so the store can
        // be detached from the loop's memory cycle.
        _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
        // Pin the store on the loop's entry control (outside any strip-mined
        // outer loop) and feed it the pre-loop memory state.
        _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
        _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
        // Disconnect the phi now. An empty phi can confuse other
        // optimizations in this pass of loop opts.
        _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
        n_loop->_body.yank(mem);

        set_ctrl_and_loop(n, n->in(0));

        return n;
      }
    }
  }
  return NULL;
}
852
853 // Try moving a store out of a loop, right after the loop
void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
  // Only pinned stores are candidates; an unpinned store cannot be
  // re-anchored safely.
  if (n->is_Store() && n->in(0) != NULL) {
    Node *n_ctrl = get_ctrl(n);
    IdealLoopTree *n_loop = get_loop(n_ctrl);
    // Store must be in a loop
    if (n_loop != _ltree_root && !n_loop->_irreducible) {
      Node* address = n->in(MemNode::Address);
      Node* value = n->in(MemNode::ValueIn);
      IdealLoopTree* address_loop = get_loop(get_ctrl(address));
      // address must be loop invariant
      if (!n_loop->is_member(address_loop)) {
        // Store must be last on this memory slice in the loop and
        // nothing in the loop must observe it
        Node* phi = NULL;
        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
          Node* u = n->fast_out(i);
          if (has_ctrl(u)) { // control use?
            IdealLoopTree *u_loop = get_loop(get_ctrl(u));
            if (!n_loop->is_member(u_loop)) {
              // Use outside the loop is fine: the store's value escapes
              // only after the loop is done.
              continue;
            }
            if (u->is_Phi() && u->in(0) == n_loop->_head) {
              assert(_igvn.type(u) == Type::MEMORY, "bad phi");
              // multiple phis on the same slice are possible
              if (phi != NULL) {
                return;
              }
              phi = u;
              continue;
            }
          }
          // Any other in-loop use observes the store: bail out.
          return;
        }
        if (phi != NULL) {
          // Nothing in the loop before the store (next iteration)
          // must observe the stored value
          bool mem_ok = true;
          {
            // Walk the memory subgraph downstream of the loop phi; only
            // other stores/phis on the slice (and ultimately 'n' itself)
            // are acceptable users. Bounded at 10 nodes to limit cost.
            ResourceMark rm;
            Unique_Node_List wq;
            wq.push(phi);
            for (uint next = 0; next < wq.size() && mem_ok; ++next) {
              Node *m = wq.at(next);
              for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
                Node* u = m->fast_out(i);
                if (u->is_Store() || u->is_Phi()) {
                  if (u != n) {
                    wq.push(u);
                    mem_ok = (wq.size() <= 10);
                  }
                } else {
                  mem_ok = false;
                  break;
                }
              }
            }
          }
          if (mem_ok) {
            // Move the store out of the loop if the LCA of all
            // users (except for the phi) is outside the loop.
            // 'hook' temporarily replaces 'n' as the phi's input so that
            // get_late_ctrl() below does not see the phi as a user of 'n'.
            Node* hook = new Node(1);
            hook->init_req(0, n_ctrl); // Add an input to prevent hook from being dead
            _igvn.rehash_node_delayed(phi);
            int count = phi->replace_edge(n, hook);
            assert(count > 0, "inconsistent phi");

            // Compute latest point this store can go
            Node* lca = get_late_ctrl(n, get_ctrl(n));
            if (lca->is_OuterStripMinedLoop()) {
              // Never anchor the store on the outer strip-mined loop head;
              // use its entry control instead.
              lca = lca->in(LoopNode::EntryControl);
            }
            if (n_loop->is_member(get_loop(lca))) {
              // LCA is in the loop - bail out
              // (restore the phi edge we redirected to 'hook' above)
              _igvn.replace_node(hook, n);
              return;
            }
#ifdef ASSERT
            if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
              assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is a strip mined");
              n_loop->_head->as_Loop()->verify_strip_mined(1);
              Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
              IdealLoopTree* outer_loop = get_loop(outer);
              assert(n_loop->_parent == outer_loop, "broken loop tree");
              assert(get_loop(lca) == outer_loop, "safepoint in outer loop consume all memory state");
            }
#endif

            // Move store out of the loop
            // In-loop users of the phi (via 'hook') now see the store's
            // memory input directly; the store itself is re-anchored at lca.
            _igvn.replace_node(hook, n->in(MemNode::Memory));
            _igvn.replace_input_of(n, 0, lca);
            set_ctrl_and_loop(n, lca);

            // Disconnect the phi now. An empty phi can confuse other
            // optimizations in this pass of loop opts..
            if (phi->in(LoopNode::LoopBackControl) == phi) {
              _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
              n_loop->_body.yank(phi);
            }
          }
        }
      }
    }
  }
}
958
959 //------------------------------split_if_with_blocks_pre-----------------------
960 // Do the real work in a non-recursive function. Data nodes want to be
961 // cloned in the pre-order so they can feed each other nicely.
Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
  // Give the GC barrier set a first chance to transform the node.
  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
  Node* bs_res = bs->split_if_pre(this, n);
  if (bs_res != NULL) {
    return bs_res;
  }
  // Cloning these guys is unlikely to win
  int n_op = n->Opcode();
  if( n_op == Op_MergeMem ) return n;
  if( n->is_Proj() ) return n;
  // Do not clone-up CmpFXXX variations, as these are always
  // followed by a CmpI
  if( n->is_Cmp() ) return n;
  // Attempt to use a conditional move instead of a phi/branch
  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
    Node *cmov = conditional_move( n );
    if( cmov ) return cmov;
  }
  if( n->is_CFG() || n->is_LoadStore() )
    return n;
  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
      n_op == Op_Opaque2 ) {
    if( !C->major_progress() )   // If chance of no more loop opts...
      _igvn._worklist.push(n);  // maybe we'll remove them
    return n;
  }

  if( n->is_Con() ) return n;   // No cloning for Con nodes

  Node *n_ctrl = get_ctrl(n);
  if( !n_ctrl ) return n;       // Dead node

  // Try hoisting a loop-invariant store to just before its loop.
  // On success the store was rewired in place; keep visiting 'n'.
  Node* res = try_move_store_before_loop(n, n_ctrl);
  if (res != NULL) {
    return n;
  }

  // Attempt to remix address expressions for loop invariants
  Node *m = remix_address_expressions( n );
  if( m ) return m;

  if (n_op == Op_AddI) {
    // Try to reshape the add into a multiply-add form.
    Node *nn = convert_add_to_muladd( n );
    if ( nn ) return nn;
  }

  if (n->is_ConstraintCast()) {
    Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
    // ConstraintCastNode::dominating_cast() uses node control input to determine domination.
    // Node control inputs don't necessarily agree with loop control info (due to
    // transformations happened in between), thus additional dominance check is needed
    // to keep loop info valid.
    if (dom_cast != NULL && is_dominator(get_ctrl(dom_cast), get_ctrl(n))) {
      _igvn.replace_node(n, dom_cast);
      return dom_cast;
    }
  }

  // Determine if the Node has inputs from some local Phi.
  // Returns the block to clone thru.
  Node *n_blk = has_local_phi_input( n );
  if( !n_blk ) return n;

  // Do not clone the trip counter through on a CountedLoop
  // (messes up the canonical shape).
  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;

  // Check for having no control input; not pinned.  Allow
  // dominating control.
  if (n->in(0)) {
    Node *dom = idom(n_blk);
    if (dom_lca(n->in(0), dom) != n->in(0)) {
      return n;
    }
  }
  // Policy: when is it profitable.  You must get more wins than
  // policy before it is considered profitable.  Policy is usually 0,
  // so 1 win is considered profitable.  Big merges will require big
  // cloning, so get a larger policy.
  int policy = n_blk->req() >> 2;

  // If the loop is a candidate for range check elimination,
  // delay splitting through it's phi until a later loop optimization
  if (n_blk->is_CountedLoop()) {
    IdealLoopTree *lp = get_loop(n_blk);
    if (lp && lp->_rce_candidate) {
      return n;
    }
  }

  if (must_throttle_split_if()) return n;

  // Split 'n' through the merge point if it is profitable
  Node *phi = split_thru_phi( n, n_blk, policy );
  if (!phi) return n;

  // Found a Phi to split thru!
  // Replace 'n' with the new phi
  _igvn.replace_node( n, phi );
  // Moved a load around the loop, 'en-registering' something.
  if (n_blk->is_Loop() && n->is_Load() &&
      !phi->in(LoopNode::LoopBackControl)->is_Load())
    C->set_major_progress();

  return phi;
}
1068
merge_point_too_heavy(Compile * C,Node * region)1069 static bool merge_point_too_heavy(Compile* C, Node* region) {
1070 // Bail out if the region and its phis have too many users.
1071 int weight = 0;
1072 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1073 weight += region->fast_out(i)->outcnt();
1074 }
1075 int nodes_left = C->max_node_limit() - C->live_nodes();
1076 if (weight * 8 > nodes_left) {
1077 if (PrintOpto) {
1078 tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight);
1079 }
1080 return true;
1081 } else {
1082 return false;
1083 }
1084 }
1085
merge_point_safe(Node * region)1086 static bool merge_point_safe(Node* region) {
1087 // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
1088 // having a PhiNode input. This sidesteps the dangerous case where the split
1089 // ConvI2LNode may become TOP if the input Value() does not
1090 // overlap the ConvI2L range, leaving a node which may not dominate its
1091 // uses.
1092 // A better fix for this problem can be found in the BugTraq entry, but
1093 // expediency for Mantis demands this hack.
1094 #ifdef _LP64
1095 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1096 Node* n = region->fast_out(i);
1097 if (n->is_Phi()) {
1098 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1099 Node* m = n->fast_out(j);
1100 if (m->Opcode() == Op_ConvI2L)
1101 return false;
1102 if (m->is_CastII() && m->isa_CastII()->has_range_check()) {
1103 return false;
1104 }
1105 }
1106 }
1107 }
1108 #endif
1109 return true;
1110 }
1111
1112
1113 //------------------------------place_near_use---------------------------------
1114 // Place some computation next to use but not inside inner loops.
1115 // For inner loop uses move it to the preheader area.
place_near_use(Node * useblock) const1116 Node *PhaseIdealLoop::place_near_use(Node *useblock) const {
1117 IdealLoopTree *u_loop = get_loop( useblock );
1118 if (u_loop->_irreducible) {
1119 return useblock;
1120 }
1121 if (u_loop->_child) {
1122 if (useblock == u_loop->_head && u_loop->_head->is_OuterStripMinedLoop()) {
1123 return u_loop->_head->in(LoopNode::EntryControl);
1124 }
1125 return useblock;
1126 }
1127 return u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
1128 }
1129
1130
identical_backtoback_ifs(Node * n)1131 bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
1132 if (!n->is_If() || n->is_CountedLoopEnd()) {
1133 return false;
1134 }
1135 if (!n->in(0)->is_Region()) {
1136 return false;
1137 }
1138 Node* region = n->in(0);
1139 Node* dom = idom(region);
1140 if (!dom->is_If() || dom->in(1) != n->in(1)) {
1141 return false;
1142 }
1143 IfNode* dom_if = dom->as_If();
1144 Node* proj_true = dom_if->proj_out(1);
1145 Node* proj_false = dom_if->proj_out(0);
1146
1147 for (uint i = 1; i < region->req(); i++) {
1148 if (is_dominator(proj_true, region->in(i))) {
1149 continue;
1150 }
1151 if (is_dominator(proj_false, region->in(i))) {
1152 continue;
1153 }
1154 return false;
1155 }
1156
1157 return true;
1158 }
1159
1160
can_split_if(Node * n_ctrl)1161 bool PhaseIdealLoop::can_split_if(Node* n_ctrl) {
1162 if (must_throttle_split_if()) {
1163 return false;
1164 }
1165
1166 // Do not do 'split-if' if irreducible loops are present.
1167 if (_has_irreducible_loops) {
1168 return false;
1169 }
1170
1171 if (merge_point_too_heavy(C, n_ctrl)) {
1172 return false;
1173 }
1174
1175 // Do not do 'split-if' if some paths are dead. First do dead code
1176 // elimination and then see if its still profitable.
1177 for (uint i = 1; i < n_ctrl->req(); i++) {
1178 if (n_ctrl->in(i) == C->top()) {
1179 return false;
1180 }
1181 }
1182
1183 // If trying to do a 'Split-If' at the loop head, it is only
1184 // profitable if the cmp folds up on BOTH paths. Otherwise we
1185 // risk peeling a loop forever.
1186
1187 // CNC - Disabled for now. Requires careful handling of loop
1188 // body selection for the cloned code. Also, make sure we check
1189 // for any input path not being in the same loop as n_ctrl. For
1190 // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1191 // because the alternative loop entry points won't be converted
1192 // into LoopNodes.
1193 IdealLoopTree *n_loop = get_loop(n_ctrl);
1194 for (uint j = 1; j < n_ctrl->req(); j++) {
1195 if (get_loop(n_ctrl->in(j)) != n_loop) {
1196 return false;
1197 }
1198 }
1199
1200 // Check for safety of the merge point.
1201 if (!merge_point_safe(n_ctrl)) {
1202 return false;
1203 }
1204
1205 return true;
1206 }
1207
1208 // Detect if the node is the inner strip-mined loop
1209 // Return: NULL if it's not the case, or the exit of outer strip-mined loop
// Detect if the node is the loop-end of an inner strip-mined loop.
// Return: NULL if it's not the case, or the exit of the outer strip-mined loop.
static Node* is_inner_of_stripmined_loop(const Node* out) {
  if (!out->is_CountedLoopEnd()) {
    return NULL;
  }
  const CountedLoopNode* counted_loop = out->as_CountedLoopEnd()->loopnode();
  if (counted_loop == NULL || !counted_loop->is_strip_mined()) {
    return NULL;
  }
  // The inner loop's entry control is the OuterStripMinedLoop node;
  // hand back that outer loop's exit projection.
  return counted_loop->in(LoopNode::EntryControl)->as_OuterStripMinedLoop()->outer_loop_exit();
}
1223
1224 //------------------------------split_if_with_blocks_post----------------------
1225 // Do the real work in a non-recursive function. CFG hackery wants to be
1226 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
1227 // info.
void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {

  // Cloning Cmp through Phi's involves the split-if transform.
  // FastLock is not used by an If
  if (n->is_Cmp() && !n->is_FastLock()) {
    Node *n_ctrl = get_ctrl(n);
    // Determine if the Node has inputs from some local Phi.
    // Returns the block to clone thru.
    Node *n_blk = has_local_phi_input(n);
    if (n_blk != n_ctrl) {
      // Phi inputs must come from the Cmp's own block for split-if.
      return;
    }

    if (!can_split_if(n_ctrl)) {
      return;
    }

    if (n->outcnt() != 1) {
      return; // Multiple bool's from 1 compare?
    }
    Node *bol = n->unique_out();
    assert(bol->is_Bool(), "expect a bool here");
    if (bol->outcnt() != 1) {
      return;// Multiple branches from 1 compare?
    }
    Node *iff = bol->unique_out();

    // Check some safety conditions
    if (iff->is_If()) {        // Classic split-if?
      if (iff->in(0) != n_ctrl) {
        return; // Compare must be in same blk as if
      }
    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
      // Can't split CMove with different control edge.
      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) {
        return;
      }
      if (get_ctrl(iff->in(2)) == n_ctrl ||
          get_ctrl(iff->in(3)) == n_ctrl) {
        return;                 // Inputs not yet split-up
      }
      if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
        return;                 // Loop-invar test gates loop-varying CMOVE
      }
    } else {
      return;  // some other kind of node, such as an Allocate
    }

    // When is split-if profitable?  Every 'win' on means some control flow
    // goes dead, so it's almost always a win.
    int policy = 0;
    // Split compare 'n' through the merge point if it is profitable
    Node *phi = split_thru_phi( n, n_ctrl, policy);
    if (!phi) {
      return;
    }

    // Found a Phi to split thru!
    // Replace 'n' with the new phi
    _igvn.replace_node(n, phi);

    // Now split the bool up thru the phi
    Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
    guarantee(bolphi != NULL, "null boolean phi node");

    _igvn.replace_node(bol, bolphi);
    assert(iff->in(1) == bolphi, "");

    if (bolphi->Value(&_igvn)->singleton()) {
      // The boolean folded to a constant; IGVN will clean up the If.
      return;
    }

    // Conditional-move?  Must split up now
    if (!iff->is_If()) {
      Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
      _igvn.replace_node(iff, cmovphi);
      return;
    }

    // Now split the IF
    do_split_if(iff);
    return;
  }

  // Two identical ifs back to back can be merged
  if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
    // Build a boolean Phi at the merge point that records, per path,
    // which way the dominating (identical) test went, then split this If.
    Node *n_ctrl = n->in(0);
    PhiNode* bolphi = PhiNode::make_blank(n_ctrl, n->in(1));
    IfNode* dom_if = idom(n_ctrl)->as_If();
    Node* proj_true = dom_if->proj_out(1);
    Node* proj_false = dom_if->proj_out(0);
    Node* con_true = _igvn.makecon(TypeInt::ONE);
    Node* con_false = _igvn.makecon(TypeInt::ZERO);

    for (uint i = 1; i < n_ctrl->req(); i++) {
      if (is_dominator(proj_true, n_ctrl->in(i))) {
        bolphi->init_req(i, con_true);
      } else {
        assert(is_dominator(proj_false, n_ctrl->in(i)), "bad if");
        bolphi->init_req(i, con_false);
      }
    }
    register_new_node(bolphi, n_ctrl);
    _igvn.replace_input_of(n, 1, bolphi);

    // Now split the IF
    do_split_if(n);
    return;
  }

  // Check for an IF ready to split; one that has its
  // condition codes input coming from a Phi at the block start.
  int n_op = n->Opcode();

  // Check for an IF being dominated by another IF same test
  if (n_op == Op_If ||
      n_op == Op_RangeCheck) {
    Node *bol = n->in(1);
    uint max = bol->outcnt();
    // Check for same test used more than once?
    if (max > 1 && bol->is_Bool()) {
      // Search up IDOMs to see if this IF is dominated.
      Node *cutoff = get_ctrl(bol);

      // Now search up IDOMs till cutoff, looking for a dominating test
      Node *prevdom = n;
      Node *dom = idom(prevdom);
      while (dom != cutoff) {
        if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom) {
          // It's invalid to move control dependent data nodes in the inner
          // strip-mined loop, because:
          //  1) break validation of LoopNode::verify_strip_mined()
          //  2) move code with side-effect in strip-mined loop
          // Move to the exit of outer strip-mined loop in that case.
          Node* out_le = is_inner_of_stripmined_loop(dom);
          if (out_le != NULL) {
            prevdom = out_le;
          }
          // Replace the dominated test with an obvious true or false.
          // Place it on the IGVN worklist for later cleanup.
          C->set_major_progress();
          dominated_by(prevdom, n, false, true);
#ifndef PRODUCT
          if( VerifyLoopOptimizations ) verify();
#endif
          return;
        }
        prevdom = dom;
        dom = idom(prevdom);
      }
    }
  }

  // See if a shared loop-varying computation has no loop-varying uses.
  // Happens if something is only used for JVM state in uncommon trap exits,
  // like various versions of induction variable+offset.  Clone the
  // computation per usage to allow it to sink out of the loop.
  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
    Node *n_ctrl = get_ctrl(n);
    IdealLoopTree *n_loop = get_loop(n_ctrl);
    if( n_loop != _ltree_root ) {
      // First pass: verify no use is loop-varying w.r.t. n's loop.
      DUIterator_Fast imax, i = n->fast_outs(imax);
      for (; i < imax; i++) {
        Node* u = n->fast_out(i);
        if( !has_ctrl(u) ) break; // Found control user
        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
        if( u_loop == n_loop ) break; // Found loop-varying use
        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
      }
      bool did_break = (i < imax);  // Did we break out of the previous loop?
      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
        Node *late_load_ctrl = NULL;
        if (n->is_Load()) {
          // If n is a load, get and save the result from get_late_ctrl(),
          // to be later used in calculating the control for n's clones.
          clear_dom_lca_tags();
          late_load_ctrl = get_late_ctrl(n, n_ctrl);
        }
        // If n is a load, and the late control is the same as the current
        // control, then the cloning of n is a pointless exercise, because
        // GVN will ensure that we end up where we started.
        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
          BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
          // Clone n once per use and sink each clone next to its use.
          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
            Node *u = n->last_out(j); // Clone private computation per use
            _igvn.rehash_node_delayed(u);
            Node *x = n->clone(); // Clone computation
            Node *x_ctrl = NULL;
            if( u->is_Phi() ) {
              // Replace all uses of normal nodes.  Replace Phi uses
              // individually, so the separate Nodes can sink down
              // different paths.
              uint k = 1;
              while( u->in(k) != n ) k++;
              u->set_req( k, x );
              // x goes next to Phi input path
              x_ctrl = u->in(0)->in(k);
              --j;
            } else {              // Normal use
              // Replace all uses
              for( uint k = 0; k < u->req(); k++ ) {
                if( u->in(k) == n ) {
                  u->set_req( k, x );
                  --j;
                }
              }
              x_ctrl = get_ctrl(u);
            }

            // Find control for 'x' next to use but not inside inner loops.
            // For inner loop uses get the preheader area.
            x_ctrl = place_near_use(x_ctrl);

            if (bs->sink_node(this, n, x, x_ctrl, n_ctrl)) {
              // The barrier set placed the clone itself.
              continue;
            }

            if (n->is_Load()) {
              // For loads, add a control edge to a CFG node outside of the loop
              // to force them to not combine and return back inside the loop
              // during GVN optimization (4641526).
              //
              // Because we are setting the actual control input, factor in
              // the result from get_late_ctrl() so we respect any
              // anti-dependences. (6233005).
              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);

              // Don't allow the control input to be a CFG splitting node.
              // Such nodes should only have ProjNodes as outs, e.g. IfNode
              // should only have IfTrueNode and IfFalseNode (4985384).
              x_ctrl = find_non_split_ctrl(x_ctrl);

              IdealLoopTree* x_loop = get_loop(x_ctrl);
              Node* x_head = x_loop->_head;
              if (x_head->is_Loop() && (x_head->is_OuterStripMinedLoop() || x_head->as_Loop()->is_strip_mined()) && is_dominator(n_ctrl, x_head)) {
                // Anti dependence analysis is sometimes too
                // conservative: a store in the outer strip mined loop
                // can prevent a load from floating out of the outer
                // strip mined loop but the load may not be referenced
                // from the safepoint: loop strip mining verification
                // code reports a problem in that case. Make sure the
                // load is not moved in the outer strip mined loop in
                // that case.
                x_ctrl = x_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
              }
              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");

              x->set_req(0, x_ctrl);
            }
            register_new_node(x, x_ctrl);

            // Some institutional knowledge is needed here: 'x' is
            // yanked because if the optimizer runs GVN on it all the
            // cloned x's will common up and undo this optimization and
            // be forced back in the loop.  This is annoying because it
            // makes +VerifyOpto report false-positives on progress.  I
            // tried setting control edges on the x's to force them to
            // not combine, but the matching gets worried when it tries
            // to fold a StoreP and an AddP together (as part of an
            // address expression) and the AddP and StoreP have
            // different controls.
            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
          }
          _igvn.remove_dead_node(n);
        }
      }
    }
  }

  try_move_store_after_loop(n);

  // Check for Opaque2's who's loop has disappeared - who's input is in the
  // same loop nest as their output.  Remove 'em, they are no longer useful.
  if( n_op == Op_Opaque2 &&
      n->in(1) != NULL &&
      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
    _igvn.replace_node( n, n->in(1) );
  }
}
1508
1509 //------------------------------split_if_with_blocks---------------------------
1510 // Check for aggressive application of 'split-if' optimization,
1511 // using basic block level info.
void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) {
  // Iterative depth-first traversal of the graph from the root, using
  // 'nstack' as an explicit stack: pre-visit work happens when a node is
  // first reached, post-visit work after all its uses have been processed.
  Node* root = C->root();
  visited.set(root->_idx); // first, mark root as visited
  // Do pre-visit work for root
  Node* n   = split_if_with_blocks_pre(root);
  uint  cnt = n->outcnt();
  uint  i   = 0;

  while (true) {
    // Visit all children
    if (i < cnt) {
      Node* use = n->raw_out(i);
      ++i;
      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
        // Now do pre-visit work for this use
        use = split_if_with_blocks_pre(use);
        nstack.push(n, i); // Save parent and next use's index.
        n   = use;         // Process all children of current use.
        cnt = use->outcnt();
        i   = 0;
      }
    }
    else {
      // All of n's children have been processed, complete post-processing.
      if (cnt != 0 && !n->is_Con()) {
        assert(has_node(n), "no dead nodes");
        split_if_with_blocks_post(n);
      }
      if (must_throttle_split_if()) {
        // Node budget exhausted: abandon the rest of the traversal.
        nstack.clear();
      }
      if (nstack.is_empty()) {
        // Finished all nodes on stack.
        break;
      }
      // Get saved parent node and next use's index. Visit the rest of uses.
      n   = nstack.node();
      cnt = n->outcnt();
      i   = nstack.index();
      nstack.pop();
    }
  }
}
1555
1556
1557 //=============================================================================
1558 //
1559 // C L O N E A L O O P B O D Y
1560 //
1561
1562 //------------------------------clone_iff--------------------------------------
1563 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1564 // "Nearly" because all Nodes have been cloned from the original in the loop,
1565 // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
1566 // through the Phi recursively, and return a Bool.
Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {

  // Convert this Phi into a Phi merging Bools
  // (recursing first so every input is a Bool or an Opaque4 over a Bool).
  uint i;
  for (i = 1; i < phi->req(); i++) {
    Node *b = phi->in(i);
    if (b->is_Phi()) {
      _igvn.replace_input_of(phi, i, clone_iff(b->as_Phi(), loop));
    } else {
      assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
    }
  }

  // Use input 1 as the template for the Bool/Cmp (and optional Opaque4)
  // shape; all inputs are assumed to share this shape.
  Node* n = phi->in(1);
  Node* sample_opaque = NULL;
  Node *sample_bool = NULL;
  if (n->Opcode() == Op_Opaque4) {
    sample_opaque = n;
    sample_bool = n->in(1);
    assert(sample_bool->is_Bool(), "wrong type");
  } else {
    sample_bool = n;
  }
  Node *sample_cmp = sample_bool->in(1);

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new PhiNode(phi->in(0), Type::TOP);
  PhiNode *phi2 = new PhiNode(phi->in(0), Type::TOP);
  for (i = 1; i < phi->req(); i++) {
    // Dig out each input's Cmp operands (one level deeper when wrapped
    // in an Opaque4) and widen the new phis' types to cover them.
    Node *n1 = sample_opaque == NULL ? phi->in(i)->in(1)->in(1) : phi->in(i)->in(1)->in(1)->in(1);
    Node *n2 = sample_opaque == NULL ? phi->in(i)->in(1)->in(2) : phi->in(i)->in(1)->in(1)->in(2);
    phi1->set_req(i, n1);
    phi2->set_req(i, n2);
    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  }
  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if (hit1) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1);     // Remove new phi
    assert(hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;            // Use existing phi
  } else {                             // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if (hit2) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2);     // Remove new phi
    assert(hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;            // Use existing phi
  } else {                             // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp
  Node *cmp = sample_cmp->clone();
  cmp->set_req(1, phi1);
  cmp->set_req(2, phi2);
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  // Make a new Bool
  Node *b = sample_bool->clone();
  b->set_req(1,cmp);
  _igvn.register_new_node_with_optimizer(b);
  set_ctrl(b, phi->in(0));

  if (sample_opaque != NULL) {
    // Re-wrap the merged Bool in an Opaque4 to preserve the original shape.
    Node* opaque = sample_opaque->clone();
    opaque->set_req(1, b);
    _igvn.register_new_node_with_optimizer(opaque);
    set_ctrl(opaque, phi->in(0));
    return opaque;
  }

  assert(b->is_Bool(), "");
  return b;
}
1648
1649 //------------------------------clone_bool-------------------------------------
1650 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1651 // "Nearly" because all Nodes have been cloned from the original in the loop,
1652 // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
1653 // through the Phi recursively, and return a Bool.
CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
  uint i;
  // Convert this Phi into a Phi merging Bools
  // (recursing first so every input is a Cmp or TOP).
  for( i = 1; i < phi->req(); i++ ) {
    Node *b = phi->in(i);
    if( b->is_Phi() ) {
      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
    } else {
      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
    }
  }

  // Input 1 serves as the template Cmp to clone below.
  Node *sample_cmp = phi->in(1);

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
  for( uint j = 1; j < phi->req(); j++ ) {
    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
    Node *n1, *n2;
    if( cmp_top->is_Cmp() ) {
      n1 = cmp_top->in(1);
      n2 = cmp_top->in(2);
    } else {
      // TOP input: feed TOP through both operand phis.
      n1 = n2 = cmp_top;
    }
    phi1->set_req( j, n1 );
    phi2->set_req( j, n2 );
    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  }

  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if( hit1 ) {                         // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1);     // Remove new phi
    assert( hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;            // Use existing phi
  } else {                             // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if( hit2 ) {                         // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2);     // Remove new phi
    assert( hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;            // Use existing phi
  } else {                             // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp
  Node *cmp = sample_cmp->clone();
  cmp->set_req( 1, phi1 );
  cmp->set_req( 2, phi2 );
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  assert( cmp->is_Cmp(), "" );
  return (CmpNode*)cmp;
}
1717
1718 //------------------------------sink_use---------------------------------------
1719 // If 'use' was in the loop-exit block, it now needs to be sunk
1720 // below the post-loop merge point.
sink_use(Node * use,Node * post_loop)1721 void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1722 if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1723 set_ctrl(use, post_loop);
1724 for (DUIterator j = use->outs(); use->has_out(j); j++)
1725 sink_use(use->out(j), post_loop);
1726 }
1727 }
1728
// Fix up data uses of a cloned node 'old': every use that lives outside of
// 'loop' (and, for strip mined loops, outside 'outer_loop') used to see a
// single value but must now merge the old-loop and new-loop values with a
// Phi at the post-loop merge Region (built in step 7 of clone_loop and
// recorded in 'old_new').  Uses that cannot consume a Phi directly (If,
// CMove, predicate Opaque1, Opaque4, Bool, CreateEx) are queued on the
// lazily allocated split_if_set/split_bool_set/split_cex_set worklists for
// later splitting by the caller.  'new_counter' is the node-index watermark
// separating pre-existing nodes from nodes created by this round of cloning.
void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
                                                 IdealLoopTree* loop, IdealLoopTree* outer_loop,
                                                 Node_List*& split_if_set, Node_List*& split_bool_set,
                                                 Node_List*& split_cex_set, Node_List& worklist,
                                                 uint new_counter, CloneLoopMode mode) {
  Node* nnn = old_new[old->_idx];
  // Copy uses to a worklist, so I can munge the def-use info
  // with impunity.
  for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
    worklist.push(old->fast_out(j));

  while( worklist.size() ) {
    Node *use = worklist.pop();
    if (!has_node(use)) continue; // Ignore dead nodes
    if (use->in(0) == C->top()) continue;
    IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
    // Check for data-use outside of loop - at least one of OLD or USE
    // must not be a CFG node.
#ifdef ASSERT
    if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
      Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
      assert(mode != IgnoreStripMined, "incorrect cloning mode");
      assert((mode == ControlAroundStripMined && use == sfpt) || !use->is_reachable_from_root(), "missed a node");
    }
#endif
    if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {

      // If the Data use is an IF, that means we have an IF outside of the
      // loop that is switching on a condition that is set inside of the
      // loop.  Happens if people set a loop-exit flag; then test the flag
      // in the loop to break the loop, then test is again outside of the
      // loop to determine which way the loop exited.
      // Loop predicate If node connects to Bool node through Opaque1 node.
      if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
        // Since this code is highly unlikely, we lazily build the worklist
        // of such Nodes to go split.
        if (!split_if_set) {
          ResourceArea *area = Thread::current()->resource_area();
          split_if_set = new Node_List(area);
        }
        split_if_set->push(use);
      }
      if (use->is_Bool()) {
        if (!split_bool_set) {
          ResourceArea *area = Thread::current()->resource_area();
          split_bool_set = new Node_List(area);
        }
        split_bool_set->push(use);
      }
      if (use->Opcode() == Op_CreateEx) {
        if (!split_cex_set) {
          ResourceArea *area = Thread::current()->resource_area();
          split_cex_set = new Node_List(area);
        }
        split_cex_set->push(use);
      }


      // Get "block" use is in
      uint idx = 0;
      while( use->in(idx) != old ) idx++;
      Node *prev = use->is_CFG() ? use : get_ctrl(use);
      assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
      // Nodes created by this cloning round keep the old-exit path at in(2)
      // of their merge Region; older nodes are walked via the dominator tree.
      Node *cfg = prev->_idx >= new_counter
        ? prev->in(2)
        : idom(prev);
      if( use->is_Phi() )     // Phi use is in prior block
        cfg = prev->in(idx);  // NOT in block of Phi itself
      if (cfg->is_top()) {    // Use is dead?
        _igvn.replace_input_of(use, idx, C->top());
        continue;
      }

      // If use is referenced through control edge... (idx == 0)
      if (mode == IgnoreStripMined && idx == 0) {
        LoopNode *head = loop->_head->as_Loop();
        if (head->is_strip_mined() && is_dominator(head->outer_loop_exit(), prev)) {
          // That node is outside the inner loop, leave it outside the
          // outer loop as well to not confuse verification code.
          assert(!loop->_parent->is_member(use_loop), "should be out of the outer loop");
          _igvn.replace_input_of(use, 0, head->outer_loop_exit());
          continue;
        }
      }

      // Walk up until we are back inside the outer loop: that spot is the
      // merge point where the old and new values meet.
      while(!outer_loop->is_member(get_loop(cfg))) {
        prev = cfg;
        cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
      }
      // If the use occurs after merging several exits from the loop, then
      // old value must have dominated all those exits.  Since the same old
      // value was used on all those exits we did not need a Phi at this
      // merge point.  NOW we do need a Phi here.  Each loop exit value
      // is now merged with the peeled body exit; each exit gets its own
      // private Phi and those Phis need to be merged here.
      Node *phi;
      if( prev->is_Region() ) {
        if( idx == 0 ) {      // Updating control edge?
          phi = prev;         // Just use existing control
        } else {              // Else need a new Phi
          phi = PhiNode::make( prev, old );
          // Now recursively fix up the new uses of old!
          for( uint i = 1; i < prev->req(); i++ ) {
            worklist.push(phi); // Onto worklist once for each 'old' input
          }
        }
      } else {
        // Get new RegionNode merging old and new loop exits
        prev = old_new[prev->_idx];
        assert( prev, "just made this in step 7" );
        if( idx == 0) {      // Updating control edge?
          phi = prev;         // Just use existing control
        } else {              // Else need a new Phi
          // Make a new Phi merging data values properly
          phi = PhiNode::make( prev, old );
          phi->set_req( 1, nnn );
        }
      }
      // If inserting a new Phi, check for prior hits
      if( idx != 0 ) {
        Node *hit = _igvn.hash_find_insert(phi);
        if( hit == NULL ) {
          _igvn.register_new_node_with_optimizer(phi); // Register new phi
        } else {                                      // or
          // Remove the new phi from the graph and use the hit
          _igvn.remove_dead_node(phi);
          phi = hit;                                  // Use existing phi
        }
        set_ctrl(phi, prev);
      }
      // Make 'use' use the Phi instead of the old loop body exit value
      _igvn.replace_input_of(use, idx, phi);
      if( use->_idx >= new_counter ) { // If updating new phis
        // Not needed for correctness, but prevents a weak assert
        // in AddPNode from tripping (when we end up with different
        // base & derived Phis that will become the same after
        // IGVN does CSE).
        Node *hit = _igvn.hash_find_insert(use);
        if( hit )      // Go ahead and re-hash for hits.
          _igvn.replace_node( use, hit );
      }

      // If 'use' was in the loop-exit block, it now needs to be sunk
      // below the post-loop merge point.
      sink_use( use, prev );
    }
  }
}
1877
clone_outer_loop_helper(Node * n,const IdealLoopTree * loop,const IdealLoopTree * outer_loop,const Node_List & old_new,Unique_Node_List & wq,PhaseIdealLoop * phase,bool check_old_new)1878 static void clone_outer_loop_helper(Node* n, const IdealLoopTree *loop, const IdealLoopTree* outer_loop,
1879 const Node_List &old_new, Unique_Node_List& wq, PhaseIdealLoop* phase,
1880 bool check_old_new) {
1881 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1882 Node* u = n->fast_out(j);
1883 assert(check_old_new || old_new[u->_idx] == NULL, "shouldn't have been cloned");
1884 if (!u->is_CFG() && (!check_old_new || old_new[u->_idx] == NULL)) {
1885 Node* c = phase->get_ctrl(u);
1886 IdealLoopTree* u_loop = phase->get_loop(c);
1887 assert(!loop->is_member(u_loop), "can be in outer loop or out of both loops only");
1888 if (outer_loop->is_member(u_loop)) {
1889 wq.push(u);
1890 }
1891 }
1892 }
1893 }
1894
// Clone the skeleton of the outer strip mined loop (outer LoopNode, tail,
// loop-end If and SafePoint) when 'mode' is CloneIncludesStripMined, then
// clone the data nodes that had their control inside the outer loop (they
// are kept alive there by the safepoint).  For other modes, or a loop that
// is not strip mined, only the new head's entry/idom needs fixing.  Cloned
// extra data nodes are recorded in 'extra_data_nodes' for the caller.
void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
                                      IdealLoopTree* outer_loop, int dd, Node_List &old_new,
                                      Node_List& extra_data_nodes) {
  if (head->is_strip_mined() && mode != IgnoreStripMined) {
    CountedLoopNode* cl = head->as_CountedLoop();
    Node* l = cl->outer_loop();
    Node* tail = cl->outer_loop_tail();
    IfNode* le = cl->outer_loop_end();
    Node* sfpt = cl->outer_safepoint();
    CountedLoopEndNode* cle = cl->loopexit();
    CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
    CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit_or_null();
    Node* cle_out = cle->proj_out(false);

    Node* new_sfpt = NULL;
    Node* new_cle_out = cle_out->clone();
    old_new.map(cle_out->_idx, new_cle_out);
    if (mode == CloneIncludesStripMined) {
      // clone outer loop body
      Node* new_l = l->clone();
      Node* new_tail = tail->clone();
      IfNode* new_le = le->clone()->as_If();
      new_sfpt = sfpt->clone();

      // The cloned outer-loop skeleton lives in the parent of the outer loop.
      set_loop(new_l, outer_loop->_parent);
      set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
      set_loop(new_cle_out, outer_loop->_parent);
      set_idom(new_cle_out, new_cle, dd);
      set_loop(new_sfpt, outer_loop->_parent);
      set_idom(new_sfpt, new_cle_out, dd);
      set_loop(new_le, outer_loop->_parent);
      set_idom(new_le, new_sfpt, dd);
      set_loop(new_tail, outer_loop->_parent);
      set_idom(new_tail, new_le, dd);
      set_idom(new_cl, new_l, dd);

      old_new.map(l->_idx, new_l);
      old_new.map(tail->_idx, new_tail);
      old_new.map(le->_idx, new_le);
      old_new.map(sfpt->_idx, new_sfpt);

      // Wire the skeleton into a cycle:
      // new_l -> new_cl ... new_cle -> new_cle_out -> new_sfpt -> new_le -> new_tail -> new_l
      new_l->set_req(LoopNode::LoopBackControl, new_tail);
      new_l->set_req(0, new_l);
      new_tail->set_req(0, new_le);
      new_le->set_req(0, new_sfpt);
      new_sfpt->set_req(0, new_cle_out);
      new_cle_out->set_req(0, new_cle);
      new_cl->set_req(LoopNode::EntryControl, new_l);

      _igvn.register_new_node_with_optimizer(new_l);
      _igvn.register_new_node_with_optimizer(new_tail);
      _igvn.register_new_node_with_optimizer(new_le);
    } else {
      // Not cloning the outer loop: the new inner loop loses its strip
      // mining and is attached directly above the old outer loop head.
      Node *newhead = old_new[loop->_head->_idx];
      newhead->as_Loop()->clear_strip_mined();
      _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
      set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
    }
    // Look at data node that were assigned a control in the outer
    // loop: they are kept in the outer loop by the safepoint so start
    // from the safepoint node's inputs.
    IdealLoopTree* outer_loop = get_loop(l); // NOTE: intentionally shadows the 'outer_loop' parameter
    Node_Stack stack(2);
    stack.push(sfpt, 1);
    uint new_counter = C->unique();
    // Post-order DFS over the safepoint's input graph restricted to data
    // nodes controlled inside the outer loop, cloning them bottom-up.
    while (stack.size() > 0) {
      Node* n = stack.node();
      uint i = stack.index();
      // Skip inputs that are absent, not data, outside the outer loop, or
      // already cloned in this round (clone index >= new_counter).
      while (i < n->req() &&
             (n->in(i) == NULL ||
              !has_ctrl(n->in(i)) ||
              get_loop(get_ctrl(n->in(i))) != outer_loop ||
              (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) {
        i++;
      }
      if (i < n->req()) {
        stack.set_index(i+1);
        stack.push(n->in(i), 0);
      } else {
        assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
        Node* m = n == sfpt ? new_sfpt : n->clone();
        if (m != NULL) {
          // Point the clone's inputs at the clones of its inputs.
          for (uint i = 0; i < n->req(); i++) {
            if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) {
              m->set_req(i, old_new[m->in(i)->_idx]);
            }
          }
        } else {
          assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
        }
        if (n != sfpt) {
          extra_data_nodes.push(n);
          _igvn.register_new_node_with_optimizer(m);
          assert(get_ctrl(n) == cle_out, "what other control?");
          set_ctrl(m, new_cle_out);
          old_new.map(n->_idx, m);
        }
        stack.pop();
      }
    }
    if (mode == CloneIncludesStripMined) {
      _igvn.register_new_node_with_optimizer(new_sfpt);
      _igvn.register_new_node_with_optimizer(new_cle_out);
    }
    // Some other transformation may have pessimistically assigned some
    // data nodes to the outer loop. Set their control so they are out
    // of the outer loop.
    ResourceMark rm;
    Unique_Node_List wq;
    for (uint i = 0; i < extra_data_nodes.size(); i++) {
      Node* old = extra_data_nodes.at(i);
      clone_outer_loop_helper(old, loop, outer_loop, old_new, wq, this, true);
    }
    Node* new_ctrl = cl->outer_loop_exit();
    assert(get_loop(new_ctrl) != outer_loop, "must be out of the loop nest");
    // Move every collected node (and, transitively, its uses) to the exit.
    for (uint i = 0; i < wq.size(); i++) {
      Node* n = wq.at(i);
      set_ctrl(n, new_ctrl);
      clone_outer_loop_helper(n, loop, outer_loop, old_new, wq, this, false);
    }
  } else {
    Node *newhead = old_new[loop->_head->_idx];
    set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
  }
}
2020
//------------------------------clone_loop-------------------------------------
//
//                  C L O N E   A   L O O P   B O D Y
//
// This is the basic building block of the loop optimizations.  It clones an
// entire loop body.  It makes an old_new loop body mapping; with this mapping
// you can find the new-loop equivalent to an old-loop node.  All new-loop
// nodes are exactly equal to their old-loop counterparts, all edges are the
// same.  All exits from the old-loop now have a RegionNode that merges the
// equivalent new-loop path.  This is true even for the normal "loop-exit"
// condition.  All uses of loop-invariant old-loop values now come from (one
// or more) Phis that merge their new-loop equivalents.
//
// This operation leaves the graph in an illegal state: there are two valid
// control edges coming from the loop pre-header to both loop bodies.  I'll
// definitely have to hack the graph after running this transform.
//
// From this building block I will further edit edges to perform loop peeling
// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
//
// Parameter side_by_side_idom:
//   When side_by_side_idom is NULL, the dominator tree is constructed for
//      the clone loop to dominate the original.  Used in construction of
//      pre-main-post loop sequence.
//   When nonnull, the clone and original are side-by-side, both are
//      dominated by the side_by_side_idom node.  Used in construction of
//      unswitched loops.
void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
                                CloneLoopMode mode, Node* side_by_side_idom) {

  LoopNode* head = loop->_head->as_Loop();
  head->verify_strip_mined(1);

  if (C->do_vector_loop() && PrintOpto) {
    const char* mname = C->method()->name()->as_quoted_ascii();
    if (mname != NULL) {
      tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
    }
  }

  CloneMap& cm = C->clone_map();
  Dict* dict = cm.dict();
  if (C->do_vector_loop()) {
    cm.set_clone_idx(cm.max_gen()+1);
#ifndef PRODUCT
    if (PrintOpto) {
      tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
      loop->dump_head();
    }
#endif
  }

  // Step 1: Clone the loop body.  Make the old->new mapping.
  uint i;
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node *old = loop->_body.at(i);
    Node *nnn = old->clone();
    old_new.map( old->_idx, nnn );
    if (C->do_vector_loop()) {
      cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
    }
    _igvn.register_new_node_with_optimizer(nnn);
  }

  IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;

  // Step 2: Fix the edges in the new body.  If the old input is outside the
  // loop use it.  If the old input is INside the loop, use the corresponding
  // new node instead.
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node *old = loop->_body.at(i);
    Node *nnn = old_new[old->_idx];
    // Fix CFG/Loop controlling the new node
    if (has_ctrl(old)) {
      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
    } else {
      set_loop(nnn, outer_loop->_parent);
      if (old->outcnt() > 0) {
        set_idom( nnn, old_new[idom(old)->_idx], dd );
      }
    }
    // Correct edges to the new node
    for( uint j = 0; j < nnn->req(); j++ ) {
        Node *n = nnn->in(j);
        if( n ) {
          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
          if( loop->is_member( old_in_loop ) )
            nnn->set_req(j, old_new[n->_idx]);
        }
    }
    _igvn.hash_find_insert(nnn);
  }

  ResourceArea *area = Thread::current()->resource_area();
  Node_List extra_data_nodes(area); // data nodes in the outer strip mined loop
  clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);

  // Step 3: Now fix control uses.  Loop varying control uses have already
  // been fixed up (as part of all input edges in Step 2).  Loop invariant
  // control uses must be either an IfFalse or an IfTrue.  Make a merge
  // point to merge the old and new IfFalse/IfTrue nodes; make the use
  // refer to this.
  Node_List worklist(area);
  uint new_counter = C->unique();
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node* old = loop->_body.at(i);
    if( !old->is_CFG() ) continue;

    // Copy uses to a worklist, so I can munge the def-use info
    // with impunity.
    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
      worklist.push(old->fast_out(j));

    while( worklist.size() ) {  // Visit all uses
      Node *use = worklist.pop();
      if (!has_node(use))  continue; // Ignore dead nodes
      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
        // Both OLD and USE are CFG nodes here.
        assert( use->is_Proj(), "" );
        Node* nnn = old_new[old->_idx];

        Node* newuse = NULL;
        if (head->is_strip_mined() && mode != IgnoreStripMined) {
          // For a strip mined loop the inner loop-exit projection is hidden
          // inside the outer loop; route the merge through the outer
          // loop-end's exit projection instead.
          CountedLoopNode* cl = head->as_CountedLoop();
          CountedLoopEndNode* cle = cl->loopexit();
          Node* cle_out = cle->proj_out_or_null(false);
          if (use == cle_out) {
            IfNode* le = cl->outer_loop_end();
            use = le->proj_out(false);
            use_loop = get_loop(use);
            if (mode == CloneIncludesStripMined) {
              nnn = old_new[le->_idx];
            } else {
              newuse = old_new[cle_out->_idx];
            }
          }
        }
        if (newuse == NULL) {
          newuse = use->clone();
        }

        // Clone the loop exit control projection
        if (C->do_vector_loop()) {
          cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
        }
        newuse->set_req(0,nnn);
        _igvn.register_new_node_with_optimizer(newuse);
        set_loop(newuse, use_loop);
        set_idom(newuse, nnn, dom_depth(nnn) + 1 );

        // We need a Region to merge the exit from the peeled body and the
        // exit from the old loop body.
        RegionNode *r = new RegionNode(3);
        // Map the old use to the new merge point
        old_new.map( use->_idx, r );
        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );

        // The original user of 'use' uses 'r' instead.
        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
          Node* useuse = use->last_out(l);
          _igvn.rehash_node_delayed(useuse);
          uint uses_found = 0;
          if( useuse->in(0) == use ) {
            useuse->set_req(0, r);
            uses_found++;
            if( useuse->is_CFG() ) {
              assert( dom_depth(useuse) > dd_r, "" );
              set_idom(useuse, r, dom_depth(useuse));
            }
          }
          for( uint k = 1; k < useuse->req(); k++ ) {
            if( useuse->in(k) == use ) {
              useuse->set_req(k, r);
              uses_found++;
              if (useuse->is_Loop() && k == LoopNode::EntryControl) {
                assert(dom_depth(useuse) > dd_r , "");
                set_idom(useuse, r, dom_depth(useuse));
              }
            }
          }
          l -= uses_found;    // we deleted 1 or more copies of this edge
        }

        // Now finish up 'r'
        r->set_req( 1, newuse );
        r->set_req( 2,    use );
        _igvn.register_new_node_with_optimizer(r);
        set_loop(r, use_loop);
        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
      } // End of if a loop-exit test
    }
  }

  // Step 4: If loop-invariant use is not control, it must be dominated by a
  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
  // there if needed.  Make a Phi there merging old and new used values.
  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node* old = loop->_body.at(i);
    clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
                                split_bool_set, split_cex_set, worklist, new_counter,
                                mode);
  }

  // Data nodes cloned out of the outer strip mined loop need the same
  // use-fixing as the inner loop body.
  for (i = 0; i < extra_data_nodes.size(); i++) {
    Node* old = extra_data_nodes.at(i);
    clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
                                split_bool_set, split_cex_set, worklist, new_counter,
                                mode);
  }

  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
  // the loop uses a condition set in the loop.  The original IF probably
  // takes control from one or more OLD Regions (which in turn get from NEW
  // Regions).  In any case, there will be a set of Phis for each merge point
  // from the IF up to where the original BOOL def exits the loop.
  if (split_if_set) {
    while (split_if_set->size()) {
      Node *iff = split_if_set->pop();
      if (iff->in(1)->is_Phi()) {
        Node *b = clone_iff(iff->in(1)->as_Phi(), loop);
        _igvn.replace_input_of(iff, 1, b);
      }
    }
  }
  if (split_bool_set) {
    while (split_bool_set->size()) {
      Node *b = split_bool_set->pop();
      Node *phi = b->in(1);
      assert(phi->is_Phi(), "");
      CmpNode *cmp = clone_bool((PhiNode*)phi, loop);
      _igvn.replace_input_of(b, 1, cmp);
    }
  }
  if (split_cex_set) {
    while (split_cex_set->size()) {
      Node *b = split_cex_set->pop();
      assert(b->in(0)->is_Region(), "");
      assert(b->in(1)->is_Phi(), "");
      assert(b->in(0)->in(0) == b->in(1)->in(0), "");
      split_up(b, b->in(0), NULL);
    }
  }

}
2270
2271
2272 //---------------------- stride_of_possible_iv -------------------------------------
2273 // Looks for an iff/bool/comp with one operand of the compare
2274 // being a cycle involving an add and a phi,
2275 // with an optional truncation (left-shift followed by a right-shift)
2276 // of the add. Returns zero if not an iv.
stride_of_possible_iv(Node * iff)2277 int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
2278 Node* trunc1 = NULL;
2279 Node* trunc2 = NULL;
2280 const TypeInt* ttype = NULL;
2281 if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
2282 return 0;
2283 }
2284 BoolNode* bl = iff->in(1)->as_Bool();
2285 Node* cmp = bl->in(1);
2286 if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
2287 return 0;
2288 }
2289 // Must have an invariant operand
2290 if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
2291 return 0;
2292 }
2293 Node* add2 = NULL;
2294 Node* cmp1 = cmp->in(1);
2295 if (cmp1->is_Phi()) {
2296 // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
2297 Node* phi = cmp1;
2298 for (uint i = 1; i < phi->req(); i++) {
2299 Node* in = phi->in(i);
2300 Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
2301 &trunc1, &trunc2, &ttype);
2302 if (add && add->in(1) == phi) {
2303 add2 = add->in(2);
2304 break;
2305 }
2306 }
2307 } else {
2308 // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
2309 Node* addtrunc = cmp1;
2310 Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
2311 &trunc1, &trunc2, &ttype);
2312 if (add && add->in(1)->is_Phi()) {
2313 Node* phi = add->in(1);
2314 for (uint i = 1; i < phi->req(); i++) {
2315 if (phi->in(i) == addtrunc) {
2316 add2 = add->in(2);
2317 break;
2318 }
2319 }
2320 }
2321 }
2322 if (add2 != NULL) {
2323 const TypeInt* add2t = _igvn.type(add2)->is_int();
2324 if (add2t->is_con()) {
2325 return add2t->get_con();
2326 }
2327 }
2328 return 0;
2329 }
2330
2331
2332 //---------------------- stay_in_loop -------------------------------------
2333 // Return the (unique) control output node that's in the loop (if it exists.)
stay_in_loop(Node * n,IdealLoopTree * loop)2334 Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
2335 Node* unique = NULL;
2336 if (!n) return NULL;
2337 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2338 Node* use = n->fast_out(i);
2339 if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
2340 if (unique != NULL) {
2341 return NULL;
2342 }
2343 unique = use;
2344 }
2345 }
2346 return unique;
2347 }
2348
2349 //------------------------------ register_node -------------------------------------
2350 // Utility to register node "n" with PhaseIdealLoop
register_node(Node * n,IdealLoopTree * loop,Node * pred,int ddepth)2351 void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
2352 _igvn.register_new_node_with_optimizer(n);
2353 loop->_body.push(n);
2354 if (n->is_CFG()) {
2355 set_loop(n, loop);
2356 set_idom(n, pred, ddepth);
2357 } else {
2358 set_ctrl(n, pred);
2359 }
2360 }
2361
2362 //------------------------------ proj_clone -------------------------------------
2363 // Utility to create an if-projection
proj_clone(ProjNode * p,IfNode * iff)2364 ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
2365 ProjNode* c = p->clone()->as_Proj();
2366 c->set_req(0, iff);
2367 return c;
2368 }
2369
2370 //------------------------------ short_circuit_if -------------------------------------
2371 // Force the iff control output to be the live_proj
short_circuit_if(IfNode * iff,ProjNode * live_proj)2372 Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
2373 guarantee(live_proj != NULL, "null projection");
2374 int proj_con = live_proj->_con;
2375 assert(proj_con == 0 || proj_con == 1, "false or true projection");
2376 Node *con = _igvn.intcon(proj_con);
2377 set_ctrl(con, C->root());
2378 if (iff) {
2379 iff->set_req(1, con);
2380 }
2381 return con;
2382 }
2383
//------------------------------ insert_if_before_proj -------------------------------------
// Insert a new if before an if projection (* - new node)
//
// before
//           if(test)
//           /     \
//          v       v
//    other-proj   proj (arg)
//
// after
//           if(test)
//           /     \
//          /       v
//         |      * proj-clone
//         v        |
//    other-proj    v
//                * new_if(relop(cmp[IU](left,right)))
//                  /  \
//                 v    v
//         * new-proj  proj
//         (returned)
//
ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  // Both nodes get new inputs below; delay their re-hashing in igvn.
  _igvn.rehash_node_delayed(iff);
  _igvn.rehash_node_delayed(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  // Build the new test: cmp[IU](left, right), relop.
  Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
  register_node(cmp, loop, proj2, ddepth);

  BoolNode* bol = new BoolNode(cmp, relop);
  register_node(bol, loop, proj2, ddepth);

  // The new If mirrors the kind of the original (plain If or RangeCheck).
  int opcode = iff->Opcode();
  assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
  IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
    new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
  register_node(new_if, loop, proj2, ddepth);

  proj->set_req(0, new_if); // reattach
  set_idom(proj, new_if, ddepth);

  // The exit side of the new If takes the flavor of the original's other
  // projection and is what the caller gets back.
  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
  guarantee(new_exit != NULL, "null exit node");
  register_node(new_exit, get_loop(other_proj), new_if, ddepth);

  return new_exit;
}
2440
//------------------------------ insert_region_before_proj -------------------------------------
// Insert a region before an if projection (* - new node)
//
// before
//           if(test)
//          /      |
//         v       |
//       proj      v
//               other-proj
//
// after
//           if(test)
//          /       |
//         v        |
// * proj-clone     v
//         |      other-proj
//         v
// * new-region
//         |
//         v
// *      dum_if
//       /     \
//      v       \
// * dum-proj    v
//              proj
//
RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  // Both nodes get new inputs below; delay their re-hashing in igvn.
  _igvn.rehash_node_delayed(iff);
  _igvn.rehash_node_delayed(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  RegionNode* reg = new RegionNode(2);
  reg->set_req(1, proj2);
  register_node(reg, loop, iff, ddepth);

  // A dummy If with a constant condition (always taking 'proj') preserves
  // the dominator relationship of 'proj' below the new region.
  IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
  register_node(dum_if, loop, reg, ddepth);

  proj->set_req(0, dum_if); // reattach
  set_idom(proj, dum_if, ddepth);

  // The dead side of the dummy If still needs a projection of the opposite
  // flavor to keep the graph well-formed.
  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
  register_node(dum_proj, loop, dum_if, ddepth);

  return reg;
}
2495
//------------------------------ insert_cmpi_loop_exit -------------------------------------
// Clone a signed compare loop exit from an unsigned compare and
// insert it before the unsigned cmp on the stay-in-loop path.
// All new nodes inserted in the dominator tree between the original
// if and it's projections.  The original if test is replaced with
// a constant to force the stay-in-loop path.
//
// This is done to make sure that the original if and it's projections
// still dominate the same set of control nodes, that the ctrl() relation
// from data nodes to them is preserved, and that their loop nesting is
// preserved.
//
// before
//          if(i <u limit)    unsigned compare loop exit
//         /       |
//        v        v
//   exit-proj   stay-in-loop-proj
//
// after
//          if(stay-in-loop-const)  original if
//         /       |
//        /        v
//       /  if(i <  limit)    new signed test
//      /  /       |
//     /  /        v
//    /  /  if(i <u limit)    new cloned unsigned test
//   /  /   /      |
//   v  v  v       |
//    region       |
//        |        |
//    dum-if       |
//     /  |        |
// ether  |        |
//        v        v
//   exit-proj   stay-in-loop-proj
//
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
  const bool Signed   = true;
  const bool Unsigned = false;

  // Only handle the shape if(i <u limit) where i looks like an iv.
  BoolNode* bol = if_cmpu->in(1)->as_Bool();
  if (bol->_test._test != BoolTest::lt) return NULL;
  CmpNode* cmpu = bol->in(1)->as_Cmp();
  if (cmpu->Opcode() != Op_CmpU) return NULL;
  int stride = stride_of_possible_iv(if_cmpu);
  if (stride == 0) return NULL;

  Node* lp_proj = stay_in_loop(if_cmpu, loop);
  guarantee(lp_proj != NULL, "null loop node");

  ProjNode* lp_continue = lp_proj->as_Proj();
  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();

  // For a down-counting iv the signed exit test is i < 0.
  Node* limit = NULL;
  if (stride > 0) {
    limit = cmpu->in(2);
  } else {
    limit = _igvn.makecon(TypeInt::ZERO);
    set_ctrl(limit, C->root());
  }
  // Create a new region on the exit path
  RegionNode* reg = insert_region_before_proj(lp_exit);
  guarantee(reg != NULL, "null region node");

  // Clone the if-cmpu-true-false using a signed compare
  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
  reg->add_req(cmpi_exit);

  // Clone the if-cmpu-true-false
  BoolTest::mask rel_u = bol->_test._test;
  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
  reg->add_req(cmpu_exit);

  // Force original if to stay in loop.
  short_circuit_if(if_cmpu, lp_continue);

  // Return the new signed-compare If so the caller can undo this later
  // via remove_cmpi_loop_exit.
  return cmpi_exit->in(0)->as_If();
}
2575
2576 //------------------------------ remove_cmpi_loop_exit -------------------------------------
2577 // Remove a previously inserted signed compare loop exit.
remove_cmpi_loop_exit(IfNode * if_cmp,IdealLoopTree * loop)2578 void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
2579 Node* lp_proj = stay_in_loop(if_cmp, loop);
2580 assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
2581 stay_in_loop(lp_proj, loop)->is_If() &&
2582 stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
2583 Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
2584 set_ctrl(con, C->root());
2585 if_cmp->set_req(1, con);
2586 }
2587
2588 //------------------------------ scheduled_nodelist -------------------------------------
2589 // Create a post order schedule of nodes that are in the
2590 // "member" set. The list is returned in "sched".
2591 // The first node in "sched" is the loop head, followed by
2592 // nodes which have no inputs in the "member" set, and then
2593 // followed by the nodes that have an immediate input dependence
2594 // on a node in "sched".
void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {

  assert(member.test(loop->_head->_idx), "loop head must be in member set");
  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack nstack(a, loop->_body.size());

  Node* n = loop->_head; // top of stack is cached in "n"
  uint idx = 0;
  visited.set(n->_idx);

  // Initially push all with no inputs from within member set
  for(uint i = 0; i < loop->_body.size(); i++ ) {
    Node *elt = loop->_body.at(i);
    if (member.test(elt->_idx)) {
      bool found = false;
      for (uint j = 0; j < elt->req(); j++) {
        Node* def = elt->in(j);
        // Self-edges (def == elt) do not count as an in-set input.
        if (def && member.test(def->_idx) && def != elt) {
          found = true;
          break;
        }
      }
      if (!found && elt != loop->_head) {
        // Roots of the post-order walk: stack the cached top and make
        // "elt" the new current node.
        nstack.push(n, idx);
        n = elt;
        assert(!visited.test(n->_idx), "not seen yet");
        visited.set(n->_idx);
      }
    }
  }

  // traverse out's that are in the member set
  // (iterative DFS: "n"/"idx" are the current node and the index of the
  // next output edge of "n" to examine)
  while (true) {
    if (idx < n->outcnt()) {
      Node* use = n->raw_out(idx);
      idx++;
      if (!visited.test_set(use->_idx)) {
        if (member.test(use->_idx)) {
          // Descend into an unvisited member; remember where we were.
          nstack.push(n, idx);
          n = use;
          idx = 0;
        }
      }
    } else {
      // All outputs processed: emit "n" (post-order) and resume parent.
      sched.push(n);
      if (nstack.is_empty()) break;
      n = nstack.node();
      idx = nstack.index();
      nstack.pop();
    }
  }
}
2649
2650
2651 //------------------------------ has_use_in_set -------------------------------------
2652 // Has a use in the vector set
has_use_in_set(Node * n,VectorSet & vset)2653 bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
2654 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2655 Node* use = n->fast_out(j);
2656 if (vset.test(use->_idx)) {
2657 return true;
2658 }
2659 }
2660 return false;
2661 }
2662
2663
2664 //------------------------------ has_use_internal_to_set -------------------------------------
2665 // Has use internal to the vector set (ie. not in a phi at the loop head)
has_use_internal_to_set(Node * n,VectorSet & vset,IdealLoopTree * loop)2666 bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
2667 Node* head = loop->_head;
2668 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2669 Node* use = n->fast_out(j);
2670 if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
2671 return true;
2672 }
2673 }
2674 return false;
2675 }
2676
2677
2678 //------------------------------ clone_for_use_outside_loop -------------------------------------
2679 // clone "n" for uses that are outside of loop
int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
  int cloned = 0;
  assert(worklist.size() == 0, "should be empty");
  // Gather all users of "n" whose control lives outside of "loop".
  // A user with several edges to "n" appears once per edge in fast_outs,
  // so it is pushed (and later patched) once per edge.
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
      worklist.push(use);
    }
  }
  while( worklist.size() ) {
    Node *use = worklist.pop();
    // Skip users that have since died or been disconnected.
    if (!has_node(use) || use->in(0) == C->top()) continue;
    uint j;
    for (j = 0; j < use->req(); j++) {
      if (use->in(j) == n) break;
    }
    assert(j < use->req(), "must be there");

    // clone "n" and insert it between the inputs of "n" and the use outside the loop
    Node* n_clone = n->clone();
    _igvn.replace_input_of(use, j, n_clone);
    cloned++;
    Node* use_c;
    if (!use->is_Phi()) {
      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
    } else {
      // Use in a phi is considered a use in the associated predecessor block
      use_c = use->in(0)->in(j);
    }
    // The clone lives at the (out-of-loop) control of its single user.
    set_ctrl(n_clone, use_c);
    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
    get_loop(use_c)->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
#ifndef PRODUCT
    if (TracePartialPeeling) {
      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
    }
#endif
  }
  // Number of clones created; caller uses this to detect graph changes
  // outside the loop.
  return cloned;
}
2721
2722
2723 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
2724 // clone "n" for special uses that are in the not_peeled region.
2725 // If these def-uses occur in separate blocks, the code generator
2726 // marks the method as not compilable. For example, if a "BoolNode"
2727 // is in a different basic block than the "IfNode" that uses it, then
2728 // the compilation is aborted in the code generator.
void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
  // Phis and loads are never cloned here (loads may be anti-dependent
  // on stores; phis are handled by the partial-peel phi insertion).
  if (n->is_Phi() || n->is_Load()) {
    return;
  }
  assert(worklist.size() == 0, "should be empty");
  // Collect If/CMove/Bool users in the not_peel region that consume "n"
  // through input 1 -- the def-use pairs the code generator requires to
  // stay in one basic block.
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if ( not_peel.test(use->_idx) &&
         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
         use->in(1) == n) {
      worklist.push(use);
    }
  }
  if (worklist.size() > 0) {
    // clone "n" and insert it between inputs of "n" and the use
    Node* n_clone = n->clone();
    loop->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
    set_ctrl(n_clone, get_ctrl(n));
    // The clone is recorded for later sinking into the not_peel region.
    sink_list.push(n_clone);
    not_peel <<= n_clone->_idx; // add n_clone to not_peel set.
#ifndef PRODUCT
    if (TracePartialPeeling) {
      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
    }
#endif
    // Redirect every edge from the collected users to the clone.
    while( worklist.size() ) {
      Node *use = worklist.pop();
      _igvn.rehash_node_delayed(use);
      for (uint j = 1; j < use->req(); j++) {
        if (use->in(j) == n) {
          use->set_req(j, n_clone);
        }
      }
    }
  }
}
2767
2768
2769 //------------------------------ insert_phi_for_loop -------------------------------------
2770 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
insert_phi_for_loop(Node * use,uint idx,Node * lp_entry_val,Node * back_edge_val,LoopNode * lp)2771 void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2772 Node *phi = PhiNode::make(lp, back_edge_val);
2773 phi->set_req(LoopNode::EntryControl, lp_entry_val);
2774 // Use existing phi if it already exists
2775 Node *hit = _igvn.hash_find_insert(phi);
2776 if( hit == NULL ) {
2777 _igvn.register_new_node_with_optimizer(phi);
2778 set_ctrl(phi, lp);
2779 } else {
2780 // Remove the new phi from the graph and use the hit
2781 _igvn.remove_dead_node(phi);
2782 phi = hit;
2783 }
2784 _igvn.replace_input_of(use, idx, phi);
2785 }
2786
2787 #ifdef ASSERT
2788 //------------------------------ is_valid_loop_partition -------------------------------------
2789 // Validate the loop partition sets: peel and not_peel
is_valid_loop_partition(IdealLoopTree * loop,VectorSet & peel,Node_List & peel_list,VectorSet & not_peel)2790 bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2791 VectorSet& not_peel ) {
2792 uint i;
2793 // Check that peel_list entries are in the peel set
2794 for (i = 0; i < peel_list.size(); i++) {
2795 if (!peel.test(peel_list.at(i)->_idx)) {
2796 return false;
2797 }
2798 }
2799 // Check at loop members are in one of peel set or not_peel set
2800 for (i = 0; i < loop->_body.size(); i++ ) {
2801 Node *def = loop->_body.at(i);
2802 uint di = def->_idx;
2803 // Check that peel set elements are in peel_list
2804 if (peel.test(di)) {
2805 if (not_peel.test(di)) {
2806 return false;
2807 }
2808 // Must be in peel_list also
2809 bool found = false;
2810 for (uint j = 0; j < peel_list.size(); j++) {
2811 if (peel_list.at(j)->_idx == di) {
2812 found = true;
2813 break;
2814 }
2815 }
2816 if (!found) {
2817 return false;
2818 }
2819 } else if (not_peel.test(di)) {
2820 if (peel.test(di)) {
2821 return false;
2822 }
2823 } else {
2824 return false;
2825 }
2826 }
2827 return true;
2828 }
2829
2830 //------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2831 // Ensure a use outside of loop is of the right form
is_valid_clone_loop_exit_use(IdealLoopTree * loop,Node * use,uint exit_idx)2832 bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2833 Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2834 return (use->is_Phi() &&
2835 use_c->is_Region() && use_c->req() == 3 &&
2836 (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2837 use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2838 use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2839 loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2840 }
2841
2842 //------------------------------ is_valid_clone_loop_form -------------------------------------
2843 // Ensure that all uses outside of loop are of the right form
is_valid_clone_loop_form(IdealLoopTree * loop,Node_List & peel_list,uint orig_exit_idx,uint clone_exit_idx)2844 bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2845 uint orig_exit_idx, uint clone_exit_idx) {
2846 uint len = peel_list.size();
2847 for (uint i = 0; i < len; i++) {
2848 Node *def = peel_list.at(i);
2849
2850 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2851 Node *use = def->fast_out(j);
2852 Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2853 if (!loop->is_member(get_loop(use_c))) {
2854 // use is not in the loop, check for correct structure
2855 if (use->in(0) == def) {
2856 // Okay
2857 } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2858 return false;
2859 }
2860 }
2861 }
2862 }
2863 return true;
2864 }
2865 #endif
2866
2867 //------------------------------ partial_peel -------------------------------------
2868 // Partially peel (aka loop rotation) the top portion of a loop (called
2869 // the peel section below) by cloning it and placing one copy just before
2870 // the new loop head and the other copy at the bottom of the new loop.
2871 //
2872 // before after where it came from
2873 //
2874 // stmt1 stmt1
2875 // loop: stmt2 clone
2876 // stmt2 if condA goto exitA clone
2877 // if condA goto exitA new_loop: new
2878 // stmt3 stmt3 clone
2879 // if !condB goto loop if condB goto exitB clone
2880 // exitB: stmt2 orig
2881 // stmt4 if !condA goto new_loop orig
2882 // exitA: goto exitA
2883 // exitB:
2884 // stmt4
2885 // exitA:
2886 //
2887 // Step 1: find the cut point: an exit test on probable
2888 // induction variable.
2889 // Step 2: schedule (with cloning) operations in the peel
2890 // section that can be executed after the cut into
2891 // the section that is not peeled. This may need
2892 // to clone operations into exit blocks. For
2893 // instance, a reference to A[i] in the not-peel
2894 // section and a reference to B[i] in an exit block
2895 // may cause a left-shift of i by 2 to be placed
2896 // in the peel block. This step will clone the left
2897 // shift into the exit block and sink the left shift
2898 // from the peel to the not-peel section.
2899 // Step 3: clone the loop, retarget the control, and insert
2900 // phis for values that are live across the new loop
2901 // head. This is very dependent on the graph structure
2902 // from clone_loop. It creates region nodes for
2903 // exit control and associated phi nodes for values
2904 // flow out of the loop through that exit. The region
2905 // node is dominated by the clone's control projection.
2906 // So the clone's peel section is placed before the
2907 // new loop head, and the clone's not-peel section is
2908 // forms the top part of the new loop. The original
2909 // peel section forms the tail of the new loop.
2910 // Step 4: update the dominator tree and recompute the
2911 // dominator depth.
2912 //
2913 // orig
2914 //
2915 // stmt1
2916 // |
2917 // v
2918 // loop predicate
2919 // |
2920 // v
2921 // loop<----+
2922 // | |
2923 // stmt2 |
2924 // | |
2925 // v |
2926 // ifA |
2927 // / | |
2928 // v v |
2929 // false true ^ <-- last_peel
2930 // / | |
2931 // / ===|==cut |
2932 // / stmt3 | <-- first_not_peel
2933 // / | |
2934 // | v |
2935 // v ifB |
2936 // exitA: / \ |
2937 // / \ |
2938 // v v |
2939 // false true |
2940 // / \ |
2941 // / ----+
2942 // |
2943 // v
2944 // exitB:
2945 // stmt4
2946 //
2947 //
2948 // after clone loop
2949 //
2950 // stmt1
2951 // |
2952 // v
2953 // loop predicate
2954 // / \
2955 // clone / \ orig
2956 // / \
2957 // / \
2958 // v v
2959 // +---->loop loop<----+
2960 // | | | |
2961 // | stmt2 stmt2 |
2962 // | | | |
2963 // | v v |
2964 // | ifA ifA |
2965 // | | \ / | |
2966 // | v v v v |
2967 // ^ true false false true ^ <-- last_peel
2968 // | | ^ \ / | |
2969 // | cut==|== \ \ / ===|==cut |
2970 // | stmt3 \ \ / stmt3 | <-- first_not_peel
2971 // | | dom | | | |
2972 // | v \ 1v v2 v |
2973 // | ifB regionA ifB |
2974 // | / \ | / \ |
2975 // | / \ v / \ |
2976 // | v v exitA: v v |
2977 // | true false false true |
2978 // | / ^ \ / \ |
2979 // +---- \ \ / ----+
2980 // dom \ /
2981 // \ 1v v2
2982 // regionB
2983 // |
2984 // v
2985 // exitB:
2986 // stmt4
2987 //
2988 //
2989 // after partial peel
2990 //
2991 // stmt1
2992 // |
2993 // v
2994 // loop predicate
2995 // /
2996 // clone / orig
2997 // / TOP
2998 // / \
2999 // v v
3000 // TOP->loop loop----+
3001 // | | |
3002 // stmt2 stmt2 |
3003 // | | |
3004 // v v |
3005 // ifA ifA |
3006 // | \ / | |
3007 // v v v v |
3008 // true false false true | <-- last_peel
3009 // | ^ \ / +------|---+
3010 // +->newloop \ \ / === ==cut | |
3011 // | stmt3 \ \ / TOP | |
3012 // | | dom | | stmt3 | | <-- first_not_peel
3013 // | v \ 1v v2 v | |
3014 // | ifB regionA ifB ^ v
3015 // | / \ | / \ | |
3016 // | / \ v / \ | |
3017 // | v v exitA: v v | |
3018 // | true false false true | |
3019 // | / ^ \ / \ | |
3020 // | | \ \ / v | |
3021 // | | dom \ / TOP | |
3022 // | | \ 1v v2 | |
3023 // ^ v regionB | |
3024 // | | | | |
3025 // | | v ^ v
3026 // | | exitB: | |
3027 // | | stmt4 | |
3028 // | +------------>-----------------+ |
3029 // | |
3030 // +-----------------<---------------------+
3031 //
3032 //
3033 // final graph
3034 //
3035 // stmt1
3036 // |
3037 // v
3038 // loop predicate
3039 // |
3040 // v
3041 // stmt2 clone
3042 // |
3043 // v
3044 // ........> ifA clone
3045 // : / |
3046 // dom / |
3047 // : v v
3048 // : false true
3049 // : | |
3050 // : | v
3051 // : | newloop<-----+
3052 // : | | |
3053 // : | stmt3 clone |
3054 // : | | |
3055 // : | v |
3056 // : | ifB |
3057 // : | / \ |
3058 // : | v v |
3059 // : | false true |
3060 // : | | | |
3061 // : | v stmt2 |
3062 // : | exitB: | |
3063 // : | stmt4 v |
3064 // : | ifA orig |
3065 // : | / \ |
3066 // : | / \ |
3067 // : | v v |
3068 // : | false true |
3069 // : | / \ |
3070 // : v v -----+
3071 // RegionA
3072 // |
3073 // v
3074 // exitA
3075 //
partial_peel(IdealLoopTree * loop,Node_List & old_new)3076 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
3077
3078 assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
3079 if (!loop->_head->is_Loop()) {
3080 return false;
3081 }
3082 LoopNode *head = loop->_head->as_Loop();
3083
3084 if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
3085 return false;
3086 }
3087
3088 // Check for complex exit control
3089 for (uint ii = 0; ii < loop->_body.size(); ii++) {
3090 Node *n = loop->_body.at(ii);
3091 int opc = n->Opcode();
3092 if (n->is_Call() ||
3093 opc == Op_Catch ||
3094 opc == Op_CatchProj ||
3095 opc == Op_Jump ||
3096 opc == Op_JumpProj) {
3097 #ifndef PRODUCT
3098 if (TracePartialPeeling) {
3099 tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
3100 }
3101 #endif
3102 return false;
3103 }
3104 }
3105
3106 int dd = dom_depth(head);
3107
3108 // Step 1: find cut point
3109
3110 // Walk up dominators to loop head looking for first loop exit
3111 // which is executed on every path thru loop.
3112 IfNode *peel_if = NULL;
3113 IfNode *peel_if_cmpu = NULL;
3114
3115 Node *iff = loop->tail();
3116 while (iff != head) {
3117 if (iff->is_If()) {
3118 Node *ctrl = get_ctrl(iff->in(1));
3119 if (ctrl->is_top()) return false; // Dead test on live IF.
3120 // If loop-varying exit-test, check for induction variable
3121 if (loop->is_member(get_loop(ctrl)) &&
3122 loop->is_loop_exit(iff) &&
3123 is_possible_iv_test(iff)) {
3124 Node* cmp = iff->in(1)->in(1);
3125 if (cmp->Opcode() == Op_CmpI) {
3126 peel_if = iff->as_If();
3127 } else {
3128 assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
3129 peel_if_cmpu = iff->as_If();
3130 }
3131 }
3132 }
3133 iff = idom(iff);
3134 }
3135
3136 // Prefer signed compare over unsigned compare.
3137 IfNode* new_peel_if = NULL;
3138 if (peel_if == NULL) {
3139 if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
3140 return false; // No peel point found
3141 }
3142 new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
3143 if (new_peel_if == NULL) {
3144 return false; // No peel point found
3145 }
3146 peel_if = new_peel_if;
3147 }
3148 Node* last_peel = stay_in_loop(peel_if, loop);
3149 Node* first_not_peeled = stay_in_loop(last_peel, loop);
3150 if (first_not_peeled == NULL || first_not_peeled == head) {
3151 return false;
3152 }
3153
3154 #ifndef PRODUCT
3155 if (TraceLoopOpts) {
3156 tty->print("PartialPeel ");
3157 loop->dump_head();
3158 }
3159
3160 if (TracePartialPeeling) {
3161 tty->print_cr("before partial peel one iteration");
3162 Node_List wl;
3163 Node* t = head->in(2);
3164 while (true) {
3165 wl.push(t);
3166 if (t == head) break;
3167 t = idom(t);
3168 }
3169 while (wl.size() > 0) {
3170 Node* tt = wl.pop();
3171 tt->dump();
3172 if (tt == last_peel) tty->print_cr("-- cut --");
3173 }
3174 }
3175 #endif
3176 ResourceArea *area = Thread::current()->resource_area();
3177 VectorSet peel(area);
3178 VectorSet not_peel(area);
3179 Node_List peel_list(area);
3180 Node_List worklist(area);
3181 Node_List sink_list(area);
3182
3183 if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
3184 return false;
3185 }
3186
3187 // Set of cfg nodes to peel are those that are executable from
3188 // the head through last_peel.
3189 assert(worklist.size() == 0, "should be empty");
3190 worklist.push(head);
3191 peel.set(head->_idx);
3192 while (worklist.size() > 0) {
3193 Node *n = worklist.pop();
3194 if (n != last_peel) {
3195 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3196 Node* use = n->fast_out(j);
3197 if (use->is_CFG() &&
3198 loop->is_member(get_loop(use)) &&
3199 !peel.test_set(use->_idx)) {
3200 worklist.push(use);
3201 }
3202 }
3203 }
3204 }
3205
3206 // Set of non-cfg nodes to peel are those that are control
3207 // dependent on the cfg nodes.
3208 uint i;
3209 for(i = 0; i < loop->_body.size(); i++ ) {
3210 Node *n = loop->_body.at(i);
3211 Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
3212 if (peel.test(n_c->_idx)) {
3213 peel.set(n->_idx);
3214 } else {
3215 not_peel.set(n->_idx);
3216 }
3217 }
3218
3219 // Step 2: move operations from the peeled section down into the
3220 // not-peeled section
3221
3222 // Get a post order schedule of nodes in the peel region
3223 // Result in right-most operand.
3224 scheduled_nodelist(loop, peel, peel_list );
3225
3226 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3227
3228 // For future check for too many new phis
3229 uint old_phi_cnt = 0;
3230 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
3231 Node* use = head->fast_out(j);
3232 if (use->is_Phi()) old_phi_cnt++;
3233 }
3234
3235 #ifndef PRODUCT
3236 if (TracePartialPeeling) {
3237 tty->print_cr("\npeeled list");
3238 }
3239 #endif
3240
3241 // Evacuate nodes in peel region into the not_peeled region if possible
3242 uint new_phi_cnt = 0;
3243 uint cloned_for_outside_use = 0;
3244 for (i = 0; i < peel_list.size();) {
3245 Node* n = peel_list.at(i);
3246 #ifndef PRODUCT
3247 if (TracePartialPeeling) n->dump();
3248 #endif
3249 bool incr = true;
3250 if ( !n->is_CFG() ) {
3251
3252 if ( has_use_in_set(n, not_peel) ) {
3253
3254 // If not used internal to the peeled region,
3255 // move "n" from peeled to not_peeled region.
3256
3257 if ( !has_use_internal_to_set(n, peel, loop) ) {
3258
3259 // if not pinned and not a load (which maybe anti-dependent on a store)
3260 // and not a CMove (Matcher expects only bool->cmove).
3261 if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove()) {
3262 cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
3263 sink_list.push(n);
3264 peel >>= n->_idx; // delete n from peel set.
3265 not_peel <<= n->_idx; // add n to not_peel set.
3266 peel_list.remove(i);
3267 incr = false;
3268 #ifndef PRODUCT
3269 if (TracePartialPeeling) {
3270 tty->print_cr("sink to not_peeled region: %d newbb: %d",
3271 n->_idx, get_ctrl(n)->_idx);
3272 }
3273 #endif
3274 }
3275 } else {
3276 // Otherwise check for special def-use cases that span
3277 // the peel/not_peel boundary such as bool->if
3278 clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
3279 new_phi_cnt++;
3280 }
3281 }
3282 }
3283 if (incr) i++;
3284 }
3285
3286 if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
3287 #ifndef PRODUCT
3288 if (TracePartialPeeling) {
3289 tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c",
3290 new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
3291 }
3292 #endif
3293 if (new_peel_if != NULL) {
3294 remove_cmpi_loop_exit(new_peel_if, loop);
3295 }
3296 // Inhibit more partial peeling on this loop
3297 assert(!head->is_partial_peel_loop(), "not partial peeled");
3298 head->mark_partial_peel_failed();
3299 if (cloned_for_outside_use > 0) {
3300 // Terminate this round of loop opts because
3301 // the graph outside this loop was changed.
3302 C->set_major_progress();
3303 return true;
3304 }
3305 return false;
3306 }
3307
3308 // Step 3: clone loop, retarget control, and insert new phis
3309
3310 // Create new loop head for new phis and to hang
3311 // the nodes being moved (sinked) from the peel region.
3312 LoopNode* new_head = new LoopNode(last_peel, last_peel);
3313 new_head->set_unswitch_count(head->unswitch_count()); // Preserve
3314 _igvn.register_new_node_with_optimizer(new_head);
3315 assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
3316 _igvn.replace_input_of(first_not_peeled, 0, new_head);
3317 set_loop(new_head, loop);
3318 loop->_body.push(new_head);
3319 not_peel.set(new_head->_idx);
3320 set_idom(new_head, last_peel, dom_depth(first_not_peeled));
3321 set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
3322
3323 while (sink_list.size() > 0) {
3324 Node* n = sink_list.pop();
3325 set_ctrl(n, new_head);
3326 }
3327
3328 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3329
3330 clone_loop(loop, old_new, dd, IgnoreStripMined);
3331
3332 const uint clone_exit_idx = 1;
3333 const uint orig_exit_idx = 2;
3334 assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
3335
3336 Node* head_clone = old_new[head->_idx];
3337 LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop();
3338 Node* orig_tail_clone = head_clone->in(2);
3339
3340 // Add phi if "def" node is in peel set and "use" is not
3341
3342 for(i = 0; i < peel_list.size(); i++ ) {
3343 Node *def = peel_list.at(i);
3344 if (!def->is_CFG()) {
3345 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
3346 Node *use = def->fast_out(j);
3347 if (has_node(use) && use->in(0) != C->top() &&
3348 (!peel.test(use->_idx) ||
3349 (use->is_Phi() && use->in(0) == head)) ) {
3350 worklist.push(use);
3351 }
3352 }
3353 while( worklist.size() ) {
3354 Node *use = worklist.pop();
3355 for (uint j = 1; j < use->req(); j++) {
3356 Node* n = use->in(j);
3357 if (n == def) {
3358
3359 // "def" is in peel set, "use" is not in peel set
3360 // or "use" is in the entry boundary (a phi) of the peel set
3361
3362 Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
3363
3364 if ( loop->is_member(get_loop( use_c )) ) {
3365 // use is in loop
3366 if (old_new[use->_idx] != NULL) { // null for dead code
3367 Node* use_clone = old_new[use->_idx];
3368 _igvn.replace_input_of(use, j, C->top());
3369 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
3370 }
3371 } else {
3372 assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
3373 // use is not in the loop, check if the live range includes the cut
3374 Node* lp_if = use_c->in(orig_exit_idx)->in(0);
3375 if (not_peel.test(lp_if->_idx)) {
3376 assert(j == orig_exit_idx, "use from original loop");
3377 insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
3378 }
3379 }
3380 }
3381 }
3382 }
3383 }
3384 }
3385
3386 // Step 3b: retarget control
3387
3388 // Redirect control to the new loop head if a cloned node in
3389 // the not_peeled region has control that points into the peeled region.
3390 // This necessary because the cloned peeled region will be outside
3391 // the loop.
3392 // from to
3393 // cloned-peeled <---+
3394 // new_head_clone: | <--+
3395 // cloned-not_peeled in(0) in(0)
3396 // orig-peeled
3397
3398 for(i = 0; i < loop->_body.size(); i++ ) {
3399 Node *n = loop->_body.at(i);
3400 if (!n->is_CFG() && n->in(0) != NULL &&
3401 not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
3402 Node* n_clone = old_new[n->_idx];
3403 _igvn.replace_input_of(n_clone, 0, new_head_clone);
3404 }
3405 }
3406
3407 // Backedge of the surviving new_head (the clone) is original last_peel
3408 _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
3409
3410 // Cut first node in original not_peel set
3411 _igvn.rehash_node_delayed(new_head); // Multiple edge updates:
3412 new_head->set_req(LoopNode::EntryControl, C->top()); // use rehash_node_delayed / set_req instead of
3413 new_head->set_req(LoopNode::LoopBackControl, C->top()); // multiple replace_input_of calls
3414
3415 // Copy head_clone back-branch info to original head
3416 // and remove original head's loop entry and
3417 // clone head's back-branch
3418 _igvn.rehash_node_delayed(head); // Multiple edge updates
3419 head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl));
3420 head->set_req(LoopNode::LoopBackControl, C->top());
3421 _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
3422
3423 // Similarly modify the phis
3424 for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
3425 Node* use = head->fast_out(k);
3426 if (use->is_Phi() && use->outcnt() > 0) {
3427 Node* use_clone = old_new[use->_idx];
3428 _igvn.rehash_node_delayed(use); // Multiple edge updates
3429 use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl));
3430 use->set_req(LoopNode::LoopBackControl, C->top());
3431 _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
3432 }
3433 }
3434
3435 // Step 4: update dominator tree and dominator depth
3436
3437 set_idom(head, orig_tail_clone, dd);
3438 recompute_dom_depth();
3439
3440 // Inhibit more partial peeling on this loop
3441 new_head_clone->set_partial_peel_loop();
3442 C->set_major_progress();
3443 loop->record_for_igvn();
3444
3445 #ifndef PRODUCT
3446 if (TracePartialPeeling) {
3447 tty->print_cr("\nafter partial peel one iteration");
3448 Node_List wl(area);
3449 Node* t = last_peel;
3450 while (true) {
3451 wl.push(t);
3452 if (t == head_clone) break;
3453 t = idom(t);
3454 }
3455 while (wl.size() > 0) {
3456 Node* tt = wl.pop();
3457 if (tt == head) tty->print_cr("orig head");
3458 else if (tt == new_head_clone) tty->print_cr("new head");
3459 else if (tt == head_clone) tty->print_cr("clone head");
3460 tt->dump();
3461 }
3462 }
3463 #endif
3464 return true;
3465 }
3466
3467 //------------------------------reorg_offsets----------------------------------
3468 // Reorganize offset computations to lower register pressure. Mostly
3469 // prevent loop-fallout uses of the pre-incremented trip counter (which are
3470 // then alive with the post-incremented trip counter forcing an extra
3471 // register move)
void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
  // Perform it only for canonical counted loops.
  // Loop's shape could be messed up by iteration_split_impl.
  if (!loop->_head->is_CountedLoop())
    return;
  if (!loop->_head->as_Loop()->is_valid_counted_loop())
    return;

  CountedLoopNode *cl = loop->_head->as_CountedLoop();
  CountedLoopEndNode *cle = cl->loopexit();
  Node *exit = cle->proj_out(false);
  Node *phi = cl->phi();

  // Check for the special case when using the pre-incremented trip-counter on
  // the fall-out path (forces the pre-incremented and post-incremented trip
  // counter to be live at the same time). Fix this by adjusting to use the
  // post-increment trip counter.

  // Restart the scan after each rewrite because the rewrite changes
  // the def-use info being iterated over.
  bool progress = true;
  while (progress) {
    progress = false;
    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
      Node* use = phi->fast_out(i); // User of trip-counter
      if (!has_ctrl(use)) continue;
      Node *u_ctrl = get_ctrl(use);
      if (use->is_Phi()) {
        // A phi's use of the counter is attributed to the LCA of the
        // predecessor blocks of its matching inputs.
        u_ctrl = NULL;
        for (uint j = 1; j < use->req(); j++)
          if (use->in(j) == phi)
            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
      }
      IdealLoopTree *u_loop = get_loop(u_ctrl);
      // Look for loop-invariant use
      if (u_loop == loop) continue;
      if (loop->is_member(u_loop)) continue;
      // Check that use is live out the bottom. Assuming the trip-counter
      // update is right at the bottom, uses of the loop middle are ok.
      if (dom_lca(exit, u_ctrl) != exit) continue;
      // Hit! Refactor use to use the post-incremented tripcounter.
      // Compute a post-increment tripcounter.
      Node* c = exit;
      if (cl->is_strip_mined()) {
        // For strip-mined loops, place the replacement at the outer exit
        // when the use is outside the outer strip-mined loop as well.
        IdealLoopTree* outer_loop = get_loop(cl->outer_loop());
        if (!outer_loop->is_member(u_loop)) {
          c = cl->outer_loop_exit();
        }
      }
      // NOTE(review): Opaque2 appears to shield the incremented counter
      // from igvn so the AddI below is not folded back into the phi --
      // confirm against opaquenode.hpp.
      Node *opaq = new Opaque2Node(C, cle->incr());
      register_new_node(opaq, c);
      Node *neg_stride = _igvn.intcon(-cle->stride_con());
      set_ctrl(neg_stride, C->root());
      // Value used after the loop: incr plus the negated stride.
      Node *post = new AddINode(opaq, neg_stride);
      register_new_node(post, c);
      // Replace every edge from "use" to the phi with the rebuilt value.
      _igvn.rehash_node_delayed(use);
      for (uint j = 1; j < use->req(); j++) {
        if (use->in(j) == phi)
          use->set_req(j, post);
      }
      // Since DU info changed, rerun loop
      progress = true;
      break;
    }
  }

}
3537