1 /*
2 * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "c1/c1_Instruction.hpp"
27 #include "c1/c1_LinearScan.hpp"
28 #include "utilities/bitMap.inline.hpp"
29
30
31 #ifdef _LP64
allocate_fpu_stack()32 void LinearScan::allocate_fpu_stack() {
33 // No FPU stack used on x86-64
34 }
35 #else
36 //----------------------------------------------------------------------
37 // Allocation of FPU stack slots (Intel x86 only)
38 //----------------------------------------------------------------------
39
allocate_fpu_stack()40 void LinearScan::allocate_fpu_stack() {
41 // First compute which FPU registers are live at the start of each basic block
42 // (To minimize the amount of work we have to do if we have to merge FPU stacks)
43 if (ComputeExactFPURegisterUsage) {
44 Interval* intervals_in_register, *intervals_in_memory;
45 create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL);
46
47 // ignore memory intervals by overwriting intervals_in_memory
48 // the dummy interval is needed to enforce the walker to walk until the given id:
49 // without it, the walker stops when the unhandled-list is empty -> live information
50 // beyond this point would be incorrect.
51 Interval* dummy_interval = new Interval(any_reg);
52 dummy_interval->add_range(max_jint - 2, max_jint - 1);
53 dummy_interval->set_next(Interval::end());
54 intervals_in_memory = dummy_interval;
55
56 IntervalWalker iw(this, intervals_in_register, intervals_in_memory);
57
58 const int num_blocks = block_count();
59 for (int i = 0; i < num_blocks; i++) {
60 BlockBegin* b = block_at(i);
61
62 // register usage is only needed for merging stacks -> compute only
63 // when more than one predecessor.
64 // the block must not have any spill moves at the beginning (checked by assertions)
65 // spill moves would use intervals that are marked as handled and so the usage bit
66 // would been set incorrectly
67
68 // NOTE: the check for number_of_preds > 1 is necessary. A block with only one
69 // predecessor may have spill moves at the begin of the block.
70 // If an interval ends at the current instruction id, it is not possible
71 // to decide if the register is live or not at the block begin -> the
72 // register information would be incorrect.
73 if (b->number_of_preds() > 1) {
74 int id = b->first_lir_instruction_id();
75 ResourceBitMap regs(FrameMap::nof_fpu_regs);
76
77 iw.walk_to(id); // walk after the first instruction (always a label) of the block
78 assert(iw.current_position() == id, "did not walk completely to id");
79
80 // Only consider FPU values in registers
81 Interval* interval = iw.active_first(fixedKind);
82 while (interval != Interval::end()) {
83 int reg = interval->assigned_reg();
84 assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register");
85 assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)");
86 assert(interval->from() <= id && id < interval->to(), "interval out of range");
87
88 #ifndef PRODUCT
89 if (TraceFPURegisterUsage) {
90 tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print();
91 }
92 #endif
93
94 regs.set_bit(reg - pd_first_fpu_reg);
95 interval = interval->next();
96 }
97
98 b->set_fpu_register_usage(regs);
99
100 #ifndef PRODUCT
101 if (TraceFPURegisterUsage) {
102 tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->cr();
103 }
104 #endif
105 }
106 }
107 }
108
109 FpuStackAllocator alloc(ir()->compilation(), this);
110 _fpu_stack_allocator = &alloc;
111 alloc.allocate();
112 _fpu_stack_allocator = NULL;
113 }
114
115
FpuStackAllocator(Compilation * compilation,LinearScan * allocator)116 FpuStackAllocator::FpuStackAllocator(Compilation* compilation, LinearScan* allocator)
117 : _compilation(compilation)
118 , _allocator(allocator)
119 , _lir(NULL)
120 , _pos(-1)
121 , _sim(compilation)
122 , _temp_sim(compilation)
123 {}
124
allocate()125 void FpuStackAllocator::allocate() {
126 int num_blocks = allocator()->block_count();
127 for (int i = 0; i < num_blocks; i++) {
128 // Set up to process block
129 BlockBegin* block = allocator()->block_at(i);
130 intArray* fpu_stack_state = block->fpu_stack_state();
131
132 #ifndef PRODUCT
133 if (TraceFPUStack) {
134 tty->cr();
135 tty->print_cr("------- Begin of new Block %d -------", block->block_id());
136 }
137 #endif
138
139 assert(fpu_stack_state != NULL ||
140 block->end()->as_Base() != NULL ||
141 block->is_set(BlockBegin::exception_entry_flag),
142 "FPU stack state must be present due to linear-scan order for FPU stack allocation");
143 // note: exception handler entries always start with an empty fpu stack
144 // because stack merging would be too complicated
145
146 if (fpu_stack_state != NULL) {
147 sim()->read_state(fpu_stack_state);
148 } else {
149 sim()->clear();
150 }
151
152 #ifndef PRODUCT
153 if (TraceFPUStack) {
154 tty->print("Reading FPU state for block %d:", block->block_id());
155 sim()->print();
156 tty->cr();
157 }
158 #endif
159
160 allocate_block(block);
161 CHECK_BAILOUT();
162 }
163 }
164
allocate_block(BlockBegin * block)165 void FpuStackAllocator::allocate_block(BlockBegin* block) {
166 bool processed_merge = false;
167 LIR_OpList* insts = block->lir()->instructions_list();
168 set_lir(block->lir());
169 set_pos(0);
170
171
172 // Note: insts->length() may change during loop
173 while (pos() < insts->length()) {
174 LIR_Op* op = insts->at(pos());
175 _debug_information_computed = false;
176
177 #ifndef PRODUCT
178 if (TraceFPUStack) {
179 op->print();
180 }
181 check_invalid_lir_op(op);
182 #endif
183
184 LIR_OpBranch* branch = op->as_OpBranch();
185 LIR_Op1* op1 = op->as_Op1();
186 LIR_Op2* op2 = op->as_Op2();
187 LIR_OpCall* opCall = op->as_OpCall();
188
189 if (branch != NULL && branch->block() != NULL) {
190 if (!processed_merge) {
191 // propagate stack at first branch to a successor
192 processed_merge = true;
193 bool required_merge = merge_fpu_stack_with_successors(block);
194
195 assert(!required_merge || branch->cond() == lir_cond_always, "splitting of critical edges should prevent FPU stack mismatches at cond branches");
196 }
197
198 } else if (op1 != NULL) {
199 handle_op1(op1);
200 } else if (op2 != NULL) {
201 handle_op2(op2);
202 } else if (opCall != NULL) {
203 handle_opCall(opCall);
204 }
205
206 compute_debug_information(op);
207
208 set_pos(1 + pos());
209 }
210
211 // Propagate stack when block does not end with branch
212 if (!processed_merge) {
213 merge_fpu_stack_with_successors(block);
214 }
215 }
216
217
compute_debug_information(LIR_Op * op)218 void FpuStackAllocator::compute_debug_information(LIR_Op* op) {
219 if (!_debug_information_computed && op->id() != -1 && allocator()->has_info(op->id())) {
220 visitor.visit(op);
221
222 // exception handling
223 if (allocator()->compilation()->has_exception_handlers()) {
224 XHandlers* xhandlers = visitor.all_xhandler();
225 int n = xhandlers->length();
226 for (int k = 0; k < n; k++) {
227 allocate_exception_handler(xhandlers->handler_at(k));
228 }
229 } else {
230 assert(visitor.all_xhandler()->length() == 0, "missed exception handler");
231 }
232
233 // compute debug information
234 int n = visitor.info_count();
235 assert(n > 0, "should not visit operation otherwise");
236
237 for (int j = 0; j < n; j++) {
238 CodeEmitInfo* info = visitor.info_at(j);
239 // Compute debug information
240 allocator()->compute_debug_info(info, op->id());
241 }
242 }
243 _debug_information_computed = true;
244 }
245
allocate_exception_handler(XHandler * xhandler)246 void FpuStackAllocator::allocate_exception_handler(XHandler* xhandler) {
247 if (!sim()->is_empty()) {
248 LIR_List* old_lir = lir();
249 int old_pos = pos();
250 intArray* old_state = sim()->write_state();
251
252 #ifndef PRODUCT
253 if (TraceFPUStack) {
254 tty->cr();
255 tty->print_cr("------- begin of exception handler -------");
256 }
257 #endif
258
259 if (xhandler->entry_code() == NULL) {
260 // need entry code to clear FPU stack
261 LIR_List* entry_code = new LIR_List(_compilation);
262 entry_code->jump(xhandler->entry_block());
263 xhandler->set_entry_code(entry_code);
264 }
265
266 LIR_OpList* insts = xhandler->entry_code()->instructions_list();
267 set_lir(xhandler->entry_code());
268 set_pos(0);
269
270 // Note: insts->length() may change during loop
271 while (pos() < insts->length()) {
272 LIR_Op* op = insts->at(pos());
273
274 #ifndef PRODUCT
275 if (TraceFPUStack) {
276 op->print();
277 }
278 check_invalid_lir_op(op);
279 #endif
280
281 switch (op->code()) {
282 case lir_move:
283 assert(op->as_Op1() != NULL, "must be LIR_Op1");
284 assert(pos() != insts->length() - 1, "must not be last operation");
285
286 handle_op1((LIR_Op1*)op);
287 break;
288
289 case lir_branch:
290 assert(op->as_OpBranch()->cond() == lir_cond_always, "must be unconditional branch");
291 assert(pos() == insts->length() - 1, "must be last operation");
292
293 // remove all remaining dead registers from FPU stack
294 clear_fpu_stack(LIR_OprFact::illegalOpr);
295 break;
296
297 default:
298 // other operations not allowed in exception entry code
299 ShouldNotReachHere();
300 }
301
302 set_pos(pos() + 1);
303 }
304
305 #ifndef PRODUCT
306 if (TraceFPUStack) {
307 tty->cr();
308 tty->print_cr("------- end of exception handler -------");
309 }
310 #endif
311
312 set_lir(old_lir);
313 set_pos(old_pos);
314 sim()->read_state(old_state);
315 }
316 }
317
318
fpu_num(LIR_Opr opr)319 int FpuStackAllocator::fpu_num(LIR_Opr opr) {
320 assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
321 return opr->is_single_fpu() ? opr->fpu_regnr() : opr->fpu_regnrLo();
322 }
323
tos_offset(LIR_Opr opr)324 int FpuStackAllocator::tos_offset(LIR_Opr opr) {
325 return sim()->offset_from_tos(fpu_num(opr));
326 }
327
328
to_fpu_stack(LIR_Opr opr)329 LIR_Opr FpuStackAllocator::to_fpu_stack(LIR_Opr opr) {
330 assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
331
332 int stack_offset = tos_offset(opr);
333 if (opr->is_single_fpu()) {
334 return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset();
335 } else {
336 assert(opr->is_double_fpu(), "shouldn't call this otherwise");
337 return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset();
338 }
339 }
340
to_fpu_stack_top(LIR_Opr opr,bool dont_check_offset)341 LIR_Opr FpuStackAllocator::to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset) {
342 assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
343 assert(dont_check_offset || tos_offset(opr) == 0, "operand is not on stack top");
344
345 int stack_offset = 0;
346 if (opr->is_single_fpu()) {
347 return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset();
348 } else {
349 assert(opr->is_double_fpu(), "shouldn't call this otherwise");
350 return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset();
351 }
352 }
353
354
355
insert_op(LIR_Op * op)356 void FpuStackAllocator::insert_op(LIR_Op* op) {
357 lir()->insert_before(pos(), op);
358 set_pos(1 + pos());
359 }
360
361
insert_exchange(int offset)362 void FpuStackAllocator::insert_exchange(int offset) {
363 if (offset > 0) {
364 LIR_Op1* fxch_op = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr);
365 insert_op(fxch_op);
366 sim()->swap(offset);
367
368 #ifndef PRODUCT
369 if (TraceFPUStack) {
370 tty->print("Exchanged register: %d New state: ", sim()->get_slot(0)); sim()->print(); tty->cr();
371 }
372 #endif
373
374 }
375 }
376
insert_exchange(LIR_Opr opr)377 void FpuStackAllocator::insert_exchange(LIR_Opr opr) {
378 insert_exchange(tos_offset(opr));
379 }
380
381
insert_free(int offset)382 void FpuStackAllocator::insert_free(int offset) {
383 // move stack slot to the top of stack and then pop it
384 insert_exchange(offset);
385
386 LIR_Op* fpop = new LIR_Op0(lir_fpop_raw);
387 insert_op(fpop);
388 sim()->pop();
389
390 #ifndef PRODUCT
391 if (TraceFPUStack) {
392 tty->print("Inserted pop New state: "); sim()->print(); tty->cr();
393 }
394 #endif
395 }
396
397
insert_free_if_dead(LIR_Opr opr)398 void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr) {
399 if (sim()->contains(fpu_num(opr))) {
400 int res_slot = tos_offset(opr);
401 insert_free(res_slot);
402 }
403 }
404
insert_free_if_dead(LIR_Opr opr,LIR_Opr ignore)405 void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore) {
406 if (fpu_num(opr) != fpu_num(ignore) && sim()->contains(fpu_num(opr))) {
407 int res_slot = tos_offset(opr);
408 insert_free(res_slot);
409 }
410 }
411
insert_copy(LIR_Opr from,LIR_Opr to)412 void FpuStackAllocator::insert_copy(LIR_Opr from, LIR_Opr to) {
413 int offset = tos_offset(from);
414 LIR_Op1* fld = new LIR_Op1(lir_fld, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr);
415 insert_op(fld);
416
417 sim()->push(fpu_num(to));
418
419 #ifndef PRODUCT
420 if (TraceFPUStack) {
421 tty->print("Inserted copy (%d -> %d) New state: ", fpu_num(from), fpu_num(to)); sim()->print(); tty->cr();
422 }
423 #endif
424 }
425
do_rename(LIR_Opr from,LIR_Opr to)426 void FpuStackAllocator::do_rename(LIR_Opr from, LIR_Opr to) {
427 sim()->rename(fpu_num(from), fpu_num(to));
428 }
429
do_push(LIR_Opr opr)430 void FpuStackAllocator::do_push(LIR_Opr opr) {
431 sim()->push(fpu_num(opr));
432 }
433
pop_if_last_use(LIR_Op * op,LIR_Opr opr)434 void FpuStackAllocator::pop_if_last_use(LIR_Op* op, LIR_Opr opr) {
435 assert(op->fpu_pop_count() == 0, "fpu_pop_count alredy set");
436 assert(tos_offset(opr) == 0, "can only pop stack top");
437
438 if (opr->is_last_use()) {
439 op->set_fpu_pop_count(1);
440 sim()->pop();
441 }
442 }
443
pop_always(LIR_Op * op,LIR_Opr opr)444 void FpuStackAllocator::pop_always(LIR_Op* op, LIR_Opr opr) {
445 assert(op->fpu_pop_count() == 0, "fpu_pop_count alredy set");
446 assert(tos_offset(opr) == 0, "can only pop stack top");
447
448 op->set_fpu_pop_count(1);
449 sim()->pop();
450 }
451
clear_fpu_stack(LIR_Opr preserve)452 void FpuStackAllocator::clear_fpu_stack(LIR_Opr preserve) {
453 int result_stack_size = (preserve->is_fpu_register() && !preserve->is_xmm_register() ? 1 : 0);
454 while (sim()->stack_size() > result_stack_size) {
455 assert(!sim()->slot_is_empty(0), "not allowed");
456
457 if (result_stack_size == 0 || sim()->get_slot(0) != fpu_num(preserve)) {
458 insert_free(0);
459 } else {
460 // move "preserve" to bottom of stack so that all other stack slots can be popped
461 insert_exchange(sim()->stack_size() - 1);
462 }
463 }
464 }
465
466
handle_op1(LIR_Op1 * op1)467 void FpuStackAllocator::handle_op1(LIR_Op1* op1) {
468 LIR_Opr in = op1->in_opr();
469 LIR_Opr res = op1->result_opr();
470
471 LIR_Opr new_in = in; // new operands relative to the actual fpu stack top
472 LIR_Opr new_res = res;
473
474 // Note: this switch is processed for all LIR_Op1, regardless if they have FPU-arguments,
475 // so checks for is_float_kind() are necessary inside the cases
476 switch (op1->code()) {
477
478 case lir_return: {
479 // FPU-Stack must only contain the (optional) fpu return value.
480 // All remaining dead values are popped from the stack
481 // If the input operand is a fpu-register, it is exchanged to the bottom of the stack
482
483 clear_fpu_stack(in);
484 if (in->is_fpu_register() && !in->is_xmm_register()) {
485 new_in = to_fpu_stack_top(in);
486 }
487
488 break;
489 }
490
491 case lir_move: {
492 if (in->is_fpu_register() && !in->is_xmm_register()) {
493 if (res->is_xmm_register()) {
494 // move from fpu register to xmm register (necessary for operations that
495 // are not available in the SSE instruction set)
496 insert_exchange(in);
497 new_in = to_fpu_stack_top(in);
498 pop_always(op1, in);
499
500 } else if (res->is_fpu_register() && !res->is_xmm_register()) {
501 // move from fpu-register to fpu-register:
502 // * input and result register equal:
503 // nothing to do
504 // * input register is last use:
505 // rename the input register to result register -> input register
506 // not present on fpu-stack afterwards
507 // * input register not last use:
508 // duplicate input register to result register to preserve input
509 //
510 // Note: The LIR-Assembler does not produce any code for fpu register moves,
511 // so input and result stack index must be equal
512
513 if (fpu_num(in) == fpu_num(res)) {
514 // nothing to do
515 } else if (in->is_last_use()) {
516 insert_free_if_dead(res);//, in);
517 do_rename(in, res);
518 } else {
519 insert_free_if_dead(res);
520 insert_copy(in, res);
521 }
522 new_in = to_fpu_stack(res);
523 new_res = new_in;
524
525 } else {
526 // move from fpu-register to memory
527 // input operand must be on top of stack
528
529 insert_exchange(in);
530
531 // create debug information here because afterwards the register may have been popped
532 compute_debug_information(op1);
533
534 new_in = to_fpu_stack_top(in);
535 pop_if_last_use(op1, in);
536 }
537
538 } else if (res->is_fpu_register() && !res->is_xmm_register()) {
539 // move from memory/constant to fpu register
540 // result is pushed on the stack
541
542 insert_free_if_dead(res);
543
544 // create debug information before register is pushed
545 compute_debug_information(op1);
546
547 do_push(res);
548 new_res = to_fpu_stack_top(res);
549 }
550 break;
551 }
552
553 case lir_convert: {
554 Bytecodes::Code bc = op1->as_OpConvert()->bytecode();
555 switch (bc) {
556 case Bytecodes::_d2f:
557 case Bytecodes::_f2d:
558 assert(res->is_fpu_register(), "must be");
559 assert(in->is_fpu_register(), "must be");
560
561 if (!in->is_xmm_register() && !res->is_xmm_register()) {
562 // this is quite the same as a move from fpu-register to fpu-register
563 // Note: input and result operands must have different types
564 if (fpu_num(in) == fpu_num(res)) {
565 // nothing to do
566 new_in = to_fpu_stack(in);
567 } else if (in->is_last_use()) {
568 insert_free_if_dead(res);//, in);
569 new_in = to_fpu_stack(in);
570 do_rename(in, res);
571 } else {
572 insert_free_if_dead(res);
573 insert_copy(in, res);
574 new_in = to_fpu_stack_top(in, true);
575 }
576 new_res = to_fpu_stack(res);
577 }
578
579 break;
580
581 case Bytecodes::_i2f:
582 case Bytecodes::_l2f:
583 case Bytecodes::_i2d:
584 case Bytecodes::_l2d:
585 assert(res->is_fpu_register(), "must be");
586 if (!res->is_xmm_register()) {
587 insert_free_if_dead(res);
588 do_push(res);
589 new_res = to_fpu_stack_top(res);
590 }
591 break;
592
593 case Bytecodes::_f2i:
594 case Bytecodes::_d2i:
595 assert(in->is_fpu_register(), "must be");
596 if (!in->is_xmm_register()) {
597 insert_exchange(in);
598 new_in = to_fpu_stack_top(in);
599
600 // TODO: update registes of stub
601 }
602 break;
603
604 case Bytecodes::_f2l:
605 case Bytecodes::_d2l:
606 assert(in->is_fpu_register(), "must be");
607 if (!in->is_xmm_register()) {
608 insert_exchange(in);
609 new_in = to_fpu_stack_top(in);
610 pop_always(op1, in);
611 }
612 break;
613
614 case Bytecodes::_i2l:
615 case Bytecodes::_l2i:
616 case Bytecodes::_i2b:
617 case Bytecodes::_i2c:
618 case Bytecodes::_i2s:
619 // no fpu operands
620 break;
621
622 default:
623 ShouldNotReachHere();
624 }
625 break;
626 }
627
628 case lir_roundfp: {
629 assert(in->is_fpu_register() && !in->is_xmm_register(), "input must be in register");
630 assert(res->is_stack(), "result must be on stack");
631
632 insert_exchange(in);
633 new_in = to_fpu_stack_top(in);
634 pop_if_last_use(op1, in);
635 break;
636 }
637
638 default: {
639 assert(!in->is_float_kind() && !res->is_float_kind(), "missed a fpu-operation");
640 }
641 }
642
643 op1->set_in_opr(new_in);
644 op1->set_result_opr(new_res);
645 }
646
handle_op2(LIR_Op2 * op2)647 void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
648 LIR_Opr left = op2->in_opr1();
649 if (!left->is_float_kind()) {
650 return;
651 }
652 if (left->is_xmm_register()) {
653 return;
654 }
655
656 LIR_Opr right = op2->in_opr2();
657 LIR_Opr res = op2->result_opr();
658 LIR_Opr new_left = left; // new operands relative to the actual fpu stack top
659 LIR_Opr new_right = right;
660 LIR_Opr new_res = res;
661
662 assert(!left->is_xmm_register() && !right->is_xmm_register() && !res->is_xmm_register(), "not for xmm registers");
663
664 switch (op2->code()) {
665 case lir_cmp:
666 case lir_cmp_fd2i:
667 case lir_ucmp_fd2i:
668 case lir_assert: {
669 assert(left->is_fpu_register(), "invalid LIR");
670 assert(right->is_fpu_register(), "invalid LIR");
671
672 // the left-hand side must be on top of stack.
673 // the right-hand side is never popped, even if is_last_use is set
674 insert_exchange(left);
675 new_left = to_fpu_stack_top(left);
676 new_right = to_fpu_stack(right);
677 pop_if_last_use(op2, left);
678 break;
679 }
680
681 case lir_mul:
682 case lir_div: {
683 if (res->is_double_fpu()) {
684 assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot");
685 insert_free_if_dead(op2->tmp1_opr());
686 assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
687 }
688 // fall-through: continue with the normal handling of lir_mul and lir_div
689 }
690 case lir_add:
691 case lir_sub: {
692 assert(left->is_fpu_register(), "must be");
693 assert(res->is_fpu_register(), "must be");
694 assert(left->is_equal(res), "must be");
695
696 // either the left-hand or the right-hand side must be on top of stack
697 // (if right is not a register, left must be on top)
698 if (!right->is_fpu_register()) {
699 insert_exchange(left);
700 new_left = to_fpu_stack_top(left);
701 } else {
702 // no exchange necessary if right is alredy on top of stack
703 if (tos_offset(right) == 0) {
704 new_left = to_fpu_stack(left);
705 new_right = to_fpu_stack_top(right);
706 } else {
707 insert_exchange(left);
708 new_left = to_fpu_stack_top(left);
709 new_right = to_fpu_stack(right);
710 }
711
712 if (right->is_last_use()) {
713 op2->set_fpu_pop_count(1);
714
715 if (tos_offset(right) == 0) {
716 sim()->pop();
717 } else {
718 // if left is on top of stack, the result is placed in the stack
719 // slot of right, so a renaming from right to res is necessary
720 assert(tos_offset(left) == 0, "must be");
721 sim()->pop();
722 do_rename(right, res);
723 }
724 }
725 }
726 new_res = to_fpu_stack(res);
727
728 break;
729 }
730
731 case lir_rem: {
732 assert(left->is_fpu_register(), "must be");
733 assert(right->is_fpu_register(), "must be");
734 assert(res->is_fpu_register(), "must be");
735 assert(left->is_equal(res), "must be");
736
737 // Must bring both operands to top of stack with following operand ordering:
738 // * fpu stack before rem: ... right left
739 // * fpu stack after rem: ... left
740 if (tos_offset(right) != 1) {
741 insert_exchange(right);
742 insert_exchange(1);
743 }
744 insert_exchange(left);
745 assert(tos_offset(right) == 1, "check");
746 assert(tos_offset(left) == 0, "check");
747
748 new_left = to_fpu_stack_top(left);
749 new_right = to_fpu_stack(right);
750
751 op2->set_fpu_pop_count(1);
752 sim()->pop();
753 do_rename(right, res);
754
755 new_res = to_fpu_stack_top(res);
756 break;
757 }
758
759 case lir_abs:
760 case lir_sqrt:
761 case lir_neg: {
762 // Right argument appears to be unused
763 assert(right->is_illegal(), "must be");
764 assert(left->is_fpu_register(), "must be");
765 assert(res->is_fpu_register(), "must be");
766 assert(left->is_last_use(), "old value gets destroyed");
767
768 insert_free_if_dead(res, left);
769 insert_exchange(left);
770 do_rename(left, res);
771
772 new_left = to_fpu_stack_top(res);
773 new_res = new_left;
774
775 op2->set_fpu_stack_size(sim()->stack_size());
776 break;
777 }
778
779 default: {
780 assert(false, "missed a fpu-operation");
781 }
782 }
783
784 op2->set_in_opr1(new_left);
785 op2->set_in_opr2(new_right);
786 op2->set_result_opr(new_res);
787 }
788
handle_opCall(LIR_OpCall * opCall)789 void FpuStackAllocator::handle_opCall(LIR_OpCall* opCall) {
790 LIR_Opr res = opCall->result_opr();
791
792 // clear fpu-stack before call
793 // it may contain dead values that could not have been remved by previous operations
794 clear_fpu_stack(LIR_OprFact::illegalOpr);
795 assert(sim()->is_empty(), "fpu stack must be empty now");
796
797 // compute debug information before (possible) fpu result is pushed
798 compute_debug_information(opCall);
799
800 if (res->is_fpu_register() && !res->is_xmm_register()) {
801 do_push(res);
802 opCall->set_result_opr(to_fpu_stack_top(res));
803 }
804 }
805
806 #ifndef PRODUCT
check_invalid_lir_op(LIR_Op * op)807 void FpuStackAllocator::check_invalid_lir_op(LIR_Op* op) {
808 switch (op->code()) {
809 case lir_fpop_raw:
810 case lir_fxch:
811 case lir_fld:
812 assert(false, "operations only inserted by FpuStackAllocator");
813 break;
814
815 default:
816 break;
817 }
818 }
819 #endif
820
821
merge_insert_add(LIR_List * instrs,FpuStackSim * cur_sim,int reg)822 void FpuStackAllocator::merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg) {
823 LIR_Op1* move = new LIR_Op1(lir_move, LIR_OprFact::doubleConst(0), LIR_OprFact::double_fpu(reg)->make_fpu_stack_offset());
824
825 instrs->instructions_list()->push(move);
826
827 cur_sim->push(reg);
828 move->set_result_opr(to_fpu_stack(move->result_opr()));
829
830 #ifndef PRODUCT
831 if (TraceFPUStack) {
832 tty->print("Added new register: %d New state: ", reg); cur_sim->print(); tty->cr();
833 }
834 #endif
835 }
836
merge_insert_xchg(LIR_List * instrs,FpuStackSim * cur_sim,int slot)837 void FpuStackAllocator::merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot) {
838 assert(slot > 0, "no exchange necessary");
839
840 LIR_Op1* fxch = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(slot));
841 instrs->instructions_list()->push(fxch);
842 cur_sim->swap(slot);
843
844 #ifndef PRODUCT
845 if (TraceFPUStack) {
846 tty->print("Exchanged register: %d New state: ", cur_sim->get_slot(slot)); cur_sim->print(); tty->cr();
847 }
848 #endif
849 }
850
merge_insert_pop(LIR_List * instrs,FpuStackSim * cur_sim)851 void FpuStackAllocator::merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim) {
852 int reg = cur_sim->get_slot(0);
853
854 LIR_Op* fpop = new LIR_Op0(lir_fpop_raw);
855 instrs->instructions_list()->push(fpop);
856 cur_sim->pop(reg);
857
858 #ifndef PRODUCT
859 if (TraceFPUStack) {
860 tty->print("Removed register: %d New state: ", reg); cur_sim->print(); tty->cr();
861 }
862 #endif
863 }
864
merge_rename(FpuStackSim * cur_sim,FpuStackSim * sux_sim,int start_slot,int change_slot)865 bool FpuStackAllocator::merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot) {
866 int reg = cur_sim->get_slot(change_slot);
867
868 for (int slot = start_slot; slot >= 0; slot--) {
869 int new_reg = sux_sim->get_slot(slot);
870
871 if (!cur_sim->contains(new_reg)) {
872 cur_sim->set_slot(change_slot, new_reg);
873
874 #ifndef PRODUCT
875 if (TraceFPUStack) {
876 tty->print("Renamed register %d to %d New state: ", reg, new_reg); cur_sim->print(); tty->cr();
877 }
878 #endif
879
880 return true;
881 }
882 }
883 return false;
884 }
885
886
merge_fpu_stack(LIR_List * instrs,FpuStackSim * cur_sim,FpuStackSim * sux_sim)887 void FpuStackAllocator::merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim) {
888 #ifndef PRODUCT
889 if (TraceFPUStack) {
890 tty->cr();
891 tty->print("before merging: pred: "); cur_sim->print(); tty->cr();
892 tty->print(" sux: "); sux_sim->print(); tty->cr();
893 }
894
895 int slot;
896 for (slot = 0; slot < cur_sim->stack_size(); slot++) {
897 assert(!cur_sim->slot_is_empty(slot), "not handled by algorithm");
898 }
899 for (slot = 0; slot < sux_sim->stack_size(); slot++) {
900 assert(!sux_sim->slot_is_empty(slot), "not handled by algorithm");
901 }
902 #endif
903
904 // size difference between cur and sux that must be resolved by adding or removing values form the stack
905 int size_diff = cur_sim->stack_size() - sux_sim->stack_size();
906
907 if (!ComputeExactFPURegisterUsage) {
908 // add slots that are currently free, but used in successor
909 // When the exact FPU register usage is computed, the stack does
910 // not contain dead values at merging -> no values must be added
911
912 int sux_slot = sux_sim->stack_size() - 1;
913 while (size_diff < 0) {
914 assert(sux_slot >= 0, "slot out of bounds -> error in algorithm");
915
916 int reg = sux_sim->get_slot(sux_slot);
917 if (!cur_sim->contains(reg)) {
918 merge_insert_add(instrs, cur_sim, reg);
919 size_diff++;
920
921 if (sux_slot + size_diff != 0) {
922 merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff);
923 }
924 }
925 sux_slot--;
926 }
927 }
928
929 assert(cur_sim->stack_size() >= sux_sim->stack_size(), "stack size must be equal or greater now");
930 assert(size_diff == cur_sim->stack_size() - sux_sim->stack_size(), "must be");
931
932 // stack merge algorithm:
933 // 1) as long as the current stack top is not in the right location (that meens
934 // it should not be on the stack top), exchange it into the right location
935 // 2) if the stack top is right, but the remaining stack is not ordered correctly,
936 // the stack top is exchanged away to get another value on top ->
937 // now step 1) can be continued
938 // the stack can also contain unused items -> these items are removed from stack
939
940 int finished_slot = sux_sim->stack_size() - 1;
941 while (finished_slot >= 0 || size_diff > 0) {
942 while (size_diff > 0 || (cur_sim->stack_size() > 0 && cur_sim->get_slot(0) != sux_sim->get_slot(0))) {
943 int reg = cur_sim->get_slot(0);
944 if (sux_sim->contains(reg)) {
945 int sux_slot = sux_sim->offset_from_tos(reg);
946 merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff);
947
948 } else if (!merge_rename(cur_sim, sux_sim, finished_slot, 0)) {
949 assert(size_diff > 0, "must be");
950
951 merge_insert_pop(instrs, cur_sim);
952 size_diff--;
953 }
954 assert(cur_sim->stack_size() == 0 || cur_sim->get_slot(0) != reg, "register must have been changed");
955 }
956
957 while (finished_slot >= 0 && cur_sim->get_slot(finished_slot) == sux_sim->get_slot(finished_slot)) {
958 finished_slot--;
959 }
960
961 if (finished_slot >= 0) {
962 int reg = cur_sim->get_slot(finished_slot);
963
964 if (sux_sim->contains(reg) || !merge_rename(cur_sim, sux_sim, finished_slot, finished_slot)) {
965 assert(sux_sim->contains(reg) || size_diff > 0, "must be");
966 merge_insert_xchg(instrs, cur_sim, finished_slot);
967 }
968 assert(cur_sim->get_slot(finished_slot) != reg, "register must have been changed");
969 }
970 }
971
972 #ifndef PRODUCT
973 if (TraceFPUStack) {
974 tty->print("after merging: pred: "); cur_sim->print(); tty->cr();
975 tty->print(" sux: "); sux_sim->print(); tty->cr();
976 tty->cr();
977 }
978 #endif
979 assert(cur_sim->stack_size() == sux_sim->stack_size(), "stack size must be equal now");
980 }
981
982
merge_cleanup_fpu_stack(LIR_List * instrs,FpuStackSim * cur_sim,BitMap & live_fpu_regs)983 void FpuStackAllocator::merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs) {
984 #ifndef PRODUCT
985 if (TraceFPUStack) {
986 tty->cr();
987 tty->print("before cleanup: state: "); cur_sim->print(); tty->cr();
988 tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr();
989 }
990 #endif
991
992 int slot = 0;
993 while (slot < cur_sim->stack_size()) {
994 int reg = cur_sim->get_slot(slot);
995 if (!live_fpu_regs.at(reg)) {
996 if (slot != 0) {
997 merge_insert_xchg(instrs, cur_sim, slot);
998 }
999 merge_insert_pop(instrs, cur_sim);
1000 } else {
1001 slot++;
1002 }
1003 }
1004
1005 #ifndef PRODUCT
1006 if (TraceFPUStack) {
1007 tty->print("after cleanup: state: "); cur_sim->print(); tty->cr();
1008 tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr();
1009 tty->cr();
1010 }
1011
1012 // check if fpu stack only contains live registers
1013 for (unsigned int i = 0; i < live_fpu_regs.size(); i++) {
1014 if (live_fpu_regs.at(i) != cur_sim->contains(i)) {
1015 tty->print_cr("mismatch between required and actual stack content");
1016 break;
1017 }
1018 }
1019 #endif
1020 }
1021
1022
merge_fpu_stack_with_successors(BlockBegin * block)1023 bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) {
1024 #ifndef PRODUCT
1025 if (TraceFPUStack) {
1026 tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:",
1027 block->block_id(), pos());
1028 sim()->print();
1029 tty->cr();
1030 }
1031 #endif
1032
1033 bool changed = false;
1034 int number_of_sux = block->number_of_sux();
1035
1036 if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) {
1037 // The successor has at least two incoming edges, so a stack merge will be necessary
1038 // If this block is the first predecessor, cleanup the current stack and propagate it
1039 // If this block is not the first predecessor, a stack merge will be necessary
1040
1041 BlockBegin* sux = block->sux_at(0);
1042 intArray* state = sux->fpu_stack_state();
1043 LIR_List* instrs = new LIR_List(_compilation);
1044
1045 if (state != NULL) {
1046 // Merge with a successors that already has a FPU stack state
1047 // the block must only have one successor because critical edges must been split
1048 FpuStackSim* cur_sim = sim();
1049 FpuStackSim* sux_sim = temp_sim();
1050 sux_sim->read_state(state);
1051
1052 merge_fpu_stack(instrs, cur_sim, sux_sim);
1053
1054 } else {
1055 // propagate current FPU stack state to successor without state
1056 // clean up stack first so that there are no dead values on the stack
1057 if (ComputeExactFPURegisterUsage) {
1058 FpuStackSim* cur_sim = sim();
1059 ResourceBitMap live_fpu_regs = block->sux_at(0)->fpu_register_usage();
1060 assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage");
1061
1062 merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs);
1063 }
1064
1065 intArray* state = sim()->write_state();
1066 if (TraceFPUStack) {
1067 tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id());
1068 sim()->print(); tty->cr();
1069 }
1070 sux->set_fpu_stack_state(state);
1071 }
1072
1073 if (instrs->instructions_list()->length() > 0) {
1074 lir()->insert_before(pos(), instrs);
1075 set_pos(instrs->instructions_list()->length() + pos());
1076 changed = true;
1077 }
1078
1079 } else {
1080 // Propagate unmodified Stack to successors where a stack merge is not necessary
1081 intArray* state = sim()->write_state();
1082 for (int i = 0; i < number_of_sux; i++) {
1083 BlockBegin* sux = block->sux_at(i);
1084
1085 #ifdef ASSERT
1086 for (int j = 0; j < sux->number_of_preds(); j++) {
1087 assert(block == sux->pred_at(j), "all critical edges must be broken");
1088 }
1089
1090 // check if new state is same
1091 if (sux->fpu_stack_state() != NULL) {
1092 intArray* sux_state = sux->fpu_stack_state();
1093 assert(state->length() == sux_state->length(), "overwriting existing stack state");
1094 for (int j = 0; j < state->length(); j++) {
1095 assert(state->at(j) == sux_state->at(j), "overwriting existing stack state");
1096 }
1097 }
1098 #endif
1099 #ifndef PRODUCT
1100 if (TraceFPUStack) {
1101 tty->print_cr("Setting FPU stack state of B%d", sux->block_id());
1102 sim()->print(); tty->cr();
1103 }
1104 #endif
1105
1106 sux->set_fpu_stack_state(state);
1107 }
1108 }
1109
1110 #ifndef PRODUCT
1111 // assertions that FPU stack state conforms to all successors' states
1112 intArray* cur_state = sim()->write_state();
1113 for (int i = 0; i < number_of_sux; i++) {
1114 BlockBegin* sux = block->sux_at(i);
1115 intArray* sux_state = sux->fpu_stack_state();
1116
1117 assert(sux_state != NULL, "no fpu state");
1118 assert(cur_state->length() == sux_state->length(), "incorrect length");
1119 for (int i = 0; i < cur_state->length(); i++) {
1120 assert(cur_state->at(i) == sux_state->at(i), "element not equal");
1121 }
1122 }
1123 #endif
1124
1125 return changed;
1126 }
1127 #endif // _LP64
1128