/*
 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_arm.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "vmreg_arm.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#include "runtime/safepoint.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

#define __ masm->

class RegisterSaver {
public:

  // Special registers:
  //              32-bit ARM     64-bit ARM
  //  Rthread:       R10            R28
  //  LR:            R14            R30

  // Rthread is callee saved in the C ABI and never changed by compiled code:
  // no need to save it.

  // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  // The one at LR_offset is a return address that is needed by stack walking.
  // A c2 method uses LR as a standard register so it may be live when we
  // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  // in case it's live in the method we are coming from.

  enum RegisterLayout {
    fpu_save_size = FloatRegisterImpl::number_of_registers,
#ifndef __SOFTFP__
    D0_offset = 0,
#endif
    R0_offset = fpu_save_size,
    R1_offset,
    R2_offset,
    R3_offset,
    R4_offset,
    R5_offset,
    R6_offset,
#if (FP_REG_NUM != 7)
    // if not saved as FP
    R7_offset,
#endif
    R8_offset,
    R9_offset,
#if (FP_REG_NUM != 11)
    // if not saved as FP
    R11_offset,
#endif
    R12_offset,
    R14_offset,
    FP_offset,
    LR_offset,
    reg_save_size,

    Rmethod_offset = R9_offset,
    Rtemp_offset = R12_offset,
  };

  // all regs but Rthread (R10), FP (R7 or R11), SP and PC
  // (altFP_7_11 is the one among R7 and R11 which is not FP)
#define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)

  // When LR may be live in the nmethod from which we are coming, lr_saved is
  // true: the return address has already been saved by the caller before the
  // call to save_live_registers, and LR contains the live value.
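  // Illustrative sketch of the save area built by save_live_registers,
  // derived from RegisterLayout and the push order in the implementation
  // below; word offsets from SP after the save, highest first:
  //
  //   LR_offset              return address used by stack walking
  //   FP_offset              saved FP
  //   R14_offset             LR value possibly live in the caller nmethod
  //   R12_offset             R12 (Rtemp)
  //   R11_offset             only when R11 is not used as FP
  //   R9_offset .. R8_offset
  //   R7_offset              only when R7 is not used as FP
  //   R6_offset .. R0_offset
  //   D0_offset ..           D0 .. D15, plus D16 .. D31 when has_vfp3_32()
  //
  // total_frame_words returned by save_live_registers is reg_save_size.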
static OopMap* save_live_registers(MacroAssembler* masm, int* total_frame_words, bool lr_saved = false); static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true); }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int* total_frame_words, bool lr_saved) { *total_frame_words = reg_save_size; OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0); if (lr_saved) { __ push(RegisterSet(FP)); } else { __ push(RegisterSet(FP) | RegisterSet(LR)); } __ push(SAVED_BASE_REGS); if (HaveVFP) { if (VM_Version::has_vfp3_32()) { __ fpush(FloatRegisterSet(D16, 16)); } else { if (FloatRegisterImpl::number_of_registers > 32) { assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64"); __ sub(SP, SP, 32 * wordSize); } } __ fpush(FloatRegisterSet(D0, 16)); } else { __ sub(SP, SP, fpu_save_size * wordSize); } int i; int j=0; for (i = R0_offset; i <= R9_offset; i++) { if (j == FP_REG_NUM) { // skip the FP register, managed below. j++; } map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg()); j++; } assert(j == R10->encoding(), "must be"); #if (FP_REG_NUM != 11) // add R11, if not managed as FP map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg()); #endif map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg()); map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg()); if (HaveVFP) { for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) { map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg()); map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next()); } } return map; } void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) { if (HaveVFP) { __ fpop(FloatRegisterSet(D0, 16)); if (VM_Version::has_vfp3_32()) { __ fpop(FloatRegisterSet(D16, 16)); } else { if (FloatRegisterImpl::number_of_registers > 32) { assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64"); __ add(SP, SP, 32 * wordSize); } } } else { __ add(SP, SP, fpu_save_size * wordSize); } __ pop(SAVED_BASE_REGS); if (restore_lr) { __ pop(RegisterSet(FP) | RegisterSet(LR)); } else { __ pop(RegisterSet(FP)); } } static void push_result_registers(MacroAssembler* masm, BasicType ret_type) { #ifdef __ABI_HARD__ if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { __ sub(SP, SP, 8); __ fstd(D0, Address(SP)); return; } #endif // __ABI_HARD__ __ raw_push(R0, R1); } static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) { #ifdef __ABI_HARD__ if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { __ fldd(D0, Address(SP)); __ add(SP, SP, 8); return; } #endif // __ABI_HARD__ __ raw_pop(R0, R1); } static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment __ push(RegisterSet(R0, R3)); // preserve arguments // Likely not needed as the locking code won't probably modify volatile FP registers, // but there is no way to guarantee that if (fp_regs_in_arguments) { // convert fp_regs_in_arguments to a number of double registers int double_regs_num = (fp_regs_in_arguments + 1) >> 1; __ fpush_hardfp(FloatRegisterSet(D0, double_regs_num)); } } static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { if (fp_regs_in_arguments) { int double_regs_num = (fp_regs_in_arguments + 1) >> 1; __ fpop_hardfp(FloatRegisterSet(D0, 
double_regs_num)); } __ pop(RegisterSet(R0, R3)); } // Is vector's size (in bytes) bigger than a size saved by default? // All vector registers are saved by default on ARM. bool SharedRuntime::is_wide_vector(int size) { return false; } size_t SharedRuntime::trampoline_size() { return 16; } void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { InlinedAddress dest(destination); __ indirect_jump(dest, Rtemp); __ bind_literal(dest); } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2, int total_args_passed) { assert(regs2 == NULL, "not needed on arm"); int slot = 0; int ireg = 0; #ifdef __ABI_HARD__ int fp_slot = 0; int single_fpr_slot = 0; #endif // __ABI_HARD__ for (int i = 0; i < total_args_passed; i++) { switch (sig_bt[i]) { case T_SHORT: case T_CHAR: case T_BYTE: case T_BOOLEAN: case T_INT: case T_ARRAY: case T_OBJECT: case T_ADDRESS: case T_METADATA: #ifndef __ABI_HARD__ case T_FLOAT: #endif // !__ABI_HARD__ if (ireg < 4) { Register r = as_Register(ireg); regs[i].set1(r->as_VMReg()); ireg++; } else { regs[i].set1(VMRegImpl::stack2reg(slot)); slot++; } break; case T_LONG: #ifndef __ABI_HARD__ case T_DOUBLE: #endif // !__ABI_HARD__ assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); if (ireg <= 2) { #if (ALIGN_WIDE_ARGUMENTS == 1) if(ireg & 1) ireg++; // Aligned location required #endif Register r1 = as_Register(ireg); Register r2 = as_Register(ireg + 1); regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); ireg += 2; #if (ALIGN_WIDE_ARGUMENTS == 0) } else if (ireg == 3) { // uses R3 + one stack slot Register r = as_Register(ireg); regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg()); ireg += 1; slot += 1; #endif } else { if (slot & 1) slot++; // Aligned location required regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot)); slot += 2; ireg = 4; } break; case T_VOID: regs[i].set_bad(); break; #ifdef __ABI_HARD__ case T_FLOAT: if ((fp_slot < 16)||(single_fpr_slot & 1)) { if ((single_fpr_slot & 1) == 0) { single_fpr_slot = fp_slot; fp_slot += 2; } FloatRegister r = as_FloatRegister(single_fpr_slot); single_fpr_slot++; regs[i].set1(r->as_VMReg()); } else { regs[i].set1(VMRegImpl::stack2reg(slot)); slot++; } break; case T_DOUBLE: assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments"); if (fp_slot <= 14) { FloatRegister r1 = as_FloatRegister(fp_slot); FloatRegister r2 = as_FloatRegister(fp_slot+1); regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); fp_slot += 2; } else { if(slot & 1) slot++; regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot)); slot += 2; single_fpr_slot = 16; } break; #endif // __ABI_HARD__ default: ShouldNotReachHere(); } } return slot; } int SharedRuntime::java_calling_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed, int is_outgoing) { #ifdef __SOFTFP__ // soft float is the same as the C calling convention. 
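  // Note (illustrative): on __SOFTFP__ builds the Java calling convention is
  // simply the C convention above, so we delegate. The hard-float path below
  // differs mainly in routing T_FLOAT/T_DOUBLE through the VFP registers,
  // keeping S14/S15 free for C2's mem-mem moves.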
return c_calling_convention(sig_bt, regs, NULL, total_args_passed); #endif // __SOFTFP__ (void) is_outgoing; int slot = 0; int ireg = 0; int freg = 0; int single_fpr = 0; for (int i = 0; i < total_args_passed; i++) { switch (sig_bt[i]) { case T_SHORT: case T_CHAR: case T_BYTE: case T_BOOLEAN: case T_INT: case T_ARRAY: case T_OBJECT: case T_ADDRESS: if (ireg < 4) { Register r = as_Register(ireg++); regs[i].set1(r->as_VMReg()); } else { regs[i].set1(VMRegImpl::stack2reg(slot++)); } break; case T_FLOAT: // C2 utilizes S14/S15 for mem-mem moves if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) { if ((single_fpr & 1) == 0) { single_fpr = freg; freg += 2; } FloatRegister r = as_FloatRegister(single_fpr++); regs[i].set1(r->as_VMReg()); } else { regs[i].set1(VMRegImpl::stack2reg(slot++)); } break; case T_DOUBLE: // C2 utilizes S14/S15 for mem-mem moves if (freg <= 14 COMPILER2_PRESENT(-2)) { FloatRegister r1 = as_FloatRegister(freg); FloatRegister r2 = as_FloatRegister(freg + 1); regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); freg += 2; } else { // Keep internally the aligned calling convention, // ignoring ALIGN_WIDE_ARGUMENTS if (slot & 1) slot++; regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot)); slot += 2; single_fpr = 16; } break; case T_LONG: // Keep internally the aligned calling convention, // ignoring ALIGN_WIDE_ARGUMENTS if (ireg <= 2) { if (ireg & 1) ireg++; Register r1 = as_Register(ireg); Register r2 = as_Register(ireg + 1); regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg()); ireg += 2; } else { if (slot & 1) slot++; regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot)); slot += 2; ireg = 4; } break; case T_VOID: regs[i].set_bad(); break; default: ShouldNotReachHere(); } } if (slot & 1) slot++; return slot; } static void patch_callers_callsite(MacroAssembler *masm) { Label skip; __ ldr(Rtemp, Address(Rmethod, Method::code_offset())); __ cbz(Rtemp, skip); // Pushing an even number of registers for stack alignment. // Selecting R9, which had to be saved anyway for some platforms. __ push(RegisterSet(R0, R3) | R9 | LR); __ fpush_hardfp(FloatRegisterSet(D0, 8)); __ mov(R0, Rmethod); __ mov(R1, LR); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)); __ fpop_hardfp(FloatRegisterSet(D0, 8)); __ pop(RegisterSet(R0, R3) | R9 | LR); __ bind(skip); } void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int total_args_passed, int comp_args_on_stack, const BasicType *sig_bt, const VMRegPair *regs) { // TODO: ARM - May be can use ldm to load arguments const Register tmp = Rtemp; // avoid erasing R5_mh // Next assert may not be needed but safer. Extra analysis required // if this there is not enough free registers and we need to use R5 here. assert_different_registers(tmp, R5_mh); // 6243940 We might end up in handle_wrong_method if // the callee is deoptimized as we race thru here. If that // happens we don't want to take a safepoint because the // caller frame will look interpreted and arguments are now // "compiled" so it is much better to make this transition // invisible to the stack walking code. Unfortunately if // we try and find the callee by normal means a safepoint // is possible. So we stash the desired callee in the thread // and the vm will find there should this case occur. 
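  // Overview of the i2c copy code that follows (illustrative): Rmethod is
  // stashed in thread->callee_target so handle_wrong_method can find the
  // callee, initial_sp remembers the interpreter SP, SP is lowered by
  // comp_args_on_stack slots and realigned, and each Java argument is then
  // loaded from the interpreter expression stack (initial_sp + expr offset)
  // and stored into the register or out-slot chosen by regs[].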
Address callee_target_addr(Rthread, JavaThread::callee_target_offset()); __ str(Rmethod, callee_target_addr); assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod); const Register initial_sp = Rmethod; // temporarily scratched // Old code was modifying R4 but this looks unsafe (particularly with JSR292) assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp); __ mov(initial_sp, SP); if (comp_args_on_stack) { __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size); } __ bic(SP, SP, StackAlignmentInBytes - 1); for (int i = 0; i < total_args_passed; i++) { if (sig_bt[i] == T_VOID) { assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); continue; } assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered"); int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i); VMReg r_1 = regs[i].first(); VMReg r_2 = regs[i].second(); if (r_1->is_stack()) { int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size; if (!r_2->is_valid()) { __ ldr(tmp, Address(initial_sp, arg_offset)); __ str(tmp, Address(SP, stack_offset)); } else { __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize)); __ str(tmp, Address(SP, stack_offset)); __ ldr(tmp, Address(initial_sp, arg_offset)); __ str(tmp, Address(SP, stack_offset + wordSize)); } } else if (r_1->is_Register()) { if (!r_2->is_valid()) { __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset)); } else { __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize)); __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset)); } } else if (r_1->is_FloatRegister()) { #ifdef __SOFTFP__ ShouldNotReachHere(); #endif // __SOFTFP__ if (!r_2->is_valid()) { __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset)); } else { __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize)); } } else { assert(!r_1->is_valid() && !r_2->is_valid(), "must be"); } } // restore Rmethod (scratched for initial_sp) __ ldr(Rmethod, callee_target_addr); __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); } static void gen_c2i_adapter(MacroAssembler *masm, int total_args_passed, int comp_args_on_stack, const BasicType *sig_bt, const VMRegPair *regs, Label& skip_fixup) { // TODO: ARM - May be can use stm to deoptimize arguments const Register tmp = Rtemp; patch_callers_callsite(masm); __ bind(skip_fixup); __ mov(Rsender_sp, SP); // not yet saved int extraspace = total_args_passed * Interpreter::stackElementSize; if (extraspace) { __ sub_slow(SP, SP, extraspace); } for (int i = 0; i < total_args_passed; i++) { if (sig_bt[i] == T_VOID) { assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); continue; } int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize; VMReg r_1 = regs[i].first(); VMReg r_2 = regs[i].second(); if (r_1->is_stack()) { int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; if (!r_2->is_valid()) { __ ldr(tmp, Address(SP, arg_offset)); __ str(tmp, Address(SP, stack_offset)); } else { __ ldr(tmp, Address(SP, arg_offset)); __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize)); __ ldr(tmp, Address(SP, arg_offset + wordSize)); __ str(tmp, Address(SP, stack_offset)); } } else if (r_1->is_Register()) { if (!r_2->is_valid()) { __ str(r_1->as_Register(), Address(SP, stack_offset)); } else { __ str(r_1->as_Register(), Address(SP, stack_offset 
- Interpreter::stackElementSize)); __ str(r_2->as_Register(), Address(SP, stack_offset)); } } else if (r_1->is_FloatRegister()) { #ifdef __SOFTFP__ ShouldNotReachHere(); #endif // __SOFTFP__ if (!r_2->is_valid()) { __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset)); } else { __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize)); } } else { assert(!r_1->is_valid() && !r_2->is_valid(), "must be"); } } __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset())); } AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, int total_args_passed, int comp_args_on_stack, const BasicType *sig_bt, const VMRegPair *regs, AdapterFingerPrint* fingerprint) { address i2c_entry = __ pc(); gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); address c2i_unverified_entry = __ pc(); Label skip_fixup; const Register receiver = R0; const Register holder_klass = Rtemp; // XXX should be OK for C2 but not 100% sure const Register receiver_klass = R4; __ load_klass(receiver_klass, receiver); __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset())); __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset())); __ cmp(receiver_klass, holder_klass); __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq); __ cmp(Rtemp, 0, eq); __ b(skip_fixup, eq); __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne); address c2i_entry = __ pc(); gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); __ flush(); return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); } static int reg2offset_in(VMReg r) { // Account for saved FP and LR return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize; } static int reg2offset_out(VMReg r) { return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; } static void verify_oop_args(MacroAssembler* masm, const methodHandle& method, const BasicType* sig_bt, const VMRegPair* regs) { Register temp_reg = Rmethod; // not part of any compiled calling seq if (VerifyOops) { for (int i = 0; i < method->size_of_parameters(); i++) { if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { VMReg r = regs[i].first(); assert(r->is_valid(), "bad oop arg"); if (r->is_stack()) { __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); __ verify_oop(temp_reg); } else { __ verify_oop(r->as_Register()); } } } } } static void gen_special_dispatch(MacroAssembler* masm, const methodHandle& method, const BasicType* sig_bt, const VMRegPair* regs) { verify_oop_args(masm, method, sig_bt, regs); vmIntrinsics::ID iid = method->intrinsic_id(); // Now write the args into the outgoing interpreter space bool has_receiver = false; Register receiver_reg = noreg; int member_arg_pos = -1; Register member_reg = noreg; int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); if (ref_kind != 0) { member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument member_reg = Rmethod; // known to be free at this point has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { fatal("unexpected intrinsic id %d", iid); } if (member_reg != noreg) { // Load the member_arg into register, if necessary. 
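    // For a MethodHandle intrinsic the trailing MemberName argument
    // (member_arg_pos above) identifies the actual target. It is moved into
    // member_reg (Rmethod here) for generate_method_handle_dispatch; a load
    // from the stack is only needed when the compiled calling convention did
    // not already place it in a register.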
SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); VMReg r = regs[member_arg_pos].first(); if (r->is_stack()) { __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); } else { // no data motion is needed member_reg = r->as_Register(); } } if (has_receiver) { // Make sure the receiver is loaded into a register. assert(method->size_of_parameters() > 0, "oob"); assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); VMReg r = regs[0].first(); assert(r->is_valid(), "bad receiver arg"); if (r->is_stack()) { // Porting note: This assumes that compiled calling conventions always // pass the receiver oop in a register. If this is not true on some // platform, pick a temp and load the receiver from stack. assert(false, "receiver always in a register"); receiver_reg = j_rarg0; // known to be free at this point __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); } else { // no data motion is needed receiver_reg = r->as_Register(); } } // Figure out which address we are really jumping to: MethodHandles::generate_method_handle_dispatch(masm, iid, receiver_reg, member_reg, /*for_compiler_entry:*/ true); } // --------------------------------------------------------------------------- // Generate a native wrapper for a given method. The method takes arguments // in the Java compiled code convention, marshals them to the native // convention (handlizes oops, etc), transitions to native, makes the call, // returns to java state (possibly blocking), unhandlizes any result and // returns. nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const methodHandle& method, int compile_id, BasicType* in_sig_bt, VMRegPair* in_regs, BasicType ret_type, address critical_entry) { if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); int vep_offset = ((intptr_t)__ pc()) - start; gen_special_dispatch(masm, method, in_sig_bt, in_regs); int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period __ flush(); int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually return nmethod::new_native_nmethod(method, compile_id, masm->code(), vep_offset, frame_complete, stack_slots / VMRegImpl::slots_per_word, in_ByteSize(-1), in_ByteSize(-1), (OopMapSet*)NULL); } // Arguments for JNI method include JNIEnv and Class if static // Usage of Rtemp should be OK since scratched by native call bool is_static = method->is_static(); const int total_in_args = method->size_of_parameters(); int total_c_args = total_in_args + 1; if (is_static) { total_c_args++; } BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); int argc = 0; out_sig_bt[argc++] = T_ADDRESS; if (is_static) { out_sig_bt[argc++] = T_OBJECT; } int i; for (i = 0; i < total_in_args; i++) { out_sig_bt[argc++] = in_sig_bt[i]; } int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; // Since object arguments need to be wrapped, we must preserve space // for those object arguments which come in registers (GPR_PARAMS maximum) // plus one more slot for Klass handle (for static methods) int oop_handle_offset = stack_slots; stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word; // Plus a lock if needed int lock_slot_offset = 0; if (method->is_synchronized()) { 
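    // A synchronized native method gets one extra word for a BasicLock at
    // lock_slot_offset. Together with the outgoing C arguments, the oop
    // handle area ((GPR_PARAMS + 1) words at oop_handle_offset) and 2 words
    // for FP/LR, this forms the wrapper frame; lock_slot_fp_offset, computed
    // below, is the FP-relative offset of the lock slot used by the locking
    // and unlocking code.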
lock_slot_offset = stack_slots; assert(sizeof(BasicLock) == wordSize, "adjust this code"); stack_slots += VMRegImpl::slots_per_word; } // Space to save return address and FP stack_slots += 2 * VMRegImpl::slots_per_word; // Calculate the final stack size taking account of alignment stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size); int stack_size = stack_slots * VMRegImpl::stack_slot_size; int lock_slot_fp_offset = stack_size - 2 * wordSize - lock_slot_offset * VMRegImpl::stack_slot_size; // Unverified entry point address start = __ pc(); // Inline cache check, same as in C1_MacroAssembler::inline_cache_check() const Register receiver = R0; // see receiverOpr() __ load_klass(Rtemp, receiver); __ cmp(Rtemp, Ricklass); Label verified; __ b(verified, eq); // jump over alignment no-ops too __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); __ align(CodeEntryAlignment); // Verified entry point __ bind(verified); int vep_offset = __ pc() - start; if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { // Object.hashCode, System.identityHashCode can pull the hashCode from the header word // instead of doing a full VM transition once it's been computed. Label slow_case; const Register obj_reg = R0; // Unlike for Object.hashCode, System.identityHashCode is static method and // gets object as argument instead of the receiver. if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) { assert(method->is_static(), "method should be static"); // return 0 for null reference input, return val = R0 = obj_reg = 0 __ cmp(obj_reg, 0); __ bx(LR, eq); } __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes())); assert(markOopDesc::unlocked_value == 1, "adjust this code"); __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case); if (UseBiasedLocking) { assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code"); __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case); } __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place); __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne); __ bx(LR, ne); __ bind(slow_case); } // Bang stack pages __ arm_stack_overflow_check(stack_size, Rtemp); // Setup frame linkage __ raw_push(FP, LR); __ mov(FP, SP); __ sub_slow(SP, SP, stack_size - 2*wordSize); int frame_complete = __ pc() - start; OopMapSet* oop_maps = new OopMapSet(); OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); const int extra_args = is_static ? 
2 : 1; int receiver_offset = -1; int fp_regs_in_arguments = 0; for (i = total_in_args; --i >= 0; ) { switch (in_sig_bt[i]) { case T_ARRAY: case T_OBJECT: { VMReg src = in_regs[i].first(); VMReg dst = out_regs[i + extra_args].first(); if (src->is_stack()) { assert(dst->is_stack(), "must be"); assert(i != 0, "Incoming receiver is always in a register"); __ ldr(Rtemp, Address(FP, reg2offset_in(src))); __ cmp(Rtemp, 0); __ add(Rtemp, FP, reg2offset_in(src), ne); __ str(Rtemp, Address(SP, reg2offset_out(dst))); int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots(); map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); } else { int offset = oop_handle_offset * VMRegImpl::stack_slot_size; __ str(src->as_Register(), Address(SP, offset)); map->set_oop(VMRegImpl::stack2reg(oop_handle_offset)); if ((i == 0) && (!is_static)) { receiver_offset = offset; } oop_handle_offset += VMRegImpl::slots_per_word; if (dst->is_stack()) { __ movs(Rtemp, src->as_Register()); __ add(Rtemp, SP, offset, ne); __ str(Rtemp, Address(SP, reg2offset_out(dst))); } else { __ movs(dst->as_Register(), src->as_Register()); __ add(dst->as_Register(), SP, offset, ne); } } } case T_VOID: break; #ifdef __SOFTFP__ case T_DOUBLE: #endif case T_LONG: { VMReg src_1 = in_regs[i].first(); VMReg src_2 = in_regs[i].second(); VMReg dst_1 = out_regs[i + extra_args].first(); VMReg dst_2 = out_regs[i + extra_args].second(); #if (ALIGN_WIDE_ARGUMENTS == 0) // C convention can mix a register and a stack slot for a // 64-bits native argument. // Note: following code should work independently of whether // the Java calling convention follows C convention or whether // it aligns 64-bit values. if (dst_2->is_Register()) { if (src_1->as_Register() != dst_1->as_Register()) { assert(src_1->as_Register() != dst_2->as_Register() && src_2->as_Register() != dst_2->as_Register(), "must be"); __ mov(dst_2->as_Register(), src_2->as_Register()); __ mov(dst_1->as_Register(), src_1->as_Register()); } else { assert(src_2->as_Register() == dst_2->as_Register(), "must be"); } } else if (src_2->is_Register()) { if (dst_1->is_Register()) { // dst mixes a register and a stack slot assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be"); assert(src_1->as_Register() != dst_1->as_Register(), "must be"); __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2))); __ mov(dst_1->as_Register(), src_1->as_Register()); } else { // registers to stack slots assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be"); __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1))); __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2))); } } else if (src_1->is_Register()) { if (dst_1->is_Register()) { // src and dst must be R3 + stack slot assert(dst_1->as_Register() == src_1->as_Register(), "must be"); __ ldr(Rtemp, Address(FP, reg2offset_in(src_2))); __ str(Rtemp, Address(SP, reg2offset_out(dst_2))); } else { // -> assert(dst_2->is_stack() && src_2->is_stack(), "must be"); __ ldr(LR, Address(FP, reg2offset_in(src_2))); __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1))); __ str(LR, Address(SP, reg2offset_out(dst_2))); } } else { assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be"); __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); __ ldr(LR, Address(FP, reg2offset_in(src_2))); __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); __ str(LR, Address(SP, reg2offset_out(dst_2))); } #else // ALIGN_WIDE_ARGUMENTS if 
(src_1->is_stack()) { assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be"); __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); __ ldr(LR, Address(FP, reg2offset_in(src_2))); __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); __ str(LR, Address(SP, reg2offset_out(dst_2))); } else if (dst_1->is_stack()) { assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be"); __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1))); __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2))); } else if (src_1->as_Register() == dst_1->as_Register()) { assert(src_2->as_Register() == dst_2->as_Register(), "must be"); } else { assert(src_1->as_Register() != dst_2->as_Register() && src_2->as_Register() != dst_2->as_Register(), "must be"); __ mov(dst_2->as_Register(), src_2->as_Register()); __ mov(dst_1->as_Register(), src_1->as_Register()); } #endif // ALIGN_WIDE_ARGUMENTS break; } #if (!defined __SOFTFP__ && !defined __ABI_HARD__) case T_FLOAT: { VMReg src = in_regs[i].first(); VMReg dst = out_regs[i + extra_args].first(); if (src->is_stack()) { assert(dst->is_stack(), "must be"); __ ldr(Rtemp, Address(FP, reg2offset_in(src))); __ str(Rtemp, Address(SP, reg2offset_out(dst))); } else if (dst->is_stack()) { __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst))); } else { assert(src->is_FloatRegister() && dst->is_Register(), "must be"); __ fmrs(dst->as_Register(), src->as_FloatRegister()); } break; } case T_DOUBLE: { VMReg src_1 = in_regs[i].first(); VMReg src_2 = in_regs[i].second(); VMReg dst_1 = out_regs[i + extra_args].first(); VMReg dst_2 = out_regs[i + extra_args].second(); if (src_1->is_stack()) { assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be"); __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); __ ldr(LR, Address(FP, reg2offset_in(src_2))); __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); __ str(LR, Address(SP, reg2offset_out(dst_2))); } else if (dst_1->is_stack()) { assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be"); __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1))); #if (ALIGN_WIDE_ARGUMENTS == 0) } else if (dst_2->is_stack()) { assert(! 
src_2->is_stack(), "must be"); // assuming internal java convention is aligned // double register must go into R3 + one stack slot __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister()); __ str(Rtemp, Address(SP, reg2offset_out(dst_2))); #endif } else { assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be"); __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister()); } break; } #endif // __SOFTFP__ #ifdef __ABI_HARD__ case T_FLOAT: { VMReg src = in_regs[i].first(); VMReg dst = out_regs[i + extra_args].first(); if (src->is_stack()) { if (dst->is_stack()) { __ ldr(Rtemp, Address(FP, reg2offset_in(src))); __ str(Rtemp, Address(SP, reg2offset_out(dst))); } else { // C2 Java calling convention does not populate S14 and S15, therefore // those need to be loaded from stack here __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src))); fp_regs_in_arguments++; } } else { assert(src->is_FloatRegister(), "must be"); fp_regs_in_arguments++; } break; } case T_DOUBLE: { VMReg src_1 = in_regs[i].first(); VMReg src_2 = in_regs[i].second(); VMReg dst_1 = out_regs[i + extra_args].first(); VMReg dst_2 = out_regs[i + extra_args].second(); if (src_1->is_stack()) { if (dst_1->is_stack()) { assert(dst_2->is_stack(), "must be"); __ ldr(Rtemp, Address(FP, reg2offset_in(src_1))); __ ldr(LR, Address(FP, reg2offset_in(src_2))); __ str(Rtemp, Address(SP, reg2offset_out(dst_1))); __ str(LR, Address(SP, reg2offset_out(dst_2))); } else { // C2 Java calling convention does not populate S14 and S15, therefore // those need to be loaded from stack here __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1))); fp_regs_in_arguments += 2; } } else { assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be"); fp_regs_in_arguments += 2; } break; } #endif // __ABI_HARD__ default: { assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args"); VMReg src = in_regs[i].first(); VMReg dst = out_regs[i + extra_args].first(); if (src->is_stack()) { assert(dst->is_stack(), "must be"); __ ldr(Rtemp, Address(FP, reg2offset_in(src))); __ str(Rtemp, Address(SP, reg2offset_out(dst))); } else if (dst->is_stack()) { __ str(src->as_Register(), Address(SP, reg2offset_out(dst))); } else { assert(src->is_Register() && dst->is_Register(), "must be"); __ mov(dst->as_Register(), src->as_Register()); } } } } // Get Klass mirror int klass_offset = -1; if (is_static) { klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size; __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror())); __ add(c_rarg1, SP, klass_offset); __ str(Rtemp, Address(SP, klass_offset)); map->set_oop(VMRegImpl::stack2reg(oop_handle_offset)); } // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp); assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); oop_maps->add_gc_map(pc_offset, map); // Order last_Java_pc store with the thread state transition (to _thread_in_native) __ membar(MacroAssembler::StoreStore, Rtemp); // RedefineClasses() tracing support for obsolete method entry if (log_is_enabled(Trace, redefine, class, obsolete)) { __ save_caller_save_registers(); __ mov(R0, Rthread); __ mov_metadata(R1, method()); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1); __ restore_caller_save_registers(); } const Register sync_handle = R5; const Register sync_obj = R6; const Register disp_hdr 
= altFP_7_11; const Register tmp = R8; Label slow_lock, slow_lock_biased, lock_done, fast_lock; if (method->is_synchronized()) { // The first argument is a handle to sync object (a class or an instance) __ ldr(sync_obj, Address(R1)); // Remember the handle for the unlocking code __ mov(sync_handle, R1); __ resolve(IS_NOT_NULL, sync_obj); if(UseBiasedLocking) { __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased); } const Register mark = tmp; // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. // That would be acceptable as either CAS or slow case path is taken in that case __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes())); __ sub(disp_hdr, FP, lock_slot_fp_offset); __ tst(mark, markOopDesc::unlocked_value); __ b(fast_lock, ne); // Check for recursive lock // See comments in InterpreterMacroAssembler::lock_object for // explanations on the fast recursive locking check. // Check independently the low bits and the distance to SP // -1- test low 2 bits __ movs(Rtemp, AsmOperand(mark, lsl, 30)); // -2- test (hdr - SP) if the low two bits are 0 __ sub(Rtemp, mark, SP, eq); __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq); // If still 'eq' then recursive locking OK: set displaced header to 0 __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq); __ b(lock_done, eq); __ b(slow_lock); __ bind(fast_lock); __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock); __ bind(lock_done); } // Get JNIEnv* __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset())); // Perform thread state transition __ mov(Rtemp, _thread_in_native); __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); // Finally, call the native method __ call(method->native_function()); // Set FPSCR/FPCR to a known state if (AlwaysRestoreFPU) { __ restore_default_fp_mode(); } // Ensure a Boolean result is mapped to 0..1 if (ret_type == T_BOOLEAN) { __ c2bool(R0); } // Do a safepoint check while thread is in transition state InlinedAddress safepoint_state(SafepointSynchronize::address_of_state()); Label call_safepoint_runtime, return_to_java; __ mov(Rtemp, _thread_in_native_trans); __ ldr_literal(R2, safepoint_state); __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); // make sure the store is observed before reading the SafepointSynchronize state and further mem refs __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp); __ ldr_s32(R2, Address(R2)); __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset())); __ cmp(R2, SafepointSynchronize::_not_synchronized); __ cond_cmp(R3, 0, eq); __ b(call_safepoint_runtime, ne); __ bind(return_to_java); // Perform thread state transition and reguard stack yellow pages if needed Label reguard, reguard_done; __ mov(Rtemp, _thread_in_Java); __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset())); __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled); __ b(reguard, eq); __ bind(reguard_done); Label slow_unlock, unlock_done; if (method->is_synchronized()) { __ ldr(sync_obj, Address(sync_handle)); __ resolve(IS_NOT_NULL, sync_obj); if(UseBiasedLocking) { __ biased_locking_exit(sync_obj, Rtemp, unlock_done); // disp_hdr may 
not have been saved on entry with biased locking __ sub(disp_hdr, FP, lock_slot_fp_offset); } // See C1_MacroAssembler::unlock_object() for more comments __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); __ cbz(R2, unlock_done); __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock); __ bind(unlock_done); } // Set last java frame and handle block to zero __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset())); __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes())); if (CheckJNICalls) { __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset())); } // Unbox oop result, e.g. JNIHandles::resolve value in R0. if (ret_type == T_OBJECT || ret_type == T_ARRAY) { __ resolve_jobject(R0, // value Rtemp, // tmp1 R1_tmp); // tmp2 } // Any exception pending? __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset())); __ mov(SP, FP); __ cmp(Rtemp, 0); // Pop the frame and return if no exception pending __ pop(RegisterSet(FP) | RegisterSet(PC), eq); // Pop the frame and forward the exception. Rexception_pc contains return address. __ ldr(FP, Address(SP, wordSize, post_indexed), ne); __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne); __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); // Safepoint operation and/or pending suspend request is in progress. // Save the return values and call the runtime function by hand. __ bind(call_safepoint_runtime); push_result_registers(masm, ret_type); __ mov(R0, Rthread); __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); pop_result_registers(masm, ret_type); __ b(return_to_java); __ bind_literal(safepoint_state); // Reguard stack pages. Save native results around a call to C runtime. __ bind(reguard); push_result_registers(masm, ret_type); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); pop_result_registers(masm, ret_type); __ b(reguard_done); if (method->is_synchronized()) { // Locking slow case if(UseBiasedLocking) { __ bind(slow_lock_biased); __ sub(disp_hdr, FP, lock_slot_fp_offset); } __ bind(slow_lock); push_param_registers(masm, fp_regs_in_arguments); // last_Java_frame is already set, so do call_VM manually; no exception can occur __ mov(R0, sync_obj); __ mov(R1, disp_hdr); __ mov(R2, Rthread); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)); pop_param_registers(masm, fp_regs_in_arguments); __ b(lock_done); // Unlocking slow case __ bind(slow_unlock); push_result_registers(masm, ret_type); // Clear pending exception before reentering VM. // Can store the oop in register since it is a leaf call. assert_different_registers(Rtmp_save1, sync_obj, disp_hdr); __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset())); Register zero = __ zero_register(Rtemp); __ str(zero, Address(Rthread, Thread::pending_exception_offset())); __ mov(R0, sync_obj); __ mov(R1, disp_hdr); __ mov(R2, Rthread); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset())); pop_result_registers(masm, ret_type); __ b(unlock_done); } __ flush(); return nmethod::new_native_nmethod(method, compile_id, masm->code(), vep_offset, frame_complete, stack_slots / VMRegImpl::slots_per_word, in_ByteSize(is_static ? 
klass_offset : receiver_offset), in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size), oop_maps); } // this function returns the adjust size (in number of words) to a c2i adapter // activation for use during deoptimization int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords; return extra_locals_size; } uint SharedRuntime::out_preserve_stack_slots() { return 0; } //------------------------------generate_deopt_blob---------------------------- void SharedRuntime::generate_deopt_blob() { ResourceMark rm; CodeBuffer buffer("deopt_blob", 1024, 1024); int frame_size_in_words; OopMapSet* oop_maps; int reexecute_offset; int exception_in_tls_offset; int exception_offset; MacroAssembler* masm = new MacroAssembler(&buffer); Label cont; const Register Rkind = R9; // caller-saved const Register Rublock = R6; const Register Rsender = altFP_7_11; assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp); address start = __ pc(); oop_maps = new OopMapSet(); // LR saved by caller (can be live in c2 method) // A deopt is a case where LR may be live in the c2 nmethod. So it's // not possible to call the deopt blob from the nmethod and pass the // address of the deopt handler of the nmethod in LR. What happens // now is that the caller of the deopt blob pushes the current // address so the deopt blob doesn't have to do it. This way LR can // be preserved, contains the live value from the nmethod and is // saved at R14/R30_offset here. OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true); __ mov(Rkind, Deoptimization::Unpack_deopt); __ b(cont); exception_offset = __ pc() - start; // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the // exception_in_tls_offset entry point. __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset())); // Force return value to NULL to avoid confusing the escape analysis // logic. Everything is dead here anyway. 
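  // Summary of the entry points (illustrative): the default entry above sets
  // Rkind = Unpack_deopt; exception_offset stores Rexception_obj/Rexception_pc
  // into the thread and falls through; exception_in_tls_offset reloads the
  // return address from the saved exception pc before saving registers
  // (Unpack_exception); reexecute_offset sets Unpack_reexecute. All paths
  // join at 'cont' below.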
__ mov(R0, 0); exception_in_tls_offset = __ pc() - start; // Exception data is in JavaThread structure // Patch the return address of the current frame __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset())); (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words); { const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset())); } __ mov(Rkind, Deoptimization::Unpack_exception); __ b(cont); reexecute_offset = __ pc() - start; (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words); __ mov(Rkind, Deoptimization::Unpack_reexecute); // Calculate UnrollBlock and save the result in Rublock __ bind(cont); __ mov(R0, Rthread); __ mov(R1, Rkind); int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86) assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)); if (pc_offset == -1) { pc_offset = __ offset(); } oop_maps->add_gc_map(pc_offset, map); __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call __ mov(Rublock, R0); // Reload Rkind from the UnrollBlock (might have changed) __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); Label noException; __ cmp_32(Rkind, Deoptimization::Unpack_exception); // Was exception pending? __ b(noException, ne); // handle exception case #ifdef ASSERT // assert that exception_pc is zero in tls { Label L; __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset())); __ cbz(Rexception_pc, L); __ stop("exception pc should be null"); __ bind(L); } #endif __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); __ verify_oop(Rexception_obj); { const Register Rzero = __ zero_register(Rtemp); __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset())); } __ bind(noException); // This frame is going away. Fetch return value, so we can move it to // a new frame. __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); #ifndef __SOFTFP__ __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); #endif // pop frame __ add(SP, SP, RegisterSaver::reg_save_size * wordSize); // Set initial stack state before pushing interpreter frames __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); __ add(SP, SP, Rtemp); #ifdef ASSERT // Compilers generate code that bang the stack by as much as the // interpreter would need. So this stack banging should never // trigger a fault. Verify that it does not on non product builds. // See if it is enough stack to push deoptimized frames if (UseStackBanging) { // The compiled method that we are deoptimizing was popped from the stack. // If the stack bang results in a stack overflow, we don't return to the // method that is being deoptimized. The stack overflow exception is // propagated to the caller of the deoptimized method. Need to get the pc // from the caller in LR and restore FP. 
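    // total_frame_sizes is the combined size of all interpreter frames about
    // to be pushed; banging that far ahead, with LR/FP already holding the
    // caller's pc/fp, lets a potential overflow unwind to the caller of the
    // deoptimized method instead of faulting while the frames are rebuilt.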
__ ldr(LR, Address(R2, 0)); __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ arm_stack_overflow_check(R8, Rtemp); } #endif __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); // Pick up the initial fp we should save // XXX Note: was ldr(FP, Address(FP)); // The compiler no longer uses FP as a frame pointer for the // compiled code. It can be used by the allocator in C2 or to // memorize the original SP for JSR292 call sites. // Hence, ldr(FP, Address(FP)) is probably not correct. For x86, // Deoptimization::fetch_unroll_info computes the right FP value and // stores it in Rublock.initial_info. This has been activated for ARM. __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); __ mov(Rsender, SP); __ sub(SP, SP, Rtemp); // Push interpreter frames in a loop Label loop; __ bind(loop); __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size __ raw_push(FP, LR); // create new frame __ mov(FP, SP); __ sub(Rtemp, Rtemp, 2*wordSize); __ sub(SP, SP, Rtemp); __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); __ mov(LR, 0); __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); __ subs(R8, R8, 1); // decrement counter __ mov(Rsender, SP); __ b(loop, ne); // Re-push self-frame __ ldr(LR, Address(R2)); __ raw_push(FP, LR); __ mov(FP, SP); __ sub(SP, SP, (frame_size_in_words - 2) * wordSize); // Restore frame locals after moving the frame __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); #ifndef __SOFTFP__ __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); #endif // !__SOFTFP__ #ifdef ASSERT // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved) { Label L; __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); __ cmp_32(Rkind, Rtemp); __ b(L, eq); __ stop("Rkind was overwritten"); __ bind(L); } #endif // Call unpack_frames with proper arguments __ mov(R0, Rthread); __ mov(R1, Rkind); pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp); assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)); if (pc_offset == -1) { pc_offset = __ offset(); } oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0)); __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call // Collect return values, pop self-frame and jump to interpreter __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); // Interpreter floats controlled by __SOFTFP__, but compiler // float return value registers controlled by __ABI_HARD__ // This matters for vfp-sflt builds. 
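  // In other words (illustrative): __SOFTFP__ builds need no copy (both sides
  // use core registers); with a hard-float interpreter and __ABI_HARD__ the
  // compiled code returned the value in D0, so reload it from the D0 save
  // slot; with a hard-float interpreter but a soft-float ABI (vfp-sflt) the
  // compiled code returned the double in R0:R1, so fmdrr copies it to D0_tos.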
#ifndef __SOFTFP__ // Interpreter hard float #ifdef __ABI_HARD__ // Compiler float return value in FP registers __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); #else // Compiler float return value in integer registers, // copy to D0 for interpreter (S0 <-- R0) __ fmdrr(D0_tos, R0, R1); #endif #endif // !__SOFTFP__ __ mov(SP, FP); __ pop(RegisterSet(FP) | RegisterSet(PC)); __ flush(); _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); } #ifdef COMPILER2 //------------------------------generate_uncommon_trap_blob-------------------- // Ought to generate an ideal graph & compile, but here's some SPARC ASM // instead. void SharedRuntime::generate_uncommon_trap_blob() { // allocate space for the code ResourceMark rm; // setup code generation tools int pad = VerifyThread ? 512 : 0; #ifdef _LP64 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512); #else // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread) // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread) CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512); #endif // bypassed when code generation useless MacroAssembler* masm = new MacroAssembler(&buffer); const Register Rublock = R6; const Register Rsender = altFP_7_11; assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp); // // This is the entry point for all traps the compiler takes when it thinks // it cannot handle further execution of compilation code. The frame is // deoptimized in these cases and converted into interpreter frames for // execution // The steps taken by this frame are as follows: // - push a fake "unpack_frame" // - call the C routine Deoptimization::uncommon_trap (this function // packs the current compiled frame into vframe arrays and returns // information about the number and size of interpreter frames which // are equivalent to the frame which is being deoptimized) // - deallocate the "unpack_frame" // - deallocate the deoptimization frame // - in a loop using the information returned in the previous step // push interpreter frames; // - create a dummy "unpack_frame" // - call the C routine: Deoptimization::unpack_frames (this function // lays out values on the interpreter frame which was just created) // - deallocate the dummy unpack_frame // - return to the interpreter entry point // // Refer to the following methods for more information: // - Deoptimization::uncommon_trap // - Deoptimization::unpack_frame // the unloaded class index is in R0 (first parameter to this blob) __ raw_push(FP, LR); __ set_last_Java_frame(SP, FP, false, Rtemp); __ mov(R2, Deoptimization::Unpack_uncommon_trap); __ mov(R1, R0); __ mov(R0, Rthread); __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)); __ mov(Rublock, R0); __ reset_last_Java_frame(Rtemp); __ raw_pop(FP, LR); #ifdef ASSERT { Label L; __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap); __ b(L, eq); __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap"); __ bind(L); } #endif // Set initial stack state before pushing interpreter frames __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); __ ldr(R3, Address(Rublock, 
Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); __ add(SP, SP, Rtemp); // See if it is enough stack to push deoptimized frames #ifdef ASSERT // Compilers generate code that bang the stack by as much as the // interpreter would need. So this stack banging should never // trigger a fault. Verify that it does not on non product builds. if (UseStackBanging) { // The compiled method that we are deoptimizing was popped from the stack. // If the stack bang results in a stack overflow, we don't return to the // method that is being deoptimized. The stack overflow exception is // propagated to the caller of the deoptimized method. Need to get the pc // from the caller in LR and restore FP. __ ldr(LR, Address(R2, 0)); __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ arm_stack_overflow_check(R8, Rtemp); } #endif __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); __ mov(Rsender, SP); __ sub(SP, SP, Rtemp); // __ ldr(FP, Address(FP)); __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); // Push interpreter frames in a loop Label loop; __ bind(loop); __ ldr(LR, Address(R2, wordSize, post_indexed)); // load frame pc __ ldr(Rtemp, Address(R3, wordSize, post_indexed)); // load frame size __ raw_push(FP, LR); // create new frame __ mov(FP, SP); __ sub(Rtemp, Rtemp, 2*wordSize); __ sub(SP, SP, Rtemp); __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); __ mov(LR, 0); __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); __ subs(R8, R8, 1); // decrement counter __ mov(Rsender, SP); __ b(loop, ne); // Re-push self-frame __ ldr(LR, Address(R2)); __ raw_push(FP, LR); __ mov(FP, SP); // Call unpack_frames with proper arguments __ mov(R0, Rthread); __ mov(R1, Deoptimization::Unpack_uncommon_trap); __ set_last_Java_frame(SP, FP, true, Rtemp); __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)); // oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0)); __ reset_last_Java_frame(Rtemp); __ mov(SP, FP); __ pop(RegisterSet(FP) | RegisterSet(PC)); masm->flush(); _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */); } #endif // COMPILER2 //------------------------------generate_handler_blob------ // // Generate a special Compile2Runtime blob that saves all registers, // setup oopmap, and calls safepoint code to stop the compiled code for // a safepoint. 
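// When the poll is not at a return (poll_type != POLL_AT_RETURN), LR may still
// be live in the compiled method, so the blob reserves an extra word, saves
// registers with lr_saved set, and patches the LR_offset slot from
// thread->saved_exception_pc so stack walking sees the correct return address.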
// SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ResourceMark rm; CodeBuffer buffer("handler_blob", 256, 256); int frame_size_words; OopMapSet* oop_maps; bool cause_return = (poll_type == POLL_AT_RETURN); MacroAssembler* masm = new MacroAssembler(&buffer); address start = __ pc(); oop_maps = new OopMapSet(); if (!cause_return) { __ sub(SP, SP, 4); // make room for LR which may still be live // here if we are coming from a c2 method } OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return); if (!cause_return) { // update saved PC with correct value // need 2 steps because LR can be live in c2 method __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset())); __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize)); } __ mov(R0, Rthread); int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86) assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); __ call(call_ptr); if (pc_offset == -1) { pc_offset = __ offset(); } oop_maps->add_gc_map(pc_offset, map); __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call // Check for pending exception __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset())); __ cmp(Rtemp, 0); if (!cause_return) { RegisterSaver::restore_live_registers(masm, false); __ pop(PC, eq); __ pop(Rexception_pc); } else { RegisterSaver::restore_live_registers(masm); __ bx(LR, eq); __ mov(Rexception_pc, LR); } __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); __ flush(); return SafepointBlob::create(&buffer, oop_maps, frame_size_words); } RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ResourceMark rm; CodeBuffer buffer(name, 1000, 512); int frame_size_words; OopMapSet *oop_maps; int frame_complete; MacroAssembler* masm = new MacroAssembler(&buffer); Label pending_exception; int start = __ offset(); oop_maps = new OopMapSet(); OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words); frame_complete = __ offset(); __ mov(R0, Rthread); int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); assert(start == 0, "warning: start differs from code_begin"); __ call(destination); if (pc_offset == -1) { pc_offset = __ offset(); } oop_maps->add_gc_map(pc_offset, map); __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call __ ldr(R1, Address(Rthread, Thread::pending_exception_offset())); __ cbnz(R1, pending_exception); // Overwrite saved register values // Place metadata result of VM call into Rmethod __ get_vm_result_2(R1, Rtemp); __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize)); // Place target address (VM call result) into Rtemp __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize)); RegisterSaver::restore_live_registers(masm); __ jump(Rtemp); __ bind(pending_exception); RegisterSaver::restore_live_registers(masm); const Register Rzero = __ zero_register(Rtemp); __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset())); __ mov(Rexception_pc, LR); __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); __ flush(); return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); }
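// Note on generate_resolve_blob above (illustrative): the resolver's results
// are written back into the register save area (the Rmethod slot receives the
// Method* from vm_result_2, the Rtemp slot receives the entry address returned
// in R0) before restore_live_registers, so the final jump(Rtemp) enters the
// resolved callee with all argument registers intact.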